• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "config.h"
27 #include "LiteralParser.h"
28 
29 #include "JSArray.h"
30 #include "JSString.h"
31 #include "Lexer.h"
32 #include "UStringBuilder.h"
33 #include <wtf/ASCIICType.h>
34 #include <wtf/dtoa.h>
35 
36 namespace JSC {
37 
// Returns true when |c| is one of the four characters that the JSON RFC 4627
// (http://www.ietf.org/rfc/rfc4627.txt, section 2 "JSON Grammar") allows as
// insignificant white space: space, horizontal tab, line feed, carriage return.
static inline bool isJSONWhiteSpace(const UChar& c)
{
    switch (c) {
    case ' ':
    case 0x9:
    case 0xA:
    case 0xD:
        return true;
    default:
        return false;
    }
}
44 
lex(LiteralParserToken & token)45 LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
46 {
47     while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr))
48         ++m_ptr;
49 
50     ASSERT(m_ptr <= m_end);
51     if (m_ptr >= m_end) {
52         token.type = TokEnd;
53         token.start = token.end = m_ptr;
54         return TokEnd;
55     }
56     token.type = TokError;
57     token.start = m_ptr;
58     switch (*m_ptr) {
59         case '[':
60             token.type = TokLBracket;
61             token.end = ++m_ptr;
62             return TokLBracket;
63         case ']':
64             token.type = TokRBracket;
65             token.end = ++m_ptr;
66             return TokRBracket;
67         case '(':
68             token.type = TokLParen;
69             token.end = ++m_ptr;
70             return TokLBracket;
71         case ')':
72             token.type = TokRParen;
73             token.end = ++m_ptr;
74             return TokRBracket;
75         case '{':
76             token.type = TokLBrace;
77             token.end = ++m_ptr;
78             return TokLBrace;
79         case '}':
80             token.type = TokRBrace;
81             token.end = ++m_ptr;
82             return TokRBrace;
83         case ',':
84             token.type = TokComma;
85             token.end = ++m_ptr;
86             return TokComma;
87         case ':':
88             token.type = TokColon;
89             token.end = ++m_ptr;
90             return TokColon;
91         case '"':
92             if (m_mode == StrictJSON)
93                 return lexString<StrictJSON>(token);
94             return lexString<NonStrictJSON>(token);
95         case 't':
96             if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
97                 m_ptr += 4;
98                 token.type = TokTrue;
99                 token.end = m_ptr;
100                 return TokTrue;
101             }
102             break;
103         case 'f':
104             if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
105                 m_ptr += 5;
106                 token.type = TokFalse;
107                 token.end = m_ptr;
108                 return TokFalse;
109             }
110             break;
111         case 'n':
112             if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
113                 m_ptr += 4;
114                 token.type = TokNull;
115                 token.end = m_ptr;
116                 return TokNull;
117             }
118             break;
119         case '-':
120         case '0':
121         case '1':
122         case '2':
123         case '3':
124         case '4':
125         case '5':
126         case '6':
127         case '7':
128         case '8':
129         case '9':
130             return lexNumber(token);
131     }
132     return TokError;
133 }
134 
isSafeStringCharacter(UChar c)135 template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c)
136 {
137     return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t';
138 }
139 
140 // "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
lexString(LiteralParserToken & token)141 template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
142 {
143     ++m_ptr;
144     const UChar* runStart;
145     UStringBuilder builder;
146     do {
147         runStart = m_ptr;
148         while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
149             ++m_ptr;
150         if (runStart < m_ptr)
151             builder.append(runStart, m_ptr - runStart);
152         if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
153             ++m_ptr;
154             if (m_ptr >= m_end)
155                 return TokError;
156             switch (*m_ptr) {
157                 case '"':
158                     builder.append('"');
159                     m_ptr++;
160                     break;
161                 case '\\':
162                     builder.append('\\');
163                     m_ptr++;
164                     break;
165                 case '/':
166                     builder.append('/');
167                     m_ptr++;
168                     break;
169                 case 'b':
170                     builder.append('\b');
171                     m_ptr++;
172                     break;
173                 case 'f':
174                     builder.append('\f');
175                     m_ptr++;
176                     break;
177                 case 'n':
178                     builder.append('\n');
179                     m_ptr++;
180                     break;
181                 case 'r':
182                     builder.append('\r');
183                     m_ptr++;
184                     break;
185                 case 't':
186                     builder.append('\t');
187                     m_ptr++;
188                     break;
189 
190                 case 'u':
191                     if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
192                         return TokError;
193                     for (int i = 1; i < 5; i++) {
194                         if (!isASCIIHexDigit(m_ptr[i]))
195                             return TokError;
196                     }
197                     builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
198                     m_ptr += 5;
199                     break;
200 
201                 default:
202                     return TokError;
203             }
204         }
205     } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');
206 
207     if (m_ptr >= m_end || *m_ptr != '"')
208         return TokError;
209 
210     token.stringToken = builder.toUString();
211     token.type = TokString;
212     token.end = ++m_ptr;
213     return TokString;
214 }
215 
lexNumber(LiteralParserToken & token)216 LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
217 {
218     // ES5 and json.org define numbers as
219     // number
220     //     int
221     //     int frac? exp?
222     //
223     // int
224     //     -? 0
225     //     -? digit1-9 digits?
226     //
227     // digits
228     //     digit digits?
229     //
230     // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
231 
232     if (m_ptr < m_end && *m_ptr == '-') // -?
233         ++m_ptr;
234 
235     // (0 | [1-9][0-9]*)
236     if (m_ptr < m_end && *m_ptr == '0') // 0
237         ++m_ptr;
238     else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
239         ++m_ptr;
240         // [0-9]*
241         while (m_ptr < m_end && isASCIIDigit(*m_ptr))
242             ++m_ptr;
243     } else
244         return TokError;
245 
246     // ('.' [0-9]+)?
247     if (m_ptr < m_end && *m_ptr == '.') {
248         ++m_ptr;
249         // [0-9]+
250         if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
251             return TokError;
252 
253         ++m_ptr;
254         while (m_ptr < m_end && isASCIIDigit(*m_ptr))
255             ++m_ptr;
256     }
257 
258     //  ([eE][+-]? [0-9]+)?
259     if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
260         ++m_ptr;
261 
262         // [-+]?
263         if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
264             ++m_ptr;
265 
266         // [0-9]+
267         if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
268             return TokError;
269 
270         ++m_ptr;
271         while (m_ptr < m_end && isASCIIDigit(*m_ptr))
272             ++m_ptr;
273     }
274 
275     token.type = TokNumber;
276     token.end = m_ptr;
277     Vector<char, 64> buffer(token.end - token.start + 1);
278     int i;
279     for (i = 0; i < token.end - token.start; i++) {
280         ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
281         buffer[i] = static_cast<char>(token.start[i]);
282     }
283     buffer[i] = 0;
284     char* end;
285     token.numberToken = WTF::strtod(buffer.data(), &end);
286     ASSERT(buffer.data() + (token.end - token.start) == end);
287     return TokNumber;
288 }
289 
parse(ParserState initialState)290 JSValue LiteralParser::parse(ParserState initialState)
291 {
292     ParserState state = initialState;
293     MarkedArgumentBuffer objectStack;
294     JSValue lastValue;
295     Vector<ParserState, 16> stateStack;
296     Vector<Identifier, 16> identifierStack;
297     while (1) {
298         switch(state) {
299             startParseArray:
300             case StartParseArray: {
301                 JSArray* array = constructEmptyArray(m_exec);
302                 objectStack.append(array);
303                 // fallthrough
304             }
305             doParseArrayStartExpression:
306             case DoParseArrayStartExpression: {
307                 TokenType lastToken = m_lexer.currentToken().type;
308                 if (m_lexer.next() == TokRBracket) {
309                     if (lastToken == TokComma)
310                         return JSValue();
311                     m_lexer.next();
312                     lastValue = objectStack.last();
313                     objectStack.removeLast();
314                     break;
315                 }
316 
317                 stateStack.append(DoParseArrayEndExpression);
318                 goto startParseExpression;
319             }
320             case DoParseArrayEndExpression: {
321                  asArray(objectStack.last())->push(m_exec, lastValue);
322 
323                 if (m_lexer.currentToken().type == TokComma)
324                     goto doParseArrayStartExpression;
325 
326                 if (m_lexer.currentToken().type != TokRBracket)
327                     return JSValue();
328 
329                 m_lexer.next();
330                 lastValue = objectStack.last();
331                 objectStack.removeLast();
332                 break;
333             }
334             startParseObject:
335             case StartParseObject: {
336                 JSObject* object = constructEmptyObject(m_exec);
337                 objectStack.append(object);
338 
339                 TokenType type = m_lexer.next();
340                 if (type == TokString) {
341                     Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
342 
343                     // Check for colon
344                     if (m_lexer.next() != TokColon)
345                         return JSValue();
346 
347                     m_lexer.next();
348                     identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
349                     stateStack.append(DoParseObjectEndExpression);
350                     goto startParseExpression;
351                 } else if (type != TokRBrace)
352                     return JSValue();
353                 m_lexer.next();
354                 lastValue = objectStack.last();
355                 objectStack.removeLast();
356                 break;
357             }
358             doParseObjectStartExpression:
359             case DoParseObjectStartExpression: {
360                 TokenType type = m_lexer.next();
361                 if (type != TokString)
362                     return JSValue();
363                 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
364 
365                 // Check for colon
366                 if (m_lexer.next() != TokColon)
367                     return JSValue();
368 
369                 m_lexer.next();
370                 identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
371                 stateStack.append(DoParseObjectEndExpression);
372                 goto startParseExpression;
373             }
374             case DoParseObjectEndExpression:
375             {
376                 asObject(objectStack.last())->putDirect(m_exec->globalData(), identifierStack.last(), lastValue);
377                 identifierStack.removeLast();
378                 if (m_lexer.currentToken().type == TokComma)
379                     goto doParseObjectStartExpression;
380                 if (m_lexer.currentToken().type != TokRBrace)
381                     return JSValue();
382                 m_lexer.next();
383                 lastValue = objectStack.last();
384                 objectStack.removeLast();
385                 break;
386             }
387             startParseExpression:
388             case StartParseExpression: {
389                 switch (m_lexer.currentToken().type) {
390                     case TokLBracket:
391                         goto startParseArray;
392                     case TokLBrace:
393                         goto startParseObject;
394                     case TokString: {
395                         Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
396                         m_lexer.next();
397                         lastValue = jsString(m_exec, stringToken.stringToken);
398                         break;
399                     }
400                     case TokNumber: {
401                         Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
402                         m_lexer.next();
403                         lastValue = jsNumber(numberToken.numberToken);
404                         break;
405                     }
406                     case TokNull:
407                         m_lexer.next();
408                         lastValue = jsNull();
409                         break;
410 
411                     case TokTrue:
412                         m_lexer.next();
413                         lastValue = jsBoolean(true);
414                         break;
415 
416                     case TokFalse:
417                         m_lexer.next();
418                         lastValue = jsBoolean(false);
419                         break;
420 
421                     default:
422                         // Error
423                         return JSValue();
424                 }
425                 break;
426             }
427             case StartParseStatement: {
428                 switch (m_lexer.currentToken().type) {
429                     case TokLBracket:
430                     case TokNumber:
431                     case TokString:
432                         goto startParseExpression;
433 
434                     case TokLParen: {
435                         m_lexer.next();
436                         stateStack.append(StartParseStatementEndStatement);
437                         goto startParseExpression;
438                     }
439                     default:
440                         return JSValue();
441                 }
442             }
443             case StartParseStatementEndStatement: {
444                 ASSERT(stateStack.isEmpty());
445                 if (m_lexer.currentToken().type != TokRParen)
446                     return JSValue();
447                 if (m_lexer.next() == TokEnd)
448                     return lastValue;
449                 return JSValue();
450             }
451             default:
452                 ASSERT_NOT_REACHED();
453         }
454         if (stateStack.isEmpty())
455             return lastValue;
456         state = stateStack.last();
457         stateStack.removeLast();
458         continue;
459     }
460 }
461 
462 }
463