• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "config.h"
27 #include "LiteralParser.h"
28 
29 #include "JSArray.h"
30 #include "JSString.h"
31 #include "Lexer.h"
32 #include <wtf/ASCIICType.h>
33 #include <wtf/dtoa.h>
34 
35 namespace JSC {
36 
lex(LiteralParserToken & token)37 LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
38 {
39     while (m_ptr < m_end && isASCIISpace(*m_ptr))
40         ++m_ptr;
41 
42     ASSERT(m_ptr <= m_end);
43     if (m_ptr >= m_end) {
44         token.type = TokEnd;
45         token.start = token.end = m_ptr;
46         return TokEnd;
47     }
48     token.type = TokError;
49     token.start = m_ptr;
50     switch (*m_ptr) {
51         case '[':
52             token.type = TokLBracket;
53             token.end = ++m_ptr;
54             return TokLBracket;
55         case ']':
56             token.type = TokRBracket;
57             token.end = ++m_ptr;
58             return TokRBracket;
59         case '(':
60             token.type = TokLParen;
61             token.end = ++m_ptr;
62             return TokLBracket;
63         case ')':
64             token.type = TokRParen;
65             token.end = ++m_ptr;
66             return TokRBracket;
67         case '{':
68             token.type = TokLBrace;
69             token.end = ++m_ptr;
70             return TokLBrace;
71         case '}':
72             token.type = TokRBrace;
73             token.end = ++m_ptr;
74             return TokRBrace;
75         case ',':
76             token.type = TokComma;
77             token.end = ++m_ptr;
78             return TokComma;
79         case ':':
80             token.type = TokColon;
81             token.end = ++m_ptr;
82             return TokColon;
83         case '"':
84             if (m_mode == StrictJSON)
85                 return lexString<StrictJSON>(token);
86             return lexString<NonStrictJSON>(token);
87         case 't':
88             if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
89                 m_ptr += 4;
90                 token.type = TokTrue;
91                 token.end = m_ptr;
92                 return TokTrue;
93             }
94             break;
95         case 'f':
96             if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
97                 m_ptr += 5;
98                 token.type = TokFalse;
99                 token.end = m_ptr;
100                 return TokFalse;
101             }
102             break;
103         case 'n':
104             if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
105                 m_ptr += 4;
106                 token.type = TokNull;
107                 token.end = m_ptr;
108                 return TokNull;
109             }
110             break;
111         case '-':
112         case '0':
113         case '1':
114         case '2':
115         case '3':
116         case '4':
117         case '5':
118         case '6':
119         case '7':
120         case '8':
121         case '9':
122             return lexNumber(token);
123     }
124     return TokError;
125 }
126 
isSafeStringCharacter(UChar c)127 template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c)
128 {
129     return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t';
130 }
131 
132 // "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
lexString(LiteralParserToken & token)133 template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
134 {
135     ++m_ptr;
136     const UChar* runStart;
137     token.stringToken = UString();
138     do {
139         runStart = m_ptr;
140         while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
141             ++m_ptr;
142         if (runStart < m_ptr)
143             token.stringToken.append(runStart, m_ptr - runStart);
144         if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
145             ++m_ptr;
146             if (m_ptr >= m_end)
147                 return TokError;
148             switch (*m_ptr) {
149                 case '"':
150                     token.stringToken.append('"');
151                     m_ptr++;
152                     break;
153                 case '\\':
154                     token.stringToken.append('\\');
155                     m_ptr++;
156                     break;
157                 case '/':
158                     token.stringToken.append('/');
159                     m_ptr++;
160                     break;
161                 case 'b':
162                     token.stringToken.append('\b');
163                     m_ptr++;
164                     break;
165                 case 'f':
166                     token.stringToken.append('\f');
167                     m_ptr++;
168                     break;
169                 case 'n':
170                     token.stringToken.append('\n');
171                     m_ptr++;
172                     break;
173                 case 'r':
174                     token.stringToken.append('\r');
175                     m_ptr++;
176                     break;
177                 case 't':
178                     token.stringToken.append('\t');
179                     m_ptr++;
180                     break;
181 
182                 case 'u':
183                     if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
184                         return TokError;
185                     for (int i = 1; i < 5; i++) {
186                         if (!isASCIIHexDigit(m_ptr[i]))
187                             return TokError;
188                     }
189                     token.stringToken.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
190                     m_ptr += 5;
191                     break;
192 
193                 default:
194                     return TokError;
195             }
196         }
197     } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');
198 
199     if (m_ptr >= m_end || *m_ptr != '"')
200         return TokError;
201 
202     token.type = TokString;
203     token.end = ++m_ptr;
204     return TokString;
205 }
206 
lexNumber(LiteralParserToken & token)207 LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
208 {
209     // ES5 and json.org define numbers as
210     // number
211     //     int
212     //     int frac? exp?
213     //
214     // int
215     //     -? 0
216     //     -? digit1-9 digits?
217     //
218     // digits
219     //     digit digits?
220     //
221     // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
222 
223     if (m_ptr < m_end && *m_ptr == '-') // -?
224         ++m_ptr;
225 
226     // (0 | [1-9][0-9]*)
227     if (m_ptr < m_end && *m_ptr == '0') // 0
228         ++m_ptr;
229     else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
230         ++m_ptr;
231         // [0-9]*
232         while (m_ptr < m_end && isASCIIDigit(*m_ptr))
233             ++m_ptr;
234     } else
235         return TokError;
236 
237     // ('.' [0-9]+)?
238     if (m_ptr < m_end && *m_ptr == '.') {
239         ++m_ptr;
240         // [0-9]+
241         if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
242             return TokError;
243 
244         ++m_ptr;
245         while (m_ptr < m_end && isASCIIDigit(*m_ptr))
246             ++m_ptr;
247     }
248 
249     //  ([eE][+-]? [0-9]+)?
250     if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
251         ++m_ptr;
252 
253         // [-+]?
254         if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
255             ++m_ptr;
256 
257         // [0-9]+
258         if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
259             return TokError;
260 
261         ++m_ptr;
262         while (m_ptr < m_end && isASCIIDigit(*m_ptr))
263             ++m_ptr;
264     }
265 
266     token.type = TokNumber;
267     token.end = m_ptr;
268     Vector<char, 64> buffer(token.end - token.start + 1);
269     int i;
270     for (i = 0; i < token.end - token.start; i++) {
271         ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
272         buffer[i] = static_cast<char>(token.start[i]);
273     }
274     buffer[i] = 0;
275     char* end;
276     token.numberToken = WTF::strtod(buffer.data(), &end);
277     ASSERT(buffer.data() + (token.end - token.start) == end);
278     return TokNumber;
279 }
280 
parse(ParserState initialState)281 JSValue LiteralParser::parse(ParserState initialState)
282 {
283     ParserState state = initialState;
284     MarkedArgumentBuffer objectStack;
285     JSValue lastValue;
286     Vector<ParserState, 16> stateStack;
287     Vector<Identifier, 16> identifierStack;
288     while (1) {
289         switch(state) {
290             startParseArray:
291             case StartParseArray: {
292                 JSArray* array = constructEmptyArray(m_exec);
293                 objectStack.append(array);
294                 // fallthrough
295             }
296             doParseArrayStartExpression:
297             case DoParseArrayStartExpression: {
298                 if (m_lexer.next() == TokRBracket) {
299                     m_lexer.next();
300                     lastValue = objectStack.last();
301                     objectStack.removeLast();
302                     break;
303                 }
304 
305                 stateStack.append(DoParseArrayEndExpression);
306                 goto startParseExpression;
307             }
308             case DoParseArrayEndExpression: {
309                  asArray(objectStack.last())->push(m_exec, lastValue);
310 
311                 if (m_lexer.currentToken().type == TokComma)
312                     goto doParseArrayStartExpression;
313 
314                 if (m_lexer.currentToken().type != TokRBracket)
315                     return JSValue();
316 
317                 m_lexer.next();
318                 lastValue = objectStack.last();
319                 objectStack.removeLast();
320                 break;
321             }
322             startParseObject:
323             case StartParseObject: {
324                 JSObject* object = constructEmptyObject(m_exec);
325                 objectStack.append(object);
326 
327                 TokenType type = m_lexer.next();
328                 if (type == TokString) {
329                     Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
330 
331                     // Check for colon
332                     if (m_lexer.next() != TokColon)
333                         return JSValue();
334 
335                     m_lexer.next();
336                     identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
337                     stateStack.append(DoParseObjectEndExpression);
338                     goto startParseExpression;
339                 } else if (type != TokRBrace)
340                     return JSValue();
341                 m_lexer.next();
342                 lastValue = objectStack.last();
343                 objectStack.removeLast();
344                 break;
345             }
346             doParseObjectStartExpression:
347             case DoParseObjectStartExpression: {
348                 TokenType type = m_lexer.next();
349                 if (type != TokString)
350                     return JSValue();
351                 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
352 
353                 // Check for colon
354                 if (m_lexer.next() != TokColon)
355                     return JSValue();
356 
357                 m_lexer.next();
358                 identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
359                 stateStack.append(DoParseObjectEndExpression);
360                 goto startParseExpression;
361             }
362             case DoParseObjectEndExpression:
363             {
364                 asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue);
365                 identifierStack.removeLast();
366                 if (m_lexer.currentToken().type == TokComma)
367                     goto doParseObjectStartExpression;
368                 if (m_lexer.currentToken().type != TokRBrace)
369                     return JSValue();
370                 m_lexer.next();
371                 lastValue = objectStack.last();
372                 objectStack.removeLast();
373                 break;
374             }
375             startParseExpression:
376             case StartParseExpression: {
377                 switch (m_lexer.currentToken().type) {
378                     case TokLBracket:
379                         goto startParseArray;
380                     case TokLBrace:
381                         goto startParseObject;
382                     case TokString: {
383                         Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
384                         m_lexer.next();
385                         lastValue = jsString(m_exec, stringToken.stringToken);
386                         break;
387                     }
388                     case TokNumber: {
389                         Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
390                         m_lexer.next();
391                         lastValue = jsNumber(m_exec, numberToken.numberToken);
392                         break;
393                     }
394                     case TokNull:
395                         m_lexer.next();
396                         lastValue = jsNull();
397                         break;
398 
399                     case TokTrue:
400                         m_lexer.next();
401                         lastValue = jsBoolean(true);
402                         break;
403 
404                     case TokFalse:
405                         m_lexer.next();
406                         lastValue = jsBoolean(false);
407                         break;
408 
409                     default:
410                         // Error
411                         return JSValue();
412                 }
413                 break;
414             }
415             case StartParseStatement: {
416                 switch (m_lexer.currentToken().type) {
417                     case TokLBracket:
418                     case TokNumber:
419                     case TokString:
420                         goto startParseExpression;
421 
422                     case TokLParen: {
423                         m_lexer.next();
424                         stateStack.append(StartParseStatementEndStatement);
425                         goto startParseExpression;
426                     }
427                     default:
428                         return JSValue();
429                 }
430             }
431             case StartParseStatementEndStatement: {
432                 ASSERT(stateStack.isEmpty());
433                 if (m_lexer.currentToken().type != TokRParen)
434                     return JSValue();
435                 if (m_lexer.next() == TokEnd)
436                     return lastValue;
437                 return JSValue();
438             }
439             default:
440                 ASSERT_NOT_REACHED();
441         }
442         if (stateStack.isEmpty())
443             return lastValue;
444         state = stateStack.last();
445         stateStack.removeLast();
446         continue;
447     }
448 }
449 
450 }
451