1 /*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "config.h"
27 #include "LiteralParser.h"
28
29 #include "JSArray.h"
30 #include "JSString.h"
31 #include "Lexer.h"
32 #include <wtf/ASCIICType.h>
33 #include <wtf/dtoa.h>
34
35 namespace JSC {
36
lex(LiteralParserToken & token)37 LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
38 {
39 while (m_ptr < m_end && isASCIISpace(*m_ptr))
40 ++m_ptr;
41
42 ASSERT(m_ptr <= m_end);
43 if (m_ptr >= m_end) {
44 token.type = TokEnd;
45 token.start = token.end = m_ptr;
46 return TokEnd;
47 }
48 token.type = TokError;
49 token.start = m_ptr;
50 switch (*m_ptr) {
51 case '[':
52 token.type = TokLBracket;
53 token.end = ++m_ptr;
54 return TokLBracket;
55 case ']':
56 token.type = TokRBracket;
57 token.end = ++m_ptr;
58 return TokRBracket;
59 case '(':
60 token.type = TokLParen;
61 token.end = ++m_ptr;
62 return TokLBracket;
63 case ')':
64 token.type = TokRParen;
65 token.end = ++m_ptr;
66 return TokRBracket;
67 case '{':
68 token.type = TokLBrace;
69 token.end = ++m_ptr;
70 return TokLBrace;
71 case '}':
72 token.type = TokRBrace;
73 token.end = ++m_ptr;
74 return TokRBrace;
75 case ',':
76 token.type = TokComma;
77 token.end = ++m_ptr;
78 return TokComma;
79 case ':':
80 token.type = TokColon;
81 token.end = ++m_ptr;
82 return TokColon;
83 case '"':
84 if (m_mode == StrictJSON)
85 return lexString<StrictJSON>(token);
86 return lexString<NonStrictJSON>(token);
87 case 't':
88 if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
89 m_ptr += 4;
90 token.type = TokTrue;
91 token.end = m_ptr;
92 return TokTrue;
93 }
94 break;
95 case 'f':
96 if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
97 m_ptr += 5;
98 token.type = TokFalse;
99 token.end = m_ptr;
100 return TokFalse;
101 }
102 break;
103 case 'n':
104 if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
105 m_ptr += 4;
106 token.type = TokNull;
107 token.end = m_ptr;
108 return TokNull;
109 }
110 break;
111 case '-':
112 case '0':
113 case '1':
114 case '2':
115 case '3':
116 case '4':
117 case '5':
118 case '6':
119 case '7':
120 case '8':
121 case '9':
122 return lexNumber(token);
123 }
124 return TokError;
125 }
126
isSafeStringCharacter(UChar c)127 template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c)
128 {
129 return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t';
130 }
131
132 // "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
lexString(LiteralParserToken & token)133 template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
134 {
135 ++m_ptr;
136 const UChar* runStart;
137 token.stringToken = UString();
138 do {
139 runStart = m_ptr;
140 while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
141 ++m_ptr;
142 if (runStart < m_ptr)
143 token.stringToken.append(runStart, m_ptr - runStart);
144 if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
145 ++m_ptr;
146 if (m_ptr >= m_end)
147 return TokError;
148 switch (*m_ptr) {
149 case '"':
150 token.stringToken.append('"');
151 m_ptr++;
152 break;
153 case '\\':
154 token.stringToken.append('\\');
155 m_ptr++;
156 break;
157 case '/':
158 token.stringToken.append('/');
159 m_ptr++;
160 break;
161 case 'b':
162 token.stringToken.append('\b');
163 m_ptr++;
164 break;
165 case 'f':
166 token.stringToken.append('\f');
167 m_ptr++;
168 break;
169 case 'n':
170 token.stringToken.append('\n');
171 m_ptr++;
172 break;
173 case 'r':
174 token.stringToken.append('\r');
175 m_ptr++;
176 break;
177 case 't':
178 token.stringToken.append('\t');
179 m_ptr++;
180 break;
181
182 case 'u':
183 if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
184 return TokError;
185 for (int i = 1; i < 5; i++) {
186 if (!isASCIIHexDigit(m_ptr[i]))
187 return TokError;
188 }
189 token.stringToken.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
190 m_ptr += 5;
191 break;
192
193 default:
194 return TokError;
195 }
196 }
197 } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');
198
199 if (m_ptr >= m_end || *m_ptr != '"')
200 return TokError;
201
202 token.type = TokString;
203 token.end = ++m_ptr;
204 return TokString;
205 }
206
lexNumber(LiteralParserToken & token)207 LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
208 {
209 // ES5 and json.org define numbers as
210 // number
211 // int
212 // int frac? exp?
213 //
214 // int
215 // -? 0
216 // -? digit1-9 digits?
217 //
218 // digits
219 // digit digits?
220 //
221 // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
222
223 if (m_ptr < m_end && *m_ptr == '-') // -?
224 ++m_ptr;
225
226 // (0 | [1-9][0-9]*)
227 if (m_ptr < m_end && *m_ptr == '0') // 0
228 ++m_ptr;
229 else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
230 ++m_ptr;
231 // [0-9]*
232 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
233 ++m_ptr;
234 } else
235 return TokError;
236
237 // ('.' [0-9]+)?
238 if (m_ptr < m_end && *m_ptr == '.') {
239 ++m_ptr;
240 // [0-9]+
241 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
242 return TokError;
243
244 ++m_ptr;
245 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
246 ++m_ptr;
247 }
248
249 // ([eE][+-]? [0-9]+)?
250 if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
251 ++m_ptr;
252
253 // [-+]?
254 if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
255 ++m_ptr;
256
257 // [0-9]+
258 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
259 return TokError;
260
261 ++m_ptr;
262 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
263 ++m_ptr;
264 }
265
266 token.type = TokNumber;
267 token.end = m_ptr;
268 Vector<char, 64> buffer(token.end - token.start + 1);
269 int i;
270 for (i = 0; i < token.end - token.start; i++) {
271 ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
272 buffer[i] = static_cast<char>(token.start[i]);
273 }
274 buffer[i] = 0;
275 char* end;
276 token.numberToken = WTF::strtod(buffer.data(), &end);
277 ASSERT(buffer.data() + (token.end - token.start) == end);
278 return TokNumber;
279 }
280
parse(ParserState initialState)281 JSValue LiteralParser::parse(ParserState initialState)
282 {
283 ParserState state = initialState;
284 MarkedArgumentBuffer objectStack;
285 JSValue lastValue;
286 Vector<ParserState, 16> stateStack;
287 Vector<Identifier, 16> identifierStack;
288 while (1) {
289 switch(state) {
290 startParseArray:
291 case StartParseArray: {
292 JSArray* array = constructEmptyArray(m_exec);
293 objectStack.append(array);
294 // fallthrough
295 }
296 doParseArrayStartExpression:
297 case DoParseArrayStartExpression: {
298 if (m_lexer.next() == TokRBracket) {
299 m_lexer.next();
300 lastValue = objectStack.last();
301 objectStack.removeLast();
302 break;
303 }
304
305 stateStack.append(DoParseArrayEndExpression);
306 goto startParseExpression;
307 }
308 case DoParseArrayEndExpression: {
309 asArray(objectStack.last())->push(m_exec, lastValue);
310
311 if (m_lexer.currentToken().type == TokComma)
312 goto doParseArrayStartExpression;
313
314 if (m_lexer.currentToken().type != TokRBracket)
315 return JSValue();
316
317 m_lexer.next();
318 lastValue = objectStack.last();
319 objectStack.removeLast();
320 break;
321 }
322 startParseObject:
323 case StartParseObject: {
324 JSObject* object = constructEmptyObject(m_exec);
325 objectStack.append(object);
326
327 TokenType type = m_lexer.next();
328 if (type == TokString) {
329 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
330
331 // Check for colon
332 if (m_lexer.next() != TokColon)
333 return JSValue();
334
335 m_lexer.next();
336 identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
337 stateStack.append(DoParseObjectEndExpression);
338 goto startParseExpression;
339 } else if (type != TokRBrace)
340 return JSValue();
341 m_lexer.next();
342 lastValue = objectStack.last();
343 objectStack.removeLast();
344 break;
345 }
346 doParseObjectStartExpression:
347 case DoParseObjectStartExpression: {
348 TokenType type = m_lexer.next();
349 if (type != TokString)
350 return JSValue();
351 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
352
353 // Check for colon
354 if (m_lexer.next() != TokColon)
355 return JSValue();
356
357 m_lexer.next();
358 identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
359 stateStack.append(DoParseObjectEndExpression);
360 goto startParseExpression;
361 }
362 case DoParseObjectEndExpression:
363 {
364 asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue);
365 identifierStack.removeLast();
366 if (m_lexer.currentToken().type == TokComma)
367 goto doParseObjectStartExpression;
368 if (m_lexer.currentToken().type != TokRBrace)
369 return JSValue();
370 m_lexer.next();
371 lastValue = objectStack.last();
372 objectStack.removeLast();
373 break;
374 }
375 startParseExpression:
376 case StartParseExpression: {
377 switch (m_lexer.currentToken().type) {
378 case TokLBracket:
379 goto startParseArray;
380 case TokLBrace:
381 goto startParseObject;
382 case TokString: {
383 Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
384 m_lexer.next();
385 lastValue = jsString(m_exec, stringToken.stringToken);
386 break;
387 }
388 case TokNumber: {
389 Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
390 m_lexer.next();
391 lastValue = jsNumber(m_exec, numberToken.numberToken);
392 break;
393 }
394 case TokNull:
395 m_lexer.next();
396 lastValue = jsNull();
397 break;
398
399 case TokTrue:
400 m_lexer.next();
401 lastValue = jsBoolean(true);
402 break;
403
404 case TokFalse:
405 m_lexer.next();
406 lastValue = jsBoolean(false);
407 break;
408
409 default:
410 // Error
411 return JSValue();
412 }
413 break;
414 }
415 case StartParseStatement: {
416 switch (m_lexer.currentToken().type) {
417 case TokLBracket:
418 case TokNumber:
419 case TokString:
420 goto startParseExpression;
421
422 case TokLParen: {
423 m_lexer.next();
424 stateStack.append(StartParseStatementEndStatement);
425 goto startParseExpression;
426 }
427 default:
428 return JSValue();
429 }
430 }
431 case StartParseStatementEndStatement: {
432 ASSERT(stateStack.isEmpty());
433 if (m_lexer.currentToken().type != TokRParen)
434 return JSValue();
435 if (m_lexer.next() == TokEnd)
436 return lastValue;
437 return JSValue();
438 }
439 default:
440 ASSERT_NOT_REACHED();
441 }
442 if (stateStack.isEmpty())
443 return lastValue;
444 state = stateStack.last();
445 stateStack.removeLast();
446 continue;
447 }
448 }
449
450 }
451