1 /*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "config.h"
27 #include "LiteralParser.h"
28
29 #include "JSArray.h"
30 #include "JSString.h"
31 #include "Lexer.h"
32 #include "UStringBuilder.h"
33 #include <wtf/ASCIICType.h>
34 #include <wtf/dtoa.h>
35
36 namespace JSC {
37
// Returns true when |c| is one of the four characters that RFC 4627
// (http://www.ietf.org/rfc/rfc4627.txt, section 2 "JSON Grammar") treats as
// insignificant white space: space, horizontal tab, line feed and carriage
// return.
static inline bool isJSONWhiteSpace(const UChar& c)
{
    switch (c) {
    case ' ':
    case 0x9:
    case 0xA:
    case 0xD:
        return true;
    default:
        return false;
    }
}
44
lex(LiteralParserToken & token)45 LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
46 {
47 while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr))
48 ++m_ptr;
49
50 ASSERT(m_ptr <= m_end);
51 if (m_ptr >= m_end) {
52 token.type = TokEnd;
53 token.start = token.end = m_ptr;
54 return TokEnd;
55 }
56 token.type = TokError;
57 token.start = m_ptr;
58 switch (*m_ptr) {
59 case '[':
60 token.type = TokLBracket;
61 token.end = ++m_ptr;
62 return TokLBracket;
63 case ']':
64 token.type = TokRBracket;
65 token.end = ++m_ptr;
66 return TokRBracket;
67 case '(':
68 token.type = TokLParen;
69 token.end = ++m_ptr;
70 return TokLBracket;
71 case ')':
72 token.type = TokRParen;
73 token.end = ++m_ptr;
74 return TokRBracket;
75 case '{':
76 token.type = TokLBrace;
77 token.end = ++m_ptr;
78 return TokLBrace;
79 case '}':
80 token.type = TokRBrace;
81 token.end = ++m_ptr;
82 return TokRBrace;
83 case ',':
84 token.type = TokComma;
85 token.end = ++m_ptr;
86 return TokComma;
87 case ':':
88 token.type = TokColon;
89 token.end = ++m_ptr;
90 return TokColon;
91 case '"':
92 if (m_mode == StrictJSON)
93 return lexString<StrictJSON>(token);
94 return lexString<NonStrictJSON>(token);
95 case 't':
96 if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
97 m_ptr += 4;
98 token.type = TokTrue;
99 token.end = m_ptr;
100 return TokTrue;
101 }
102 break;
103 case 'f':
104 if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
105 m_ptr += 5;
106 token.type = TokFalse;
107 token.end = m_ptr;
108 return TokFalse;
109 }
110 break;
111 case 'n':
112 if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
113 m_ptr += 4;
114 token.type = TokNull;
115 token.end = m_ptr;
116 return TokNull;
117 }
118 break;
119 case '-':
120 case '0':
121 case '1':
122 case '2':
123 case '3':
124 case '4':
125 case '5':
126 case '6':
127 case '7':
128 case '8':
129 case '9':
130 return lexNumber(token);
131 }
132 return TokError;
133 }
134
isSafeStringCharacter(UChar c)135 template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c)
136 {
137 return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t';
138 }
139
140 // "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
lexString(LiteralParserToken & token)141 template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
142 {
143 ++m_ptr;
144 const UChar* runStart;
145 UStringBuilder builder;
146 do {
147 runStart = m_ptr;
148 while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
149 ++m_ptr;
150 if (runStart < m_ptr)
151 builder.append(runStart, m_ptr - runStart);
152 if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
153 ++m_ptr;
154 if (m_ptr >= m_end)
155 return TokError;
156 switch (*m_ptr) {
157 case '"':
158 builder.append('"');
159 m_ptr++;
160 break;
161 case '\\':
162 builder.append('\\');
163 m_ptr++;
164 break;
165 case '/':
166 builder.append('/');
167 m_ptr++;
168 break;
169 case 'b':
170 builder.append('\b');
171 m_ptr++;
172 break;
173 case 'f':
174 builder.append('\f');
175 m_ptr++;
176 break;
177 case 'n':
178 builder.append('\n');
179 m_ptr++;
180 break;
181 case 'r':
182 builder.append('\r');
183 m_ptr++;
184 break;
185 case 't':
186 builder.append('\t');
187 m_ptr++;
188 break;
189
190 case 'u':
191 if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
192 return TokError;
193 for (int i = 1; i < 5; i++) {
194 if (!isASCIIHexDigit(m_ptr[i]))
195 return TokError;
196 }
197 builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
198 m_ptr += 5;
199 break;
200
201 default:
202 return TokError;
203 }
204 }
205 } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');
206
207 if (m_ptr >= m_end || *m_ptr != '"')
208 return TokError;
209
210 token.stringToken = builder.toUString();
211 token.type = TokString;
212 token.end = ++m_ptr;
213 return TokString;
214 }
215
lexNumber(LiteralParserToken & token)216 LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
217 {
218 // ES5 and json.org define numbers as
219 // number
220 // int
221 // int frac? exp?
222 //
223 // int
224 // -? 0
225 // -? digit1-9 digits?
226 //
227 // digits
228 // digit digits?
229 //
230 // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
231
232 if (m_ptr < m_end && *m_ptr == '-') // -?
233 ++m_ptr;
234
235 // (0 | [1-9][0-9]*)
236 if (m_ptr < m_end && *m_ptr == '0') // 0
237 ++m_ptr;
238 else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
239 ++m_ptr;
240 // [0-9]*
241 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
242 ++m_ptr;
243 } else
244 return TokError;
245
246 // ('.' [0-9]+)?
247 if (m_ptr < m_end && *m_ptr == '.') {
248 ++m_ptr;
249 // [0-9]+
250 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
251 return TokError;
252
253 ++m_ptr;
254 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
255 ++m_ptr;
256 }
257
258 // ([eE][+-]? [0-9]+)?
259 if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
260 ++m_ptr;
261
262 // [-+]?
263 if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
264 ++m_ptr;
265
266 // [0-9]+
267 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
268 return TokError;
269
270 ++m_ptr;
271 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
272 ++m_ptr;
273 }
274
275 token.type = TokNumber;
276 token.end = m_ptr;
277 Vector<char, 64> buffer(token.end - token.start + 1);
278 int i;
279 for (i = 0; i < token.end - token.start; i++) {
280 ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
281 buffer[i] = static_cast<char>(token.start[i]);
282 }
283 buffer[i] = 0;
284 char* end;
285 token.numberToken = WTF::strtod(buffer.data(), &end);
286 ASSERT(buffer.data() + (token.end - token.start) == end);
287 return TokNumber;
288 }
289
parse(ParserState initialState)290 JSValue LiteralParser::parse(ParserState initialState)
291 {
292 ParserState state = initialState;
293 MarkedArgumentBuffer objectStack;
294 JSValue lastValue;
295 Vector<ParserState, 16> stateStack;
296 Vector<Identifier, 16> identifierStack;
297 while (1) {
298 switch(state) {
299 startParseArray:
300 case StartParseArray: {
301 JSArray* array = constructEmptyArray(m_exec);
302 objectStack.append(array);
303 // fallthrough
304 }
305 doParseArrayStartExpression:
306 case DoParseArrayStartExpression: {
307 TokenType lastToken = m_lexer.currentToken().type;
308 if (m_lexer.next() == TokRBracket) {
309 if (lastToken == TokComma)
310 return JSValue();
311 m_lexer.next();
312 lastValue = objectStack.last();
313 objectStack.removeLast();
314 break;
315 }
316
317 stateStack.append(DoParseArrayEndExpression);
318 goto startParseExpression;
319 }
320 case DoParseArrayEndExpression: {
321 asArray(objectStack.last())->push(m_exec, lastValue);
322
323 if (m_lexer.currentToken().type == TokComma)
324 goto doParseArrayStartExpression;
325
326 if (m_lexer.currentToken().type != TokRBracket)
327 return JSValue();
328
329 m_lexer.next();
330 lastValue = objectStack.last();
331 objectStack.removeLast();
332 break;
333 }
334 startParseObject:
335 case StartParseObject: {
336 JSObject* object = constructEmptyObject(m_exec);
337 objectStack.append(object);
338
339 TokenType type = m_lexer.next();
340 if (type == TokString) {
341 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
342
343 // Check for colon
344 if (m_lexer.next() != TokColon)
345 return JSValue();
346
347 m_lexer.next();
348 identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
349 stateStack.append(DoParseObjectEndExpression);
350 goto startParseExpression;
351 } else if (type != TokRBrace)
352 return JSValue();
353 m_lexer.next();
354 lastValue = objectStack.last();
355 objectStack.removeLast();
356 break;
357 }
358 doParseObjectStartExpression:
359 case DoParseObjectStartExpression: {
360 TokenType type = m_lexer.next();
361 if (type != TokString)
362 return JSValue();
363 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
364
365 // Check for colon
366 if (m_lexer.next() != TokColon)
367 return JSValue();
368
369 m_lexer.next();
370 identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
371 stateStack.append(DoParseObjectEndExpression);
372 goto startParseExpression;
373 }
374 case DoParseObjectEndExpression:
375 {
376 asObject(objectStack.last())->putDirect(m_exec->globalData(), identifierStack.last(), lastValue);
377 identifierStack.removeLast();
378 if (m_lexer.currentToken().type == TokComma)
379 goto doParseObjectStartExpression;
380 if (m_lexer.currentToken().type != TokRBrace)
381 return JSValue();
382 m_lexer.next();
383 lastValue = objectStack.last();
384 objectStack.removeLast();
385 break;
386 }
387 startParseExpression:
388 case StartParseExpression: {
389 switch (m_lexer.currentToken().type) {
390 case TokLBracket:
391 goto startParseArray;
392 case TokLBrace:
393 goto startParseObject;
394 case TokString: {
395 Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
396 m_lexer.next();
397 lastValue = jsString(m_exec, stringToken.stringToken);
398 break;
399 }
400 case TokNumber: {
401 Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
402 m_lexer.next();
403 lastValue = jsNumber(numberToken.numberToken);
404 break;
405 }
406 case TokNull:
407 m_lexer.next();
408 lastValue = jsNull();
409 break;
410
411 case TokTrue:
412 m_lexer.next();
413 lastValue = jsBoolean(true);
414 break;
415
416 case TokFalse:
417 m_lexer.next();
418 lastValue = jsBoolean(false);
419 break;
420
421 default:
422 // Error
423 return JSValue();
424 }
425 break;
426 }
427 case StartParseStatement: {
428 switch (m_lexer.currentToken().type) {
429 case TokLBracket:
430 case TokNumber:
431 case TokString:
432 goto startParseExpression;
433
434 case TokLParen: {
435 m_lexer.next();
436 stateStack.append(StartParseStatementEndStatement);
437 goto startParseExpression;
438 }
439 default:
440 return JSValue();
441 }
442 }
443 case StartParseStatementEndStatement: {
444 ASSERT(stateStack.isEmpty());
445 if (m_lexer.currentToken().type != TokRParen)
446 return JSValue();
447 if (m_lexer.next() == TokEnd)
448 return lastValue;
449 return JSValue();
450 }
451 default:
452 ASSERT_NOT_REACHED();
453 }
454 if (stateStack.isEmpty())
455 return lastValue;
456 state = stateStack.last();
457 stateStack.removeLast();
458 continue;
459 }
460 }
461
462 }
463