1 /*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23 #include "config.h"
24 #include "Lexer.h"
25
26 #include "JSFunction.h"
27 #include "JSGlobalObjectFunctions.h"
28 #include "NodeInfo.h"
29 #include "Nodes.h"
30 #include "dtoa.h"
31 #include <ctype.h>
32 #include <limits.h>
33 #include <string.h>
34 #include <wtf/ASCIICType.h>
35 #include <wtf/Assertions.h>
36 #include <wtf/unicode/Unicode.h>
37
38 using namespace WTF;
39 using namespace Unicode;
40
41 // we can't specify the namespace in yacc's C output, so do it here
42 using namespace JSC;
43
44 #ifndef KDE_USE_FINAL
45 #include "Grammar.h"
46 #endif
47
48 #include "Lookup.h"
49 #include "Lexer.lut.h"
50
51 // a bridge for yacc from the C world to C++
jscyylex(void * lvalp,void * llocp,void * globalData)52 int jscyylex(void* lvalp, void* llocp, void* globalData)
53 {
54 return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
55 }
56
57 namespace JSC {
58
59 static bool isDecimalDigit(int);
60
Lexer(JSGlobalData * globalData)61 Lexer::Lexer(JSGlobalData* globalData)
62 : yylineno(1)
63 , m_restrKeyword(false)
64 , m_eatNextIdentifier(false)
65 , m_stackToken(-1)
66 , m_lastToken(-1)
67 , m_position(0)
68 , m_code(0)
69 , m_length(0)
70 , m_isReparsing(false)
71 , m_atLineStart(true)
72 , m_current(0)
73 , m_next1(0)
74 , m_next2(0)
75 , m_next3(0)
76 , m_currentOffset(0)
77 , m_nextOffset1(0)
78 , m_nextOffset2(0)
79 , m_nextOffset3(0)
80 , m_globalData(globalData)
81 , m_mainTable(JSC::mainTable)
82 {
83 m_buffer8.reserveCapacity(initialReadBufferCapacity);
84 m_buffer16.reserveCapacity(initialReadBufferCapacity);
85 }
86
~Lexer()87 Lexer::~Lexer()
88 {
89 m_mainTable.deleteTable();
90 }
91
setCode(const SourceCode & source)92 void Lexer::setCode(const SourceCode& source)
93 {
94 yylineno = source.firstLine();
95 m_restrKeyword = false;
96 m_delimited = false;
97 m_eatNextIdentifier = false;
98 m_stackToken = -1;
99 m_lastToken = -1;
100
101 m_position = source.startOffset();
102 m_source = &source;
103 m_code = source.provider()->data();
104 m_length = source.endOffset();
105 m_skipLF = false;
106 m_skipCR = false;
107 m_error = false;
108 m_atLineStart = true;
109
110 // read first characters
111 shift(4);
112 }
113
shift(unsigned p)114 void Lexer::shift(unsigned p)
115 {
116 // ECMA-262 calls for stripping Cf characters here, but we only do this for BOM,
117 // see <https://bugs.webkit.org/show_bug.cgi?id=4931>.
118
119 while (p--) {
120 m_current = m_next1;
121 m_next1 = m_next2;
122 m_next2 = m_next3;
123 m_currentOffset = m_nextOffset1;
124 m_nextOffset1 = m_nextOffset2;
125 m_nextOffset2 = m_nextOffset3;
126 do {
127 if (m_position >= m_length) {
128 m_nextOffset3 = m_position;
129 m_position++;
130 m_next3 = -1;
131 break;
132 }
133 m_nextOffset3 = m_position;
134 m_next3 = m_code[m_position++];
135 } while (m_next3 == 0xFEFF);
136 }
137 }
138
139 // called on each new line
nextLine()140 void Lexer::nextLine()
141 {
142 yylineno++;
143 m_atLineStart = true;
144 }
145
setDone(State s)146 void Lexer::setDone(State s)
147 {
148 m_state = s;
149 m_done = true;
150 }
151
lex(void * p1,void * p2)152 int Lexer::lex(void* p1, void* p2)
153 {
154 YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
155 YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
156 int token = 0;
157 m_state = Start;
158 unsigned short stringType = 0; // either single or double quotes
159 m_buffer8.clear();
160 m_buffer16.clear();
161 m_done = false;
162 m_terminator = false;
163 m_skipLF = false;
164 m_skipCR = false;
165
166 // did we push a token on the stack previously ?
167 // (after an automatic semicolon insertion)
168 if (m_stackToken >= 0) {
169 setDone(Other);
170 token = m_stackToken;
171 m_stackToken = 0;
172 }
173 int startOffset = m_currentOffset;
174 while (!m_done) {
175 if (m_skipLF && m_current != '\n') // found \r but not \n afterwards
176 m_skipLF = false;
177 if (m_skipCR && m_current != '\r') // found \n but not \r afterwards
178 m_skipCR = false;
179 if (m_skipLF || m_skipCR) { // found \r\n or \n\r -> eat the second one
180 m_skipLF = false;
181 m_skipCR = false;
182 shift(1);
183 }
184 switch (m_state) {
185 case Start:
186 startOffset = m_currentOffset;
187 if (isWhiteSpace()) {
188 // do nothing
189 } else if (m_current == '/' && m_next1 == '/') {
190 shift(1);
191 m_state = InSingleLineComment;
192 } else if (m_current == '/' && m_next1 == '*') {
193 shift(1);
194 m_state = InMultiLineComment;
195 } else if (m_current == -1) {
196 if (!m_terminator && !m_delimited && !m_isReparsing) {
197 // automatic semicolon insertion if program incomplete
198 token = ';';
199 m_stackToken = 0;
200 setDone(Other);
201 } else
202 setDone(Eof);
203 } else if (isLineTerminator()) {
204 nextLine();
205 m_terminator = true;
206 if (m_restrKeyword) {
207 token = ';';
208 setDone(Other);
209 }
210 } else if (m_current == '"' || m_current == '\'') {
211 m_state = InString;
212 stringType = static_cast<unsigned short>(m_current);
213 } else if (isIdentStart(m_current)) {
214 record16(m_current);
215 m_state = InIdentifierOrKeyword;
216 } else if (m_current == '\\')
217 m_state = InIdentifierStartUnicodeEscapeStart;
218 else if (m_current == '0') {
219 record8(m_current);
220 m_state = InNum0;
221 } else if (isDecimalDigit(m_current)) {
222 record8(m_current);
223 m_state = InNum;
224 } else if (m_current == '.' && isDecimalDigit(m_next1)) {
225 record8(m_current);
226 m_state = InDecimal;
227 // <!-- marks the beginning of a line comment (for www usage)
228 } else if (m_current == '<' && m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
229 shift(3);
230 m_state = InSingleLineComment;
231 // same for -->
232 } else if (m_atLineStart && m_current == '-' && m_next1 == '-' && m_next2 == '>') {
233 shift(2);
234 m_state = InSingleLineComment;
235 } else {
236 token = matchPunctuator(lvalp->intValue, m_current, m_next1, m_next2, m_next3);
237 if (token != -1)
238 setDone(Other);
239 else
240 setDone(Bad);
241 }
242 break;
243 case InString:
244 if (m_current == stringType) {
245 shift(1);
246 setDone(String);
247 } else if (isLineTerminator() || m_current == -1)
248 setDone(Bad);
249 else if (m_current == '\\')
250 m_state = InEscapeSequence;
251 else
252 record16(m_current);
253 break;
254 // Escape Sequences inside of strings
255 case InEscapeSequence:
256 if (isOctalDigit(m_current)) {
257 if (m_current >= '0' && m_current <= '3' &&
258 isOctalDigit(m_next1) && isOctalDigit(m_next2)) {
259 record16(convertOctal(m_current, m_next1, m_next2));
260 shift(2);
261 m_state = InString;
262 } else if (isOctalDigit(m_current) && isOctalDigit(m_next1)) {
263 record16(convertOctal('0', m_current, m_next1));
264 shift(1);
265 m_state = InString;
266 } else if (isOctalDigit(m_current)) {
267 record16(convertOctal('0', '0', m_current));
268 m_state = InString;
269 } else
270 setDone(Bad);
271 } else if (m_current == 'x')
272 m_state = InHexEscape;
273 else if (m_current == 'u')
274 m_state = InUnicodeEscape;
275 else if (isLineTerminator()) {
276 nextLine();
277 m_state = InString;
278 } else {
279 record16(singleEscape(static_cast<unsigned short>(m_current)));
280 m_state = InString;
281 }
282 break;
283 case InHexEscape:
284 if (isHexDigit(m_current) && isHexDigit(m_next1)) {
285 m_state = InString;
286 record16(convertHex(m_current, m_next1));
287 shift(1);
288 } else if (m_current == stringType) {
289 record16('x');
290 shift(1);
291 setDone(String);
292 } else {
293 record16('x');
294 record16(m_current);
295 m_state = InString;
296 }
297 break;
298 case InUnicodeEscape:
299 if (isHexDigit(m_current) && isHexDigit(m_next1) && isHexDigit(m_next2) && isHexDigit(m_next3)) {
300 record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
301 shift(3);
302 m_state = InString;
303 } else if (m_current == stringType) {
304 record16('u');
305 shift(1);
306 setDone(String);
307 } else
308 setDone(Bad);
309 break;
310 case InSingleLineComment:
311 if (isLineTerminator()) {
312 nextLine();
313 m_terminator = true;
314 if (m_restrKeyword) {
315 token = ';';
316 setDone(Other);
317 } else
318 m_state = Start;
319 } else if (m_current == -1)
320 setDone(Eof);
321 break;
322 case InMultiLineComment:
323 if (m_current == -1)
324 setDone(Bad);
325 else if (isLineTerminator())
326 nextLine();
327 else if (m_current == '*' && m_next1 == '/') {
328 m_state = Start;
329 shift(1);
330 }
331 break;
332 case InIdentifierOrKeyword:
333 case InIdentifier:
334 if (isIdentPart(m_current))
335 record16(m_current);
336 else if (m_current == '\\')
337 m_state = InIdentifierPartUnicodeEscapeStart;
338 else
339 setDone(m_state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
340 break;
341 case InNum0:
342 if (m_current == 'x' || m_current == 'X') {
343 record8(m_current);
344 m_state = InHex;
345 } else if (m_current == '.') {
346 record8(m_current);
347 m_state = InDecimal;
348 } else if (m_current == 'e' || m_current == 'E') {
349 record8(m_current);
350 m_state = InExponentIndicator;
351 } else if (isOctalDigit(m_current)) {
352 record8(m_current);
353 m_state = InOctal;
354 } else if (isDecimalDigit(m_current)) {
355 record8(m_current);
356 m_state = InDecimal;
357 } else
358 setDone(Number);
359 break;
360 case InHex:
361 if (isHexDigit(m_current))
362 record8(m_current);
363 else
364 setDone(Hex);
365 break;
366 case InOctal:
367 if (isOctalDigit(m_current))
368 record8(m_current);
369 else if (isDecimalDigit(m_current)) {
370 record8(m_current);
371 m_state = InDecimal;
372 } else
373 setDone(Octal);
374 break;
375 case InNum:
376 if (isDecimalDigit(m_current))
377 record8(m_current);
378 else if (m_current == '.') {
379 record8(m_current);
380 m_state = InDecimal;
381 } else if (m_current == 'e' || m_current == 'E') {
382 record8(m_current);
383 m_state = InExponentIndicator;
384 } else
385 setDone(Number);
386 break;
387 case InDecimal:
388 if (isDecimalDigit(m_current))
389 record8(m_current);
390 else if (m_current == 'e' || m_current == 'E') {
391 record8(m_current);
392 m_state = InExponentIndicator;
393 } else
394 setDone(Number);
395 break;
396 case InExponentIndicator:
397 if (m_current == '+' || m_current == '-')
398 record8(m_current);
399 else if (isDecimalDigit(m_current)) {
400 record8(m_current);
401 m_state = InExponent;
402 } else
403 setDone(Bad);
404 break;
405 case InExponent:
406 if (isDecimalDigit(m_current))
407 record8(m_current);
408 else
409 setDone(Number);
410 break;
411 case InIdentifierStartUnicodeEscapeStart:
412 if (m_current == 'u')
413 m_state = InIdentifierStartUnicodeEscape;
414 else
415 setDone(Bad);
416 break;
417 case InIdentifierPartUnicodeEscapeStart:
418 if (m_current == 'u')
419 m_state = InIdentifierPartUnicodeEscape;
420 else
421 setDone(Bad);
422 break;
423 case InIdentifierStartUnicodeEscape:
424 if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
425 setDone(Bad);
426 break;
427 }
428 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
429 shift(3);
430 if (!isIdentStart(token)) {
431 setDone(Bad);
432 break;
433 }
434 record16(token);
435 m_state = InIdentifier;
436 break;
437 case InIdentifierPartUnicodeEscape:
438 if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
439 setDone(Bad);
440 break;
441 }
442 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
443 shift(3);
444 if (!isIdentPart(token)) {
445 setDone(Bad);
446 break;
447 }
448 record16(token);
449 m_state = InIdentifier;
450 break;
451 default:
452 ASSERT(!"Unhandled state in switch statement");
453 }
454
455 // move on to the next character
456 if (!m_done)
457 shift(1);
458 if (m_state != Start && m_state != InSingleLineComment)
459 m_atLineStart = false;
460 }
461
462 // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
463 if ((m_state == Number || m_state == Octal || m_state == Hex) && isIdentStart(m_current))
464 m_state = Bad;
465
466 // terminate string
467 m_buffer8.append('\0');
468
469 #ifdef JSC_DEBUG_LEX
470 fprintf(stderr, "line: %d ", lineNo());
471 fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
472 fprintf(stderr, "%s ", m_buffer8.data());
473 #endif
474
475 double dval = 0;
476 if (m_state == Number)
477 dval = WTF::strtod(m_buffer8.data(), 0L);
478 else if (m_state == Hex) { // scan hex numbers
479 const char* p = m_buffer8.data() + 2;
480 while (char c = *p++) {
481 dval *= 16;
482 dval += convertHex(c);
483 }
484
485 if (dval >= mantissaOverflowLowerBound)
486 dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
487
488 m_state = Number;
489 } else if (m_state == Octal) { // scan octal number
490 const char* p = m_buffer8.data() + 1;
491 while (char c = *p++) {
492 dval *= 8;
493 dval += c - '0';
494 }
495
496 if (dval >= mantissaOverflowLowerBound)
497 dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
498
499 m_state = Number;
500 }
501
502 #ifdef JSC_DEBUG_LEX
503 switch (m_state) {
504 case Eof:
505 printf("(EOF)\n");
506 break;
507 case Other:
508 printf("(Other)\n");
509 break;
510 case Identifier:
511 printf("(Identifier)/(Keyword)\n");
512 break;
513 case String:
514 printf("(String)\n");
515 break;
516 case Number:
517 printf("(Number)\n");
518 break;
519 default:
520 printf("(unknown)");
521 }
522 #endif
523
524 if (m_state != Identifier)
525 m_eatNextIdentifier = false;
526
527 m_restrKeyword = false;
528 m_delimited = false;
529 llocp->first_line = yylineno;
530 llocp->last_line = yylineno;
531 llocp->first_column = startOffset;
532 llocp->last_column = m_currentOffset;
533 switch (m_state) {
534 case Eof:
535 token = 0;
536 break;
537 case Other:
538 if (token == '}' || token == ';')
539 m_delimited = true;
540 break;
541 case Identifier:
542 // Apply anonymous-function hack below (eat the identifier).
543 if (m_eatNextIdentifier) {
544 m_eatNextIdentifier = false;
545 token = lex(lvalp, llocp);
546 break;
547 }
548 lvalp->ident = makeIdentifier(m_buffer16);
549 token = IDENT;
550 break;
551 case IdentifierOrKeyword: {
552 lvalp->ident = makeIdentifier(m_buffer16);
553 const HashEntry* entry = m_mainTable.entry(m_globalData, *lvalp->ident);
554 if (!entry) {
555 // Lookup for keyword failed, means this is an identifier.
556 token = IDENT;
557 break;
558 }
559 token = entry->lexerValue();
560 // Hack for "f = function somename() { ... }"; too hard to get into the grammar.
561 m_eatNextIdentifier = token == FUNCTION && m_lastToken == '=';
562 if (token == CONTINUE || token == BREAK || token == RETURN || token == THROW)
563 m_restrKeyword = true;
564 break;
565 }
566 case String:
567 // Atomize constant strings in case they're later used in property lookup.
568 lvalp->ident = makeIdentifier(m_buffer16);
569 token = STRING;
570 break;
571 case Number:
572 lvalp->doubleValue = dval;
573 token = NUMBER;
574 break;
575 case Bad:
576 #ifdef JSC_DEBUG_LEX
577 fprintf(stderr, "yylex: ERROR.\n");
578 #endif
579 m_error = true;
580 return -1;
581 default:
582 ASSERT(!"unhandled numeration value in switch");
583 m_error = true;
584 return -1;
585 }
586 m_lastToken = token;
587 return token;
588 }
589
isWhiteSpace() const590 bool Lexer::isWhiteSpace() const
591 {
592 return m_current == '\t' || m_current == 0x0b || m_current == 0x0c || isSeparatorSpace(m_current);
593 }
594
isLineTerminator()595 bool Lexer::isLineTerminator()
596 {
597 bool cr = (m_current == '\r');
598 bool lf = (m_current == '\n');
599 if (cr)
600 m_skipLF = true;
601 else if (lf)
602 m_skipCR = true;
603 return cr || lf || m_current == 0x2028 || m_current == 0x2029;
604 }
605
isIdentStart(int c)606 bool Lexer::isIdentStart(int c)
607 {
608 return isASCIIAlpha(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other)));
609 }
610
isIdentPart(int c)611 bool Lexer::isIdentPart(int c)
612 {
613 return isASCIIAlphanumeric(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
614 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)));
615 }
616
isDecimalDigit(int c)617 static bool isDecimalDigit(int c)
618 {
619 return isASCIIDigit(c);
620 }
621
isHexDigit(int c)622 bool Lexer::isHexDigit(int c)
623 {
624 return isASCIIHexDigit(c);
625 }
626
isOctalDigit(int c)627 bool Lexer::isOctalDigit(int c)
628 {
629 return isASCIIOctalDigit(c);
630 }
631
matchPunctuator(int & charPos,int c1,int c2,int c3,int c4)632 int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4)
633 {
634 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
635 shift(4);
636 return URSHIFTEQUAL;
637 }
638 if (c1 == '=' && c2 == '=' && c3 == '=') {
639 shift(3);
640 return STREQ;
641 }
642 if (c1 == '!' && c2 == '=' && c3 == '=') {
643 shift(3);
644 return STRNEQ;
645 }
646 if (c1 == '>' && c2 == '>' && c3 == '>') {
647 shift(3);
648 return URSHIFT;
649 }
650 if (c1 == '<' && c2 == '<' && c3 == '=') {
651 shift(3);
652 return LSHIFTEQUAL;
653 }
654 if (c1 == '>' && c2 == '>' && c3 == '=') {
655 shift(3);
656 return RSHIFTEQUAL;
657 }
658 if (c1 == '<' && c2 == '=') {
659 shift(2);
660 return LE;
661 }
662 if (c1 == '>' && c2 == '=') {
663 shift(2);
664 return GE;
665 }
666 if (c1 == '!' && c2 == '=') {
667 shift(2);
668 return NE;
669 }
670 if (c1 == '+' && c2 == '+') {
671 shift(2);
672 if (m_terminator)
673 return AUTOPLUSPLUS;
674 return PLUSPLUS;
675 }
676 if (c1 == '-' && c2 == '-') {
677 shift(2);
678 if (m_terminator)
679 return AUTOMINUSMINUS;
680 return MINUSMINUS;
681 }
682 if (c1 == '=' && c2 == '=') {
683 shift(2);
684 return EQEQ;
685 }
686 if (c1 == '+' && c2 == '=') {
687 shift(2);
688 return PLUSEQUAL;
689 }
690 if (c1 == '-' && c2 == '=') {
691 shift(2);
692 return MINUSEQUAL;
693 }
694 if (c1 == '*' && c2 == '=') {
695 shift(2);
696 return MULTEQUAL;
697 }
698 if (c1 == '/' && c2 == '=') {
699 shift(2);
700 return DIVEQUAL;
701 }
702 if (c1 == '&' && c2 == '=') {
703 shift(2);
704 return ANDEQUAL;
705 }
706 if (c1 == '^' && c2 == '=') {
707 shift(2);
708 return XOREQUAL;
709 }
710 if (c1 == '%' && c2 == '=') {
711 shift(2);
712 return MODEQUAL;
713 }
714 if (c1 == '|' && c2 == '=') {
715 shift(2);
716 return OREQUAL;
717 }
718 if (c1 == '<' && c2 == '<') {
719 shift(2);
720 return LSHIFT;
721 }
722 if (c1 == '>' && c2 == '>') {
723 shift(2);
724 return RSHIFT;
725 }
726 if (c1 == '&' && c2 == '&') {
727 shift(2);
728 return AND;
729 }
730 if (c1 == '|' && c2 == '|') {
731 shift(2);
732 return OR;
733 }
734
735 switch (c1) {
736 case '=':
737 case '>':
738 case '<':
739 case ',':
740 case '!':
741 case '~':
742 case '?':
743 case ':':
744 case '.':
745 case '+':
746 case '-':
747 case '*':
748 case '/':
749 case '&':
750 case '|':
751 case '^':
752 case '%':
753 case '(':
754 case ')':
755 case '[':
756 case ']':
757 case ';':
758 shift(1);
759 return static_cast<int>(c1);
760 case '{':
761 charPos = m_currentOffset;
762 shift(1);
763 return OPENBRACE;
764 case '}':
765 charPos = m_currentOffset;
766 shift(1);
767 return CLOSEBRACE;
768 default:
769 return -1;
770 }
771 }
772
singleEscape(unsigned short c)773 unsigned short Lexer::singleEscape(unsigned short c)
774 {
775 switch (c) {
776 case 'b':
777 return 0x08;
778 case 't':
779 return 0x09;
780 case 'n':
781 return 0x0A;
782 case 'v':
783 return 0x0B;
784 case 'f':
785 return 0x0C;
786 case 'r':
787 return 0x0D;
788 case '"':
789 return 0x22;
790 case '\'':
791 return 0x27;
792 case '\\':
793 return 0x5C;
794 default:
795 return c;
796 }
797 }
798
convertOctal(int c1,int c2,int c3)799 unsigned short Lexer::convertOctal(int c1, int c2, int c3)
800 {
801 return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
802 }
803
convertHex(int c)804 unsigned char Lexer::convertHex(int c)
805 {
806 if (c >= '0' && c <= '9')
807 return static_cast<unsigned char>(c - '0');
808 if (c >= 'a' && c <= 'f')
809 return static_cast<unsigned char>(c - 'a' + 10);
810 return static_cast<unsigned char>(c - 'A' + 10);
811 }
812
convertHex(int c1,int c2)813 unsigned char Lexer::convertHex(int c1, int c2)
814 {
815 return ((convertHex(c1) << 4) + convertHex(c2));
816 }
817
convertUnicode(int c1,int c2,int c3,int c4)818 UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
819 {
820 unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2);
821 unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4);
822 return (highByte << 8 | lowByte);
823 }
824
record8(int c)825 void Lexer::record8(int c)
826 {
827 ASSERT(c >= 0);
828 ASSERT(c <= 0xff);
829 m_buffer8.append(static_cast<char>(c));
830 }
831
record16(int c)832 void Lexer::record16(int c)
833 {
834 ASSERT(c >= 0);
835 ASSERT(c <= USHRT_MAX);
836 record16(UChar(static_cast<unsigned short>(c)));
837 }
838
record16(UChar c)839 void Lexer::record16(UChar c)
840 {
841 m_buffer16.append(c);
842 }
843
scanRegExp()844 bool Lexer::scanRegExp()
845 {
846 m_buffer16.clear();
847 bool lastWasEscape = false;
848 bool inBrackets = false;
849
850 while (1) {
851 if (isLineTerminator() || m_current == -1)
852 return false;
853 else if (m_current != '/' || lastWasEscape == true || inBrackets == true) {
854 // keep track of '[' and ']'
855 if (!lastWasEscape) {
856 if ( m_current == '[' && !inBrackets )
857 inBrackets = true;
858 if ( m_current == ']' && inBrackets )
859 inBrackets = false;
860 }
861 record16(m_current);
862 lastWasEscape =
863 !lastWasEscape && (m_current == '\\');
864 } else { // end of regexp
865 m_pattern = UString(m_buffer16);
866 m_buffer16.clear();
867 shift(1);
868 break;
869 }
870 shift(1);
871 }
872
873 while (isIdentPart(m_current)) {
874 record16(m_current);
875 shift(1);
876 }
877 m_flags = UString(m_buffer16);
878
879 return true;
880 }
881
clear()882 void Lexer::clear()
883 {
884 m_identifiers.clear();
885
886 Vector<char> newBuffer8;
887 newBuffer8.reserveCapacity(initialReadBufferCapacity);
888 m_buffer8.swap(newBuffer8);
889
890 Vector<UChar> newBuffer16;
891 newBuffer16.reserveCapacity(initialReadBufferCapacity);
892 m_buffer16.swap(newBuffer16);
893
894 m_isReparsing = false;
895
896 m_pattern = 0;
897 m_flags = 0;
898 }
899
900 } // namespace JSC
901