1 /*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23 #include "config.h"
24 #include "Lexer.h"
25
26 #include "JSFunction.h"
27 #include "JSGlobalObjectFunctions.h"
28 #include "NodeInfo.h"
29 #include "Nodes.h"
30 #include "dtoa.h"
31 #include <ctype.h>
32 #include <limits.h>
33 #include <string.h>
34 #include <wtf/Assertions.h>
35
36 using namespace WTF;
37 using namespace Unicode;
38
39 // We can't specify the namespace in yacc's C output, so do it here instead.
40 using namespace JSC;
41
42 #ifndef KDE_USE_FINAL
43 #include "Grammar.h"
44 #endif
45
46 #include "Lookup.h"
47 #include "Lexer.lut.h"
48
49 // A bridge for yacc from the C world to the C++ world.
jscyylex(void * lvalp,void * llocp,void * globalData)50 int jscyylex(void* lvalp, void* llocp, void* globalData)
51 {
52 return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
53 }
54
55 namespace JSC {
56
// U+FEFF (byte order mark); setCode() and copyCodeWithoutBOMs() strip this
// code unit from the source before lexing.
static const UChar byteOrderMark = 0xFEFF;
58
Lexer(JSGlobalData * globalData)59 Lexer::Lexer(JSGlobalData* globalData)
60 : m_isReparsing(false)
61 , m_globalData(globalData)
62 , m_keywordTable(JSC::mainTable)
63 {
64 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
65 m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
66 }
67
// Releases the keyword lookup table held by this lexer.
Lexer::~Lexer()
{
    m_keywordTable.deleteTable();
}
72
currentCharacter() const73 inline const UChar* Lexer::currentCharacter() const
74 {
75 return m_code - 4;
76 }
77
currentOffset() const78 inline int Lexer::currentOffset() const
79 {
80 return currentCharacter() - m_codeStart;
81 }
82
shift1()83 ALWAYS_INLINE void Lexer::shift1()
84 {
85 m_current = m_next1;
86 m_next1 = m_next2;
87 m_next2 = m_next3;
88 if (LIKELY(m_code < m_codeEnd))
89 m_next3 = m_code[0];
90 else
91 m_next3 = -1;
92
93 ++m_code;
94 }
95
shift2()96 ALWAYS_INLINE void Lexer::shift2()
97 {
98 m_current = m_next2;
99 m_next1 = m_next3;
100 if (LIKELY(m_code + 1 < m_codeEnd)) {
101 m_next2 = m_code[0];
102 m_next3 = m_code[1];
103 } else {
104 m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
105 m_next3 = -1;
106 }
107
108 m_code += 2;
109 }
110
shift3()111 ALWAYS_INLINE void Lexer::shift3()
112 {
113 m_current = m_next3;
114 if (LIKELY(m_code + 2 < m_codeEnd)) {
115 m_next1 = m_code[0];
116 m_next2 = m_code[1];
117 m_next3 = m_code[2];
118 } else {
119 m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
120 m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
121 m_next3 = -1;
122 }
123
124 m_code += 3;
125 }
126
shift4()127 ALWAYS_INLINE void Lexer::shift4()
128 {
129 if (LIKELY(m_code + 3 < m_codeEnd)) {
130 m_current = m_code[0];
131 m_next1 = m_code[1];
132 m_next2 = m_code[2];
133 m_next3 = m_code[3];
134 } else {
135 m_current = m_code < m_codeEnd ? m_code[0] : -1;
136 m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
137 m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
138 m_next3 = -1;
139 }
140
141 m_code += 4;
142 }
143
setCode(const SourceCode & source)144 void Lexer::setCode(const SourceCode& source)
145 {
146 m_lineNumber = source.firstLine();
147 m_delimited = false;
148 m_lastToken = -1;
149
150 const UChar* data = source.provider()->data();
151
152 m_source = &source;
153 m_codeStart = data;
154 m_code = data + source.startOffset();
155 m_codeEnd = data + source.endOffset();
156 m_error = false;
157 m_atLineStart = true;
158
159 // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
160 // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
161 if (source.provider()->hasBOMs()) {
162 for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
163 if (UNLIKELY(*p == byteOrderMark)) {
164 copyCodeWithoutBOMs();
165 break;
166 }
167 }
168 }
169
170 // Read the first characters into the 4-character buffer.
171 shift4();
172 ASSERT(currentOffset() == source.startOffset());
173 }
174
copyCodeWithoutBOMs()175 void Lexer::copyCodeWithoutBOMs()
176 {
177 // Note: In this case, the character offset data for debugging will be incorrect.
178 // If it's important to correctly debug code with extraneous BOMs, then the caller
179 // should strip the BOMs when creating the SourceProvider object and do its own
180 // mapping of offsets within the stripped text to original text offset.
181
182 m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
183 for (const UChar* p = m_code; p < m_codeEnd; ++p) {
184 UChar c = *p;
185 if (c != byteOrderMark)
186 m_codeWithoutBOMs.append(c);
187 }
188 ptrdiff_t startDelta = m_codeStart - m_code;
189 m_code = m_codeWithoutBOMs.data();
190 m_codeStart = m_code + startDelta;
191 m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
192 }
193
shiftLineTerminator()194 void Lexer::shiftLineTerminator()
195 {
196 ASSERT(isLineTerminator(m_current));
197
198 // Allow both CRLF and LFCR.
199 if (m_current + m_next1 == '\n' + '\r')
200 shift2();
201 else
202 shift1();
203
204 ++m_lineNumber;
205 }
206
makeIdentifier(const UChar * characters,size_t length)207 ALWAYS_INLINE Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
208 {
209 m_identifiers.append(Identifier(m_globalData, characters, length));
210 return &m_identifiers.last();
211 }
212
lastTokenWasRestrKeyword() const213 inline bool Lexer::lastTokenWasRestrKeyword() const
214 {
215 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
216 }
217
isNonASCIIIdentStart(int c)218 static NEVER_INLINE bool isNonASCIIIdentStart(int c)
219 {
220 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
221 }
222
isIdentStart(int c)223 static inline bool isIdentStart(int c)
224 {
225 return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
226 }
227
isNonASCIIIdentPart(int c)228 static NEVER_INLINE bool isNonASCIIIdentPart(int c)
229 {
230 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
231 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
232 }
233
isIdentPart(int c)234 static inline bool isIdentPart(int c)
235 {
236 return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
237 }
238
// Maps the character following a backslash in a string literal to the
// character it denotes. Characters without a special meaning map to themselves.
static inline int singleEscape(int c)
{
    if (c == 'b')
        return 0x08;
    if (c == 't')
        return 0x09;
    if (c == 'n')
        return 0x0A;
    if (c == 'v')
        return 0x0B;
    if (c == 'f')
        return 0x0C;
    if (c == 'r')
        return 0x0D;
    return c;
}
258
record8(int c)259 inline void Lexer::record8(int c)
260 {
261 ASSERT(c >= 0);
262 ASSERT(c <= 0xFF);
263 m_buffer8.append(static_cast<char>(c));
264 }
265
record16(UChar c)266 inline void Lexer::record16(UChar c)
267 {
268 m_buffer16.append(c);
269 }
270
record16(int c)271 inline void Lexer::record16(int c)
272 {
273 ASSERT(c >= 0);
274 ASSERT(c <= USHRT_MAX);
275 record16(UChar(static_cast<unsigned short>(c)));
276 }
277
lex(void * p1,void * p2)278 int Lexer::lex(void* p1, void* p2)
279 {
280 ASSERT(!m_error);
281 ASSERT(m_buffer8.isEmpty());
282 ASSERT(m_buffer16.isEmpty());
283
284 YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
285 YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
286 int token = 0;
287 m_terminator = false;
288
289 start:
290 while (isWhiteSpace(m_current))
291 shift1();
292
293 int startOffset = currentOffset();
294
295 if (m_current == -1) {
296 if (!m_terminator && !m_delimited && !m_isReparsing) {
297 // automatic semicolon insertion if program incomplete
298 token = ';';
299 goto doneSemicolon;
300 }
301 return 0;
302 }
303
304 m_delimited = false;
305 switch (m_current) {
306 case '>':
307 if (m_next1 == '>' && m_next2 == '>') {
308 if (m_next3 == '=') {
309 shift4();
310 token = URSHIFTEQUAL;
311 break;
312 }
313 shift3();
314 token = URSHIFT;
315 break;
316 }
317 if (m_next1 == '>') {
318 if (m_next2 == '=') {
319 shift3();
320 token = RSHIFTEQUAL;
321 break;
322 }
323 shift2();
324 token = RSHIFT;
325 break;
326 }
327 if (m_next1 == '=') {
328 shift2();
329 token = GE;
330 break;
331 }
332 shift1();
333 token = '>';
334 break;
335 case '=':
336 if (m_next1 == '=') {
337 if (m_next2 == '=') {
338 shift3();
339 token = STREQ;
340 break;
341 }
342 shift2();
343 token = EQEQ;
344 break;
345 }
346 shift1();
347 token = '=';
348 break;
349 case '!':
350 if (m_next1 == '=') {
351 if (m_next2 == '=') {
352 shift3();
353 token = STRNEQ;
354 break;
355 }
356 shift2();
357 token = NE;
358 break;
359 }
360 shift1();
361 token = '!';
362 break;
363 case '<':
364 if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
365 // <!-- marks the beginning of a line comment (for www usage)
366 shift4();
367 goto inSingleLineComment;
368 }
369 if (m_next1 == '<') {
370 if (m_next2 == '=') {
371 shift3();
372 token = LSHIFTEQUAL;
373 break;
374 }
375 shift2();
376 token = LSHIFT;
377 break;
378 }
379 if (m_next1 == '=') {
380 shift2();
381 token = LE;
382 break;
383 }
384 shift1();
385 token = '<';
386 break;
387 case '+':
388 if (m_next1 == '+') {
389 shift2();
390 if (m_terminator) {
391 token = AUTOPLUSPLUS;
392 break;
393 }
394 token = PLUSPLUS;
395 break;
396 }
397 if (m_next1 == '=') {
398 shift2();
399 token = PLUSEQUAL;
400 break;
401 }
402 shift1();
403 token = '+';
404 break;
405 case '-':
406 if (m_next1 == '-') {
407 if (m_atLineStart && m_next2 == '>') {
408 shift3();
409 goto inSingleLineComment;
410 }
411 shift2();
412 if (m_terminator) {
413 token = AUTOMINUSMINUS;
414 break;
415 }
416 token = MINUSMINUS;
417 break;
418 }
419 if (m_next1 == '=') {
420 shift2();
421 token = MINUSEQUAL;
422 break;
423 }
424 shift1();
425 token = '-';
426 break;
427 case '*':
428 if (m_next1 == '=') {
429 shift2();
430 token = MULTEQUAL;
431 break;
432 }
433 shift1();
434 token = '*';
435 break;
436 case '/':
437 if (m_next1 == '/') {
438 shift2();
439 goto inSingleLineComment;
440 }
441 if (m_next1 == '*')
442 goto inMultiLineComment;
443 if (m_next1 == '=') {
444 shift2();
445 token = DIVEQUAL;
446 break;
447 }
448 shift1();
449 token = '/';
450 break;
451 case '&':
452 if (m_next1 == '&') {
453 shift2();
454 token = AND;
455 break;
456 }
457 if (m_next1 == '=') {
458 shift2();
459 token = ANDEQUAL;
460 break;
461 }
462 shift1();
463 token = '&';
464 break;
465 case '^':
466 if (m_next1 == '=') {
467 shift2();
468 token = XOREQUAL;
469 break;
470 }
471 shift1();
472 token = '^';
473 break;
474 case '%':
475 if (m_next1 == '=') {
476 shift2();
477 token = MODEQUAL;
478 break;
479 }
480 shift1();
481 token = '%';
482 break;
483 case '|':
484 if (m_next1 == '=') {
485 shift2();
486 token = OREQUAL;
487 break;
488 }
489 if (m_next1 == '|') {
490 shift2();
491 token = OR;
492 break;
493 }
494 shift1();
495 token = '|';
496 break;
497 case '.':
498 if (isASCIIDigit(m_next1)) {
499 record8('.');
500 shift1();
501 goto inNumberAfterDecimalPoint;
502 }
503 token = '.';
504 shift1();
505 break;
506 case ',':
507 case '~':
508 case '?':
509 case ':':
510 case '(':
511 case ')':
512 case '[':
513 case ']':
514 token = m_current;
515 shift1();
516 break;
517 case ';':
518 shift1();
519 m_delimited = true;
520 token = ';';
521 break;
522 case '{':
523 lvalp->intValue = currentOffset();
524 shift1();
525 token = OPENBRACE;
526 break;
527 case '}':
528 lvalp->intValue = currentOffset();
529 shift1();
530 m_delimited = true;
531 token = CLOSEBRACE;
532 break;
533 case '\\':
534 goto startIdentifierWithBackslash;
535 case '0':
536 goto startNumberWithZeroDigit;
537 case '1':
538 case '2':
539 case '3':
540 case '4':
541 case '5':
542 case '6':
543 case '7':
544 case '8':
545 case '9':
546 goto startNumber;
547 case '"':
548 case '\'':
549 goto startString;
550 default:
551 if (isIdentStart(m_current))
552 goto startIdentifierOrKeyword;
553 if (isLineTerminator(m_current)) {
554 shiftLineTerminator();
555 m_atLineStart = true;
556 m_terminator = true;
557 if (lastTokenWasRestrKeyword()) {
558 token = ';';
559 goto doneSemicolon;
560 }
561 goto start;
562 }
563 goto returnError;
564 }
565
566 m_atLineStart = false;
567 goto returnToken;
568
569 startString: {
570 int stringQuoteCharacter = m_current;
571 shift1();
572
573 const UChar* stringStart = currentCharacter();
574 while (m_current != stringQuoteCharacter) {
575 // Fast check for characters that require special handling.
576 // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
577 // as possible, and lets through all common ASCII characters.
578 if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
579 m_buffer16.append(stringStart, currentCharacter() - stringStart);
580 goto inString;
581 }
582 shift1();
583 }
584 lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
585 shift1();
586 m_atLineStart = false;
587 m_delimited = false;
588 token = STRING;
589 goto returnToken;
590
591 inString:
592 while (m_current != stringQuoteCharacter) {
593 if (m_current == '\\')
594 goto inStringEscapeSequence;
595 if (UNLIKELY(isLineTerminator(m_current)))
596 goto returnError;
597 if (UNLIKELY(m_current == -1))
598 goto returnError;
599 record16(m_current);
600 shift1();
601 }
602 goto doneString;
603
604 inStringEscapeSequence:
605 shift1();
606 if (m_current == 'x') {
607 shift1();
608 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
609 record16(convertHex(m_current, m_next1));
610 shift2();
611 goto inString;
612 }
613 record16('x');
614 if (m_current == stringQuoteCharacter)
615 goto doneString;
616 goto inString;
617 }
618 if (m_current == 'u') {
619 shift1();
620 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
621 record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
622 shift4();
623 goto inString;
624 }
625 if (m_current == stringQuoteCharacter) {
626 record16('u');
627 goto doneString;
628 }
629 goto returnError;
630 }
631 if (isASCIIOctalDigit(m_current)) {
632 if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
633 record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
634 shift3();
635 goto inString;
636 }
637 if (isASCIIOctalDigit(m_next1)) {
638 record16((m_current - '0') * 8 + m_next1 - '0');
639 shift2();
640 goto inString;
641 }
642 record16(m_current - '0');
643 shift1();
644 goto inString;
645 }
646 if (isLineTerminator(m_current)) {
647 shiftLineTerminator();
648 goto inString;
649 }
650 record16(singleEscape(m_current));
651 shift1();
652 goto inString;
653 }
654
655 startIdentifierWithBackslash:
656 shift1();
657 if (UNLIKELY(m_current != 'u'))
658 goto returnError;
659 shift1();
660 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
661 goto returnError;
662 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
663 if (UNLIKELY(!isIdentStart(token)))
664 goto returnError;
665 goto inIdentifierAfterCharacterCheck;
666
667 startIdentifierOrKeyword: {
668 const UChar* identifierStart = currentCharacter();
669 shift1();
670 while (isIdentPart(m_current))
671 shift1();
672 if (LIKELY(m_current != '\\')) {
673 lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
674 goto doneIdentifierOrKeyword;
675 }
676 m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
677 }
678
679 do {
680 shift1();
681 if (UNLIKELY(m_current != 'u'))
682 goto returnError;
683 shift1();
684 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
685 goto returnError;
686 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
687 if (UNLIKELY(!isIdentPart(token)))
688 goto returnError;
689 inIdentifierAfterCharacterCheck:
690 record16(token);
691 shift4();
692
693 while (isIdentPart(m_current)) {
694 record16(m_current);
695 shift1();
696 }
697 } while (UNLIKELY(m_current == '\\'));
698 goto doneIdentifier;
699
700 inSingleLineComment:
701 while (!isLineTerminator(m_current)) {
702 if (UNLIKELY(m_current == -1))
703 return 0;
704 shift1();
705 }
706 shiftLineTerminator();
707 m_atLineStart = true;
708 m_terminator = true;
709 if (lastTokenWasRestrKeyword())
710 goto doneSemicolon;
711 goto start;
712
713 inMultiLineComment:
714 shift2();
715 while (m_current != '*' || m_next1 != '/') {
716 if (isLineTerminator(m_current))
717 shiftLineTerminator();
718 else {
719 shift1();
720 if (UNLIKELY(m_current == -1))
721 goto returnError;
722 }
723 }
724 shift2();
725 m_atLineStart = false;
726 goto start;
727
728 startNumberWithZeroDigit:
729 shift1();
730 if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
731 shift1();
732 goto inHex;
733 }
734 if (m_current == '.') {
735 record8('0');
736 record8('.');
737 shift1();
738 goto inNumberAfterDecimalPoint;
739 }
740 if ((m_current | 0x20) == 'e') {
741 record8('0');
742 record8('e');
743 shift1();
744 goto inExponentIndicator;
745 }
746 if (isASCIIOctalDigit(m_current))
747 goto inOctal;
748 if (isASCIIDigit(m_current))
749 goto startNumber;
750 lvalp->doubleValue = 0;
751 goto doneNumeric;
752
753 inNumberAfterDecimalPoint:
754 while (isASCIIDigit(m_current)) {
755 record8(m_current);
756 shift1();
757 }
758 if ((m_current | 0x20) == 'e') {
759 record8('e');
760 shift1();
761 goto inExponentIndicator;
762 }
763 goto doneNumber;
764
765 inExponentIndicator:
766 if (m_current == '+' || m_current == '-') {
767 record8(m_current);
768 shift1();
769 }
770 if (!isASCIIDigit(m_current))
771 goto returnError;
772 do {
773 record8(m_current);
774 shift1();
775 } while (isASCIIDigit(m_current));
776 goto doneNumber;
777
778 inOctal: {
779 do {
780 record8(m_current);
781 shift1();
782 } while (isASCIIOctalDigit(m_current));
783 if (isASCIIDigit(m_current))
784 goto startNumber;
785
786 double dval = 0;
787
788 const char* end = m_buffer8.end();
789 for (const char* p = m_buffer8.data(); p < end; ++p) {
790 dval *= 8;
791 dval += *p - '0';
792 }
793 if (dval >= mantissaOverflowLowerBound)
794 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
795
796 m_buffer8.resize(0);
797
798 lvalp->doubleValue = dval;
799 goto doneNumeric;
800 }
801
802 inHex: {
803 do {
804 record8(m_current);
805 shift1();
806 } while (isASCIIHexDigit(m_current));
807
808 double dval = 0;
809
810 const char* end = m_buffer8.end();
811 for (const char* p = m_buffer8.data(); p < end; ++p) {
812 dval *= 16;
813 dval += toASCIIHexValue(*p);
814 }
815 if (dval >= mantissaOverflowLowerBound)
816 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
817
818 m_buffer8.resize(0);
819
820 lvalp->doubleValue = dval;
821 goto doneNumeric;
822 }
823
824 startNumber:
825 record8(m_current);
826 shift1();
827 while (isASCIIDigit(m_current)) {
828 record8(m_current);
829 shift1();
830 }
831 if (m_current == '.') {
832 record8('.');
833 shift1();
834 goto inNumberAfterDecimalPoint;
835 }
836 if ((m_current | 0x20) == 'e') {
837 record8('e');
838 shift1();
839 goto inExponentIndicator;
840 }
841
842 // Fall through into doneNumber.
843
844 doneNumber:
845 // Null-terminate string for strtod.
846 m_buffer8.append('\0');
847 lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
848 m_buffer8.resize(0);
849
850 // Fall through into doneNumeric.
851
852 doneNumeric:
853 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
854 if (UNLIKELY(isIdentStart(m_current)))
855 goto returnError;
856
857 m_atLineStart = false;
858 m_delimited = false;
859 token = NUMBER;
860 goto returnToken;
861
862 doneSemicolon:
863 token = ';';
864 m_delimited = true;
865 goto returnToken;
866
867 doneIdentifier:
868 m_atLineStart = false;
869 m_delimited = false;
870 lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
871 m_buffer16.resize(0);
872 token = IDENT;
873 goto returnToken;
874
875 doneIdentifierOrKeyword: {
876 m_atLineStart = false;
877 m_delimited = false;
878 m_buffer16.resize(0);
879 const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
880 token = entry ? entry->lexerValue() : IDENT;
881 goto returnToken;
882 }
883
884 doneString:
885 // Atomize constant strings in case they're later used in property lookup.
886 shift1();
887 m_atLineStart = false;
888 m_delimited = false;
889 lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
890 m_buffer16.resize(0);
891 token = STRING;
892
893 // Fall through into returnToken.
894
895 returnToken: {
896 int lineNumber = m_lineNumber;
897 llocp->first_line = lineNumber;
898 llocp->last_line = lineNumber;
899 llocp->first_column = startOffset;
900 llocp->last_column = currentOffset();
901
902 m_lastToken = token;
903 return token;
904 }
905
906 returnError:
907 m_error = true;
908 return -1;
909 }
910
scanRegExp()911 bool Lexer::scanRegExp()
912 {
913 ASSERT(m_buffer16.isEmpty());
914
915 bool lastWasEscape = false;
916 bool inBrackets = false;
917
918 while (true) {
919 if (isLineTerminator(m_current) || m_current == -1)
920 return false;
921 if (m_current != '/' || lastWasEscape || inBrackets) {
922 // keep track of '[' and ']'
923 if (!lastWasEscape) {
924 if (m_current == '[' && !inBrackets)
925 inBrackets = true;
926 if (m_current == ']' && inBrackets)
927 inBrackets = false;
928 }
929 record16(m_current);
930 lastWasEscape = !lastWasEscape && m_current == '\\';
931 } else { // end of regexp
932 m_pattern = UString(m_buffer16);
933 m_buffer16.resize(0);
934 shift1();
935 break;
936 }
937 shift1();
938 }
939
940 while (isIdentPart(m_current)) {
941 record16(m_current);
942 shift1();
943 }
944 m_flags = UString(m_buffer16);
945 m_buffer16.resize(0);
946
947 return true;
948 }
949
clear()950 void Lexer::clear()
951 {
952 m_identifiers.clear();
953 m_codeWithoutBOMs.clear();
954
955 Vector<char> newBuffer8;
956 newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
957 m_buffer8.swap(newBuffer8);
958
959 Vector<UChar> newBuffer16;
960 newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
961 m_buffer16.swap(newBuffer16);
962
963 m_isReparsing = false;
964
965 m_pattern = UString();
966 m_flags = UString();
967 }
968
sourceCode(int openBrace,int closeBrace,int firstLine)969 SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
970 {
971 if (m_codeWithoutBOMs.isEmpty())
972 return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
973
974 const UChar* data = m_source->provider()->data();
975
976 ASSERT(openBrace < closeBrace);
977
978 int numBOMsBeforeOpenBrace = 0;
979 int numBOMsBetweenBraces = 0;
980
981 int i;
982 for (i = m_source->startOffset(); i < openBrace; ++i)
983 numBOMsBeforeOpenBrace += data[i] == byteOrderMark;
984 for (; i < closeBrace; ++i)
985 numBOMsBetweenBraces += data[i] == byteOrderMark;
986
987 return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace,
988 closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine);
989 }
990
991 } // namespace JSC
992