1 /*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23 #include "config.h"
24 #include "Lexer.h"
25
26 #include "JSFunction.h"
27 #include "JSGlobalObjectFunctions.h"
28 #include "NodeInfo.h"
29 #include "Nodes.h"
30 #include "dtoa.h"
31 #include <ctype.h>
32 #include <limits.h>
33 #include <string.h>
34 #include <wtf/Assertions.h>
35
36 using namespace WTF;
37 using namespace Unicode;
38
39 // We can't specify the namespace in yacc's C output, so do it here instead.
40 using namespace JSC;
41
42 #include "Grammar.h"
43 #include "Lookup.h"
44 #include "Lexer.lut.h"
45
46 namespace JSC {
47
48 static const UChar byteOrderMark = 0xFEFF;
49
Lexer(JSGlobalData * globalData)50 Lexer::Lexer(JSGlobalData* globalData)
51 : m_isReparsing(false)
52 , m_globalData(globalData)
53 , m_keywordTable(JSC::mainTable)
54 {
55 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
56 m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
57 }
58
~Lexer()59 Lexer::~Lexer()
60 {
61 m_keywordTable.deleteTable();
62 }
63
currentCharacter() const64 inline const UChar* Lexer::currentCharacter() const
65 {
66 return m_code - 4;
67 }
68
currentOffset() const69 inline int Lexer::currentOffset() const
70 {
71 return currentCharacter() - m_codeStart;
72 }
73
shift1()74 ALWAYS_INLINE void Lexer::shift1()
75 {
76 m_current = m_next1;
77 m_next1 = m_next2;
78 m_next2 = m_next3;
79 if (LIKELY(m_code < m_codeEnd))
80 m_next3 = m_code[0];
81 else
82 m_next3 = -1;
83
84 ++m_code;
85 }
86
shift2()87 ALWAYS_INLINE void Lexer::shift2()
88 {
89 m_current = m_next2;
90 m_next1 = m_next3;
91 if (LIKELY(m_code + 1 < m_codeEnd)) {
92 m_next2 = m_code[0];
93 m_next3 = m_code[1];
94 } else {
95 m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
96 m_next3 = -1;
97 }
98
99 m_code += 2;
100 }
101
shift3()102 ALWAYS_INLINE void Lexer::shift3()
103 {
104 m_current = m_next3;
105 if (LIKELY(m_code + 2 < m_codeEnd)) {
106 m_next1 = m_code[0];
107 m_next2 = m_code[1];
108 m_next3 = m_code[2];
109 } else {
110 m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
111 m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
112 m_next3 = -1;
113 }
114
115 m_code += 3;
116 }
117
shift4()118 ALWAYS_INLINE void Lexer::shift4()
119 {
120 if (LIKELY(m_code + 3 < m_codeEnd)) {
121 m_current = m_code[0];
122 m_next1 = m_code[1];
123 m_next2 = m_code[2];
124 m_next3 = m_code[3];
125 } else {
126 m_current = m_code < m_codeEnd ? m_code[0] : -1;
127 m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
128 m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
129 m_next3 = -1;
130 }
131
132 m_code += 4;
133 }
134
setCode(const SourceCode & source,ParserArena & arena)135 void Lexer::setCode(const SourceCode& source, ParserArena& arena)
136 {
137 m_arena = &arena.identifierArena();
138
139 m_lineNumber = source.firstLine();
140 m_delimited = false;
141 m_lastToken = -1;
142
143 const UChar* data = source.provider()->data();
144
145 m_source = &source;
146 m_codeStart = data;
147 m_code = data + source.startOffset();
148 m_codeEnd = data + source.endOffset();
149 m_error = false;
150 m_atLineStart = true;
151
152 // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
153 // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
154 if (source.provider()->hasBOMs()) {
155 for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
156 if (UNLIKELY(*p == byteOrderMark)) {
157 copyCodeWithoutBOMs();
158 break;
159 }
160 }
161 }
162
163 // Read the first characters into the 4-character buffer.
164 shift4();
165 ASSERT(currentOffset() == source.startOffset());
166 }
167
copyCodeWithoutBOMs()168 void Lexer::copyCodeWithoutBOMs()
169 {
170 // Note: In this case, the character offset data for debugging will be incorrect.
171 // If it's important to correctly debug code with extraneous BOMs, then the caller
172 // should strip the BOMs when creating the SourceProvider object and do its own
173 // mapping of offsets within the stripped text to original text offset.
174
175 m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
176 for (const UChar* p = m_code; p < m_codeEnd; ++p) {
177 UChar c = *p;
178 if (c != byteOrderMark)
179 m_codeWithoutBOMs.append(c);
180 }
181 ptrdiff_t startDelta = m_codeStart - m_code;
182 m_code = m_codeWithoutBOMs.data();
183 m_codeStart = m_code + startDelta;
184 m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
185 }
186
shiftLineTerminator()187 void Lexer::shiftLineTerminator()
188 {
189 ASSERT(isLineTerminator(m_current));
190
191 // Allow both CRLF and LFCR.
192 if (m_current + m_next1 == '\n' + '\r')
193 shift2();
194 else
195 shift1();
196
197 ++m_lineNumber;
198 }
199
makeIdentifier(const UChar * characters,size_t length)200 ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
201 {
202 return &m_arena->makeIdentifier(m_globalData, characters, length);
203 }
204
lastTokenWasRestrKeyword() const205 inline bool Lexer::lastTokenWasRestrKeyword() const
206 {
207 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
208 }
209
isNonASCIIIdentStart(int c)210 static NEVER_INLINE bool isNonASCIIIdentStart(int c)
211 {
212 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
213 }
214
isIdentStart(int c)215 static inline bool isIdentStart(int c)
216 {
217 return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
218 }
219
isNonASCIIIdentPart(int c)220 static NEVER_INLINE bool isNonASCIIIdentPart(int c)
221 {
222 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
223 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
224 }
225
isIdentPart(int c)226 static inline bool isIdentPart(int c)
227 {
228 return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
229 }
230
// Maps the character following a backslash to the character it denotes.
// The letters b, t, n, v, f, r translate to 0x08..0x0D respectively; any
// other value is returned unchanged.
static inline int singleEscape(int c)
{
    // Listed in the order of the control codes they produce, so the index of
    // a letter is its offset from 0x08.
    static const char escapeLetters[] = { 'b', 't', 'n', 'v', 'f', 'r' };
    const int firstControlCode = 0x08;
    const int letterCount = sizeof(escapeLetters) / sizeof(escapeLetters[0]);

    for (int i = 0; i < letterCount; ++i) {
        if (c == escapeLetters[i])
            return firstControlCode + i;
    }
    return c;
}
250
record8(int c)251 inline void Lexer::record8(int c)
252 {
253 ASSERT(c >= 0);
254 ASSERT(c <= 0xFF);
255 m_buffer8.append(static_cast<char>(c));
256 }
257
record16(UChar c)258 inline void Lexer::record16(UChar c)
259 {
260 m_buffer16.append(c);
261 }
262
record16(int c)263 inline void Lexer::record16(int c)
264 {
265 ASSERT(c >= 0);
266 ASSERT(c <= USHRT_MAX);
267 record16(UChar(static_cast<unsigned short>(c)));
268 }
269
// Scans and returns the next token from the source installed by setCode().
//
// p1 is cast to YYSTYPE* and receives the token's semantic value (ident,
// doubleValue or intValue, depending on the token). p2 is cast to YYLTYPE*
// and receives the token's line number and its start/end character offsets.
//
// Returns the token code, 0 when the end of input is reached, or -1 (after
// setting m_error) when the input cannot be tokenized.
//
// The body is a goto-driven state machine: the switch below dispatches on
// the current character, and the labelled sections after it handle strings,
// identifiers, comments and numbers before converging on returnToken or
// returnError.
lex(void * p1,void * p2)270 int Lexer::lex(void* p1, void* p2)
271 {
272 ASSERT(!m_error);
273 ASSERT(m_buffer8.isEmpty());
274 ASSERT(m_buffer16.isEmpty());
275
276 YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
277 YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
278 int token = 0;
// Set to true once a line terminator has been consumed during this call.
279 m_terminator = false;
280
// Re-entered after whitespace, comments and line terminators have been skipped.
281 start:
282 while (isWhiteSpace(m_current))
283 shift1();
284
// Offset of the token's first character; reported as first_column in returnToken.
285 int startOffset = currentOffset();
286
// -1 marks the end of input (see the shiftN() functions).
287 if (m_current == -1) {
288 if (!m_terminator && !m_delimited && !m_isReparsing) {
289 // automatic semicolon insertion if program incomplete
290 token = ';';
291 goto doneSemicolon;
292 }
293 return 0;
294 }
295
296 m_delimited = false;
// Punctuators are matched longest-first using the three lookahead characters.
297 switch (m_current) {
298 case '>':
299 if (m_next1 == '>' && m_next2 == '>') {
300 if (m_next3 == '=') {
301 shift4();
302 token = URSHIFTEQUAL;
303 break;
304 }
305 shift3();
306 token = URSHIFT;
307 break;
308 }
309 if (m_next1 == '>') {
310 if (m_next2 == '=') {
311 shift3();
312 token = RSHIFTEQUAL;
313 break;
314 }
315 shift2();
316 token = RSHIFT;
317 break;
318 }
319 if (m_next1 == '=') {
320 shift2();
321 token = GE;
322 break;
323 }
324 shift1();
325 token = '>';
326 break;
327 case '=':
328 if (m_next1 == '=') {
329 if (m_next2 == '=') {
330 shift3();
331 token = STREQ;
332 break;
333 }
334 shift2();
335 token = EQEQ;
336 break;
337 }
338 shift1();
339 token = '=';
340 break;
341 case '!':
342 if (m_next1 == '=') {
343 if (m_next2 == '=') {
344 shift3();
345 token = STRNEQ;
346 break;
347 }
348 shift2();
349 token = NE;
350 break;
351 }
352 shift1();
353 token = '!';
354 break;
355 case '<':
356 if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
357 // <!-- marks the beginning of a line comment (for www usage)
358 shift4();
359 goto inSingleLineComment;
360 }
361 if (m_next1 == '<') {
362 if (m_next2 == '=') {
363 shift3();
364 token = LSHIFTEQUAL;
365 break;
366 }
367 shift2();
368 token = LSHIFT;
369 break;
370 }
371 if (m_next1 == '=') {
372 shift2();
373 token = LE;
374 break;
375 }
376 shift1();
377 token = '<';
378 break;
379 case '+':
380 if (m_next1 == '+') {
381 shift2();
// A ++ that follows a line terminator is reported as AUTOPLUSPLUS rather than PLUSPLUS.
382 if (m_terminator) {
383 token = AUTOPLUSPLUS;
384 break;
385 }
386 token = PLUSPLUS;
387 break;
388 }
389 if (m_next1 == '=') {
390 shift2();
391 token = PLUSEQUAL;
392 break;
393 }
394 shift1();
395 token = '+';
396 break;
397 case '-':
398 if (m_next1 == '-') {
// --> is treated as the start of a single-line comment, but only while m_atLineStart is set.
399 if (m_atLineStart && m_next2 == '>') {
400 shift3();
401 goto inSingleLineComment;
402 }
403 shift2();
// A -- that follows a line terminator is reported as AUTOMINUSMINUS rather than MINUSMINUS.
404 if (m_terminator) {
405 token = AUTOMINUSMINUS;
406 break;
407 }
408 token = MINUSMINUS;
409 break;
410 }
411 if (m_next1 == '=') {
412 shift2();
413 token = MINUSEQUAL;
414 break;
415 }
416 shift1();
417 token = '-';
418 break;
419 case '*':
420 if (m_next1 == '=') {
421 shift2();
422 token = MULTEQUAL;
423 break;
424 }
425 shift1();
426 token = '*';
427 break;
428 case '/':
429 if (m_next1 == '/') {
430 shift2();
431 goto inSingleLineComment;
432 }
// The opening /* is consumed at the inMultiLineComment label itself.
433 if (m_next1 == '*')
434 goto inMultiLineComment;
435 if (m_next1 == '=') {
436 shift2();
437 token = DIVEQUAL;
438 break;
439 }
440 shift1();
441 token = '/';
442 break;
443 case '&':
444 if (m_next1 == '&') {
445 shift2();
446 token = AND;
447 break;
448 }
449 if (m_next1 == '=') {
450 shift2();
451 token = ANDEQUAL;
452 break;
453 }
454 shift1();
455 token = '&';
456 break;
457 case '^':
458 if (m_next1 == '=') {
459 shift2();
460 token = XOREQUAL;
461 break;
462 }
463 shift1();
464 token = '^';
465 break;
466 case '%':
467 if (m_next1 == '=') {
468 shift2();
469 token = MODEQUAL;
470 break;
471 }
472 shift1();
473 token = '%';
474 break;
475 case '|':
476 if (m_next1 == '=') {
477 shift2();
478 token = OREQUAL;
479 break;
480 }
481 if (m_next1 == '|') {
482 shift2();
483 token = OR;
484 break;
485 }
486 shift1();
487 token = '|';
488 break;
489 case '.':
// A dot followed by a digit starts a numeric literal such as ".5".
490 if (isASCIIDigit(m_next1)) {
491 record8('.');
492 shift1();
493 goto inNumberAfterDecimalPoint;
494 }
495 token = '.';
496 shift1();
497 break;
// Single-character punctuators use their own character value as the token code.
498 case ',':
499 case '~':
500 case '?':
501 case ':':
502 case '(':
503 case ')':
504 case '[':
505 case ']':
506 token = m_current;
507 shift1();
508 break;
509 case ';':
510 shift1();
511 m_delimited = true;
512 token = ';';
513 break;
// Braces carry their source offset as the token's semantic value.
514 case '{':
515 lvalp->intValue = currentOffset();
516 shift1();
517 token = OPENBRACE;
518 break;
519 case '}':
520 lvalp->intValue = currentOffset();
521 shift1();
522 m_delimited = true;
523 token = CLOSEBRACE;
524 break;
525 case '\\':
526 goto startIdentifierWithBackslash;
527 case '0':
528 goto startNumberWithZeroDigit;
529 case '1':
530 case '2':
531 case '3':
532 case '4':
533 case '5':
534 case '6':
535 case '7':
536 case '8':
537 case '9':
538 goto startNumber;
539 case '"':
540 case '\'':
541 goto startString;
542 default:
543 if (isIdentStart(m_current))
544 goto startIdentifierOrKeyword;
545 if (isLineTerminator(m_current)) {
546 shiftLineTerminator();
547 m_atLineStart = true;
548 m_terminator = true;
// A line break directly after continue/break/return/throw yields a ';' token.
549 if (lastTokenWasRestrKeyword()) {
550 token = ';';
551 goto doneSemicolon;
552 }
553 goto start;
554 }
555 goto returnError;
556 }
557
// Reached via 'break' from the switch: a punctuator token has been recognized.
558 m_atLineStart = false;
559 goto returnToken;
560
// String literal. The fast path below hands the source characters straight to
// makeIdentifier(); the first character needing special handling switches to
// the slow path (inString), which builds the value in m_buffer16.
561 startString: {
562 int stringQuoteCharacter = m_current;
563 shift1();
564
565 const UChar* stringStart = currentCharacter();
566 while (m_current != stringQuoteCharacter) {
567 // Fast check for characters that require special handling.
568 // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
569 // as possible, and lets through all common ASCII characters.
570 if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
// Carry over what has been scanned so far, then continue on the slow path.
571 m_buffer16.append(stringStart, currentCharacter() - stringStart);
572 goto inString;
573 }
574 shift1();
575 }
576 lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
// Consume the closing quote.
577 shift1();
578 m_atLineStart = false;
579 m_delimited = false;
580 token = STRING;
581 goto returnToken;
582
// Slow path: a raw line terminator or the end of input inside the literal is an error.
583 inString:
584 while (m_current != stringQuoteCharacter) {
585 if (m_current == '\\')
586 goto inStringEscapeSequence;
587 if (UNLIKELY(isLineTerminator(m_current)))
588 goto returnError;
589 if (UNLIKELY(m_current == -1))
590 goto returnError;
591 record16(m_current);
592 shift1();
593 }
594 goto doneString;
595
596 inStringEscapeSequence:
// Skip the backslash itself.
597 shift1();
598 if (m_current == 'x') {
599 shift1();
600 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
601 record16(convertHex(m_current, m_next1));
602 shift2();
603 goto inString;
604 }
// \x without two hex digits is recorded as a literal 'x'.
605 record16('x');
606 if (m_current == stringQuoteCharacter)
607 goto doneString;
608 goto inString;
609 }
610 if (m_current == 'u') {
611 shift1();
612 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
613 record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
614 shift4();
615 goto inString;
616 }
// \u immediately followed by the closing quote is recorded as a literal 'u';
// any other malformed \u escape is an error.
617 if (m_current == stringQuoteCharacter) {
618 record16('u');
619 goto doneString;
620 }
621 goto returnError;
622 }
// Octal escapes: three digits when the first is 0-3, otherwise two digits or one.
623 if (isASCIIOctalDigit(m_current)) {
624 if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
625 record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
626 shift3();
627 goto inString;
628 }
629 if (isASCIIOctalDigit(m_next1)) {
630 record16((m_current - '0') * 8 + m_next1 - '0');
631 shift2();
632 goto inString;
633 }
634 record16(m_current - '0');
635 shift1();
636 goto inString;
637 }
// Backslash followed by a line terminator: the terminator is consumed and nothing is recorded.
638 if (isLineTerminator(m_current)) {
639 shiftLineTerminator();
640 goto inString;
641 }
642 if (m_current == -1)
643 goto returnError;
644 record16(singleEscape(m_current));
645 shift1();
646 goto inString;
647 }
648
// Identifier whose first character is written as a \uXXXX escape. The decoded
// character is held in 'token' until it is recorded at inIdentifierAfterCharacterCheck.
649 startIdentifierWithBackslash:
650 shift1();
651 if (UNLIKELY(m_current != 'u'))
652 goto returnError;
653 shift1();
654 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
655 goto returnError;
656 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
657 if (UNLIKELY(!isIdentStart(token)))
658 goto returnError;
659 goto inIdentifierAfterCharacterCheck;
660
// Identifier or keyword. Without a backslash the identifier is built directly
// from the source characters; otherwise the characters scanned so far are
// copied into m_buffer16 and control falls into the escape-handling loop below.
661 startIdentifierOrKeyword: {
662 const UChar* identifierStart = currentCharacter();
663 shift1();
664 while (isIdentPart(m_current))
665 shift1();
666 if (LIKELY(m_current != '\\')) {
667 lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
668 goto doneIdentifierOrKeyword;
669 }
670 m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
671 }
672
// Each iteration decodes one \uXXXX escape and then the plain identifier
// characters that follow it; the loop repeats while another backslash follows.
673 do {
674 shift1();
675 if (UNLIKELY(m_current != 'u'))
676 goto returnError;
677 shift1();
678 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
679 goto returnError;
680 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
681 if (UNLIKELY(!isIdentPart(token)))
682 goto returnError;
683 inIdentifierAfterCharacterCheck:
684 record16(token);
// Skip the four hex digits of the escape.
685 shift4();
686
687 while (isIdentPart(m_current)) {
688 record16(m_current);
689 shift1();
690 }
691 } while (UNLIKELY(m_current == '\\'));
692 goto doneIdentifier;
693
// Skips to the end of the line. Reaching the end of input inside the comment returns 0.
694 inSingleLineComment:
695 while (!isLineTerminator(m_current)) {
696 if (UNLIKELY(m_current == -1))
697 return 0;
698 shift1();
699 }
700 shiftLineTerminator();
701 m_atLineStart = true;
702 m_terminator = true;
703 if (lastTokenWasRestrKeyword())
704 goto doneSemicolon;
705 goto start;
706
// Skips a /* ... */ comment. Line terminators inside it are counted through
// shiftLineTerminator(); an unterminated comment is an error.
707 inMultiLineComment:
708 shift2();
709 while (m_current != '*' || m_next1 != '/') {
710 if (isLineTerminator(m_current))
711 shiftLineTerminator();
712 else {
713 shift1();
714 if (UNLIKELY(m_current == -1))
715 goto returnError;
716 }
717 }
718 shift2();
719 m_atLineStart = false;
720 goto start;
721
// Numeric literal starting with '0': hexadecimal, "0.", "0e", octal, decimal
// with a leading zero, or plain zero.
722 startNumberWithZeroDigit:
723 shift1();
// (m_current | 0x20) folds ASCII upper case to lower case, so both 'x' and 'X' match.
724 if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
725 shift1();
726 goto inHex;
727 }
728 if (m_current == '.') {
729 record8('0');
730 record8('.');
731 shift1();
732 goto inNumberAfterDecimalPoint;
733 }
734 if ((m_current | 0x20) == 'e') {
735 record8('0');
736 record8('e');
737 shift1();
738 goto inExponentIndicator;
739 }
740 if (isASCIIOctalDigit(m_current))
741 goto inOctal;
// Not an octal digit, so this is 8 or 9: lex the rest as a decimal literal.
742 if (isASCIIDigit(m_current))
743 goto startNumber;
744 lvalp->doubleValue = 0;
745 goto doneNumeric;
746
// Digits after the decimal point are collected in m_buffer8 for conversion at doneNumber.
747 inNumberAfterDecimalPoint:
748 while (isASCIIDigit(m_current)) {
749 record8(m_current);
750 shift1();
751 }
752 if ((m_current | 0x20) == 'e') {
753 record8('e');
754 shift1();
755 goto inExponentIndicator;
756 }
757 goto doneNumber;
758
// Optional sign followed by at least one digit; a missing digit is an error.
759 inExponentIndicator:
760 if (m_current == '+' || m_current == '-') {
761 record8(m_current);
762 shift1();
763 }
764 if (!isASCIIDigit(m_current))
765 goto returnError;
766 do {
767 record8(m_current);
768 shift1();
769 } while (isASCIIDigit(m_current));
770 goto doneNumber;
771
// Octal literal. If an 8 or 9 follows the octal digits, the digits collected
// so far stay in m_buffer8 and lexing continues as a decimal literal.
772 inOctal: {
773 do {
774 record8(m_current);
775 shift1();
776 } while (isASCIIOctalDigit(m_current));
777 if (isASCIIDigit(m_current))
778 goto startNumber;
779
780 double dval = 0;
781
782 const char* end = m_buffer8.end();
783 for (const char* p = m_buffer8.data(); p < end; ++p) {
784 dval *= 8;
785 dval += *p - '0';
786 }
// At or above mantissaOverflowLowerBound the value is recomputed by parseIntOverflow().
787 if (dval >= mantissaOverflowLowerBound)
788 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
789
790 m_buffer8.resize(0);
791
792 lvalp->doubleValue = dval;
793 goto doneNumeric;
794 }
795
// Hexadecimal literal; entered with m_current on the first hex digit after "0x".
796 inHex: {
797 do {
798 record8(m_current);
799 shift1();
800 } while (isASCIIHexDigit(m_current));
801
802 double dval = 0;
803
804 const char* end = m_buffer8.end();
805 for (const char* p = m_buffer8.data(); p < end; ++p) {
806 dval *= 16;
807 dval += toASCIIHexValue(*p);
808 }
// At or above mantissaOverflowLowerBound the value is recomputed by parseIntOverflow().
809 if (dval >= mantissaOverflowLowerBound)
810 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
811
812 m_buffer8.resize(0);
813
814 lvalp->doubleValue = dval;
815 goto doneNumeric;
816 }
817
// Decimal literal: integer digits, then an optional fraction and/or exponent.
818 startNumber:
819 record8(m_current);
820 shift1();
821 while (isASCIIDigit(m_current)) {
822 record8(m_current);
823 shift1();
824 }
825 if (m_current == '.') {
826 record8('.');
827 shift1();
828 goto inNumberAfterDecimalPoint;
829 }
830 if ((m_current | 0x20) == 'e') {
831 record8('e');
832 shift1();
833 goto inExponentIndicator;
834 }
835
836 // Fall through into doneNumber.
837
// Converts the decimal text collected in m_buffer8 to a double, then empties the buffer.
838 doneNumber:
839 // Null-terminate string for strtod.
840 m_buffer8.append('\0');
841 lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
842 m_buffer8.resize(0);
843
844 // Fall through into doneNumeric.
845
846 doneNumeric:
847 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
848 if (UNLIKELY(isIdentStart(m_current)))
849 goto returnError;
850
851 m_atLineStart = false;
852 m_delimited = false;
853 token = NUMBER;
854 goto returnToken;
855
// Emits a ';' token for the automatic-semicolon cases above.
856 doneSemicolon:
857 token = ';';
858 m_delimited = true;
859 goto returnToken;
860
// Identifier containing at least one \uXXXX escape: built from m_buffer16 and
// always reported as IDENT (no keyword lookup is done on this path).
861 doneIdentifier:
862 m_atLineStart = false;
863 m_delimited = false;
864 lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
865 m_buffer16.resize(0);
866 token = IDENT;
867 goto returnToken;
868
// Escape-free identifier: lvalp->ident is already set. The token is the
// keyword table entry's lexer value when one exists, IDENT otherwise.
869 doneIdentifierOrKeyword: {
870 m_atLineStart = false;
871 m_delimited = false;
872 m_buffer16.resize(0);
873 const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
874 token = entry ? entry->lexerValue() : IDENT;
875 goto returnToken;
876 }
877
// Slow-path string literal: consume the closing quote and build the value from m_buffer16.
878 doneString:
879 // Atomize constant strings in case they're later used in property lookup.
880 shift1();
881 m_atLineStart = false;
882 m_delimited = false;
883 lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
884 m_buffer16.resize(0);
885 token = STRING;
886
887 // Fall through into returnToken.
888
// Fills in the location and remembers the token for lastTokenWasRestrKeyword().
// first_line and last_line both receive the line number at the end of the
// token; first_column and last_column receive character offsets from
// currentOffset(), not columns within a line.
889 returnToken: {
890 int lineNumber = m_lineNumber;
891 llocp->first_line = lineNumber;
892 llocp->last_line = lineNumber;
893 llocp->first_column = startOffset;
894 llocp->last_column = currentOffset();
895
896 m_lastToken = token;
897 return token;
898 }
899
// Any lexical error: m_error is set and -1 is returned. The scratch buffers are not reset here.
900 returnError:
901 m_error = true;
902 return -1;
903 }
904
scanRegExp(const Identifier * & pattern,const Identifier * & flags,UChar patternPrefix)905 bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
906 {
907 ASSERT(m_buffer16.isEmpty());
908
909 bool lastWasEscape = false;
910 bool inBrackets = false;
911
912 if (patternPrefix) {
913 ASSERT(!isLineTerminator(patternPrefix));
914 ASSERT(patternPrefix != '/');
915 ASSERT(patternPrefix != '[');
916 record16(patternPrefix);
917 }
918
919 while (true) {
920 int current = m_current;
921
922 if (isLineTerminator(current) || current == -1) {
923 m_buffer16.resize(0);
924 return false;
925 }
926
927 shift1();
928
929 if (current == '/' && !lastWasEscape && !inBrackets)
930 break;
931
932 record16(current);
933
934 if (lastWasEscape) {
935 lastWasEscape = false;
936 continue;
937 }
938
939 switch (current) {
940 case '[':
941 inBrackets = true;
942 break;
943 case ']':
944 inBrackets = false;
945 break;
946 case '\\':
947 lastWasEscape = true;
948 break;
949 }
950 }
951
952 pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
953 m_buffer16.resize(0);
954
955 while (isIdentPart(m_current)) {
956 record16(m_current);
957 shift1();
958 }
959
960 flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
961 m_buffer16.resize(0);
962
963 return true;
964 }
965
skipRegExp()966 bool Lexer::skipRegExp()
967 {
968 bool lastWasEscape = false;
969 bool inBrackets = false;
970
971 while (true) {
972 int current = m_current;
973
974 if (isLineTerminator(current) || current == -1)
975 return false;
976
977 shift1();
978
979 if (current == '/' && !lastWasEscape && !inBrackets)
980 break;
981
982 if (lastWasEscape) {
983 lastWasEscape = false;
984 continue;
985 }
986
987 switch (current) {
988 case '[':
989 inBrackets = true;
990 break;
991 case ']':
992 inBrackets = false;
993 break;
994 case '\\':
995 lastWasEscape = true;
996 break;
997 }
998 }
999
1000 while (isIdentPart(m_current))
1001 shift1();
1002
1003 return true;
1004 }
1005
clear()1006 void Lexer::clear()
1007 {
1008 m_arena = 0;
1009 m_codeWithoutBOMs.clear();
1010
1011 Vector<char> newBuffer8;
1012 newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
1013 m_buffer8.swap(newBuffer8);
1014
1015 Vector<UChar> newBuffer16;
1016 newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
1017 m_buffer16.swap(newBuffer16);
1018
1019 m_isReparsing = false;
1020 }
1021
sourceCode(int openBrace,int closeBrace,int firstLine)1022 SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
1023 {
1024 if (m_codeWithoutBOMs.isEmpty())
1025 return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1026
1027 const UChar* data = m_source->provider()->data();
1028
1029 ASSERT(openBrace < closeBrace);
1030
1031 int numBOMsBeforeOpenBrace = 0;
1032 int numBOMsBetweenBraces = 0;
1033
1034 int i;
1035 for (i = m_source->startOffset(); i < openBrace; ++i)
1036 numBOMsBeforeOpenBrace += data[i] == byteOrderMark;
1037 for (; i < closeBrace; ++i)
1038 numBOMsBetweenBraces += data[i] == byteOrderMark;
1039
1040 return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace,
1041 closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine);
1042 }
1043
1044 } // namespace JSC
1045