• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5  *
6  *  This library is free software; you can redistribute it and/or
7  *  modify it under the terms of the GNU Library General Public
8  *  License as published by the Free Software Foundation; either
9  *  version 2 of the License, or (at your option) any later version.
10  *
11  *  This library is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  *  Library General Public License for more details.
15  *
16  *  You should have received a copy of the GNU Library General Public License
17  *  along with this library; see the file COPYING.LIB.  If not, write to
18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  *  Boston, MA 02110-1301, USA.
20  *
21  */
22 
23 #include "config.h"
24 #include "Lexer.h"
25 
26 #include "JSFunction.h"
27 #include "JSGlobalObjectFunctions.h"
28 #include "NodeInfo.h"
29 #include "Nodes.h"
30 #include "dtoa.h"
31 #include <ctype.h>
32 #include <limits.h>
33 #include <string.h>
34 #include <wtf/Assertions.h>
35 
36 using namespace WTF;
37 using namespace Unicode;
38 
39 // We can't specify the namespace in yacc's C output, so do it here instead.
40 using namespace JSC;
41 
42 #include "Grammar.h"
43 #include "Lookup.h"
44 #include "Lexer.lut.h"
45 
46 namespace JSC {
47 
48 static const UChar byteOrderMark = 0xFEFF;
49 
Lexer(JSGlobalData * globalData)50 Lexer::Lexer(JSGlobalData* globalData)
51     : m_isReparsing(false)
52     , m_globalData(globalData)
53     , m_keywordTable(JSC::mainTable)
54 {
55     m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
56     m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
57 }
58 
~Lexer()59 Lexer::~Lexer()
60 {
61     m_keywordTable.deleteTable();
62 }
63 
currentCharacter() const64 inline const UChar* Lexer::currentCharacter() const
65 {
66     return m_code - 4;
67 }
68 
currentOffset() const69 inline int Lexer::currentOffset() const
70 {
71     return currentCharacter() - m_codeStart;
72 }
73 
shift1()74 ALWAYS_INLINE void Lexer::shift1()
75 {
76     m_current = m_next1;
77     m_next1 = m_next2;
78     m_next2 = m_next3;
79     if (LIKELY(m_code < m_codeEnd))
80         m_next3 = m_code[0];
81     else
82         m_next3 = -1;
83 
84     ++m_code;
85 }
86 
shift2()87 ALWAYS_INLINE void Lexer::shift2()
88 {
89     m_current = m_next2;
90     m_next1 = m_next3;
91     if (LIKELY(m_code + 1 < m_codeEnd)) {
92         m_next2 = m_code[0];
93         m_next3 = m_code[1];
94     } else {
95         m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
96         m_next3 = -1;
97     }
98 
99     m_code += 2;
100 }
101 
shift3()102 ALWAYS_INLINE void Lexer::shift3()
103 {
104     m_current = m_next3;
105     if (LIKELY(m_code + 2 < m_codeEnd)) {
106         m_next1 = m_code[0];
107         m_next2 = m_code[1];
108         m_next3 = m_code[2];
109     } else {
110         m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
111         m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
112         m_next3 = -1;
113     }
114 
115     m_code += 3;
116 }
117 
shift4()118 ALWAYS_INLINE void Lexer::shift4()
119 {
120     if (LIKELY(m_code + 3 < m_codeEnd)) {
121         m_current = m_code[0];
122         m_next1 = m_code[1];
123         m_next2 = m_code[2];
124         m_next3 = m_code[3];
125     } else {
126         m_current = m_code < m_codeEnd ? m_code[0] : -1;
127         m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
128         m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
129         m_next3 = -1;
130     }
131 
132     m_code += 4;
133 }
134 
setCode(const SourceCode & source,ParserArena & arena)135 void Lexer::setCode(const SourceCode& source, ParserArena& arena)
136 {
137     m_arena = &arena.identifierArena();
138 
139     m_lineNumber = source.firstLine();
140     m_delimited = false;
141     m_lastToken = -1;
142 
143     const UChar* data = source.provider()->data();
144 
145     m_source = &source;
146     m_codeStart = data;
147     m_code = data + source.startOffset();
148     m_codeEnd = data + source.endOffset();
149     m_error = false;
150     m_atLineStart = true;
151 
152     // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
153     // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
154     if (source.provider()->hasBOMs()) {
155         for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
156             if (UNLIKELY(*p == byteOrderMark)) {
157                 copyCodeWithoutBOMs();
158                 break;
159             }
160         }
161     }
162 
163     // Read the first characters into the 4-character buffer.
164     shift4();
165     ASSERT(currentOffset() == source.startOffset());
166 }
167 
copyCodeWithoutBOMs()168 void Lexer::copyCodeWithoutBOMs()
169 {
170     // Note: In this case, the character offset data for debugging will be incorrect.
171     // If it's important to correctly debug code with extraneous BOMs, then the caller
172     // should strip the BOMs when creating the SourceProvider object and do its own
173     // mapping of offsets within the stripped text to original text offset.
174 
175     m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
176     for (const UChar* p = m_code; p < m_codeEnd; ++p) {
177         UChar c = *p;
178         if (c != byteOrderMark)
179             m_codeWithoutBOMs.append(c);
180     }
181     ptrdiff_t startDelta = m_codeStart - m_code;
182     m_code = m_codeWithoutBOMs.data();
183     m_codeStart = m_code + startDelta;
184     m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
185 }
186 
shiftLineTerminator()187 void Lexer::shiftLineTerminator()
188 {
189     ASSERT(isLineTerminator(m_current));
190 
191     // Allow both CRLF and LFCR.
192     if (m_current + m_next1 == '\n' + '\r')
193         shift2();
194     else
195         shift1();
196 
197     ++m_lineNumber;
198 }
199 
makeIdentifier(const UChar * characters,size_t length)200 ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
201 {
202     return &m_arena->makeIdentifier(m_globalData, characters, length);
203 }
204 
lastTokenWasRestrKeyword() const205 inline bool Lexer::lastTokenWasRestrKeyword() const
206 {
207     return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
208 }
209 
isNonASCIIIdentStart(int c)210 static NEVER_INLINE bool isNonASCIIIdentStart(int c)
211 {
212     return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
213 }
214 
isIdentStart(int c)215 static inline bool isIdentStart(int c)
216 {
217     return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
218 }
219 
isNonASCIIIdentPart(int c)220 static NEVER_INLINE bool isNonASCIIIdentPart(int c)
221 {
222     return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
223         | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
224 }
225 
isIdentPart(int c)226 static inline bool isIdentPart(int c)
227 {
228     return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
229 }
230 
// Maps the character following a backslash to the character it denotes.
// The letters b, t, n, v, f and r map to their control characters; every
// other value is returned unchanged.
static inline int singleEscape(int c)
{
    if (c == 'b')
        return '\b';
    if (c == 't')
        return '\t';
    if (c == 'n')
        return '\n';
    if (c == 'v')
        return '\v';
    if (c == 'f')
        return '\f';
    if (c == 'r')
        return '\r';
    return c;
}
250 
record8(int c)251 inline void Lexer::record8(int c)
252 {
253     ASSERT(c >= 0);
254     ASSERT(c <= 0xFF);
255     m_buffer8.append(static_cast<char>(c));
256 }
257 
record16(UChar c)258 inline void Lexer::record16(UChar c)
259 {
260     m_buffer16.append(c);
261 }
262 
record16(int c)263 inline void Lexer::record16(int c)
264 {
265     ASSERT(c >= 0);
266     ASSERT(c <= USHRT_MAX);
267     record16(UChar(static_cast<unsigned short>(c)));
268 }
269 
// Scans and returns the next token from the source installed by setCode().
//
// p1 is treated as a YYSTYPE* and receives the token's value (identifier,
// numeric value, or brace offset); p2 is treated as a YYLTYPE* and receives
// the token's line number and its start and end offsets.
//
// Returns the token code, 0 when the end of input is reached, or -1 after
// setting m_error when the input cannot be tokenized.
lex(void * p1,void * p2)270 int Lexer::lex(void* p1, void* p2)
271 {
272     ASSERT(!m_error);
273     ASSERT(m_buffer8.isEmpty());
274     ASSERT(m_buffer16.isEmpty());
275 
276     YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
277     YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
278     int token = 0;
279     m_terminator = false;
280 
    // Re-entry point after a comment or a line terminator has been skipped.
281 start:
282     while (isWhiteSpace(m_current))
283         shift1();
284 
    // Offset of the token's first character; reported as first_column.
285     int startOffset = currentOffset();
286 
287     if (m_current == -1) {
288         if (!m_terminator && !m_delimited && !m_isReparsing) {
289             // automatic semicolon insertion if program incomplete
290             token = ';';
291             goto doneSemicolon;
292         }
293         return 0;
294     }
295 
296     m_delimited = false;
    // Operators and punctuation are recognized using up to three characters
    // of lookahead (m_next1, m_next2, m_next3), longest match first.
297     switch (m_current) {
298         case '>':
299             if (m_next1 == '>' && m_next2 == '>') {
300                 if (m_next3 == '=') {
301                     shift4();
302                     token = URSHIFTEQUAL;
303                     break;
304                 }
305                 shift3();
306                 token = URSHIFT;
307                 break;
308             }
309             if (m_next1 == '>') {
310                 if (m_next2 == '=') {
311                     shift3();
312                     token = RSHIFTEQUAL;
313                     break;
314                 }
315                 shift2();
316                 token = RSHIFT;
317                 break;
318             }
319             if (m_next1 == '=') {
320                 shift2();
321                 token = GE;
322                 break;
323             }
324             shift1();
325             token = '>';
326             break;
327         case '=':
328             if (m_next1 == '=') {
329                 if (m_next2 == '=') {
330                     shift3();
331                     token = STREQ;
332                     break;
333                 }
334                 shift2();
335                 token = EQEQ;
336                 break;
337             }
338             shift1();
339             token = '=';
340             break;
341         case '!':
342             if (m_next1 == '=') {
343                 if (m_next2 == '=') {
344                     shift3();
345                     token = STRNEQ;
346                     break;
347                 }
348                 shift2();
349                 token = NE;
350                 break;
351             }
352             shift1();
353             token = '!';
354             break;
355         case '<':
356             if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
357                 // <!-- marks the beginning of a line comment (for www usage)
358                 shift4();
359                 goto inSingleLineComment;
360             }
361             if (m_next1 == '<') {
362                 if (m_next2 == '=') {
363                     shift3();
364                     token = LSHIFTEQUAL;
365                     break;
366                 }
367                 shift2();
368                 token = LSHIFT;
369                 break;
370             }
371             if (m_next1 == '=') {
372                 shift2();
373                 token = LE;
374                 break;
375             }
376             shift1();
377             token = '<';
378             break;
379         case '+':
380             if (m_next1 == '+') {
381                 shift2();
                // A different token is returned when a line terminator was
                // seen earlier in this call.
382                 if (m_terminator) {
383                     token = AUTOPLUSPLUS;
384                     break;
385                 }
386                 token = PLUSPLUS;
387                 break;
388             }
389             if (m_next1 == '=') {
390                 shift2();
391                 token = PLUSEQUAL;
392                 break;
393             }
394             shift1();
395             token = '+';
396             break;
397         case '-':
398             if (m_next1 == '-') {
                // Two minus signs followed by '>' at the start of a line are
                // treated as the beginning of a single-line comment.
399                 if (m_atLineStart && m_next2 == '>') {
400                     shift3();
401                     goto inSingleLineComment;
402                 }
403                 shift2();
404                 if (m_terminator) {
405                     token = AUTOMINUSMINUS;
406                     break;
407                 }
408                 token = MINUSMINUS;
409                 break;
410             }
411             if (m_next1 == '=') {
412                 shift2();
413                 token = MINUSEQUAL;
414                 break;
415             }
416             shift1();
417             token = '-';
418             break;
419         case '*':
420             if (m_next1 == '=') {
421                 shift2();
422                 token = MULTEQUAL;
423                 break;
424             }
425             shift1();
426             token = '*';
427             break;
428         case '/':
429             if (m_next1 == '/') {
430                 shift2();
431                 goto inSingleLineComment;
432             }
433             if (m_next1 == '*')
434                 goto inMultiLineComment;
435             if (m_next1 == '=') {
436                 shift2();
437                 token = DIVEQUAL;
438                 break;
439             }
440             shift1();
441             token = '/';
442             break;
443         case '&':
444             if (m_next1 == '&') {
445                 shift2();
446                 token = AND;
447                 break;
448             }
449             if (m_next1 == '=') {
450                 shift2();
451                 token = ANDEQUAL;
452                 break;
453             }
454             shift1();
455             token = '&';
456             break;
457         case '^':
458             if (m_next1 == '=') {
459                 shift2();
460                 token = XOREQUAL;
461                 break;
462             }
463             shift1();
464             token = '^';
465             break;
466         case '%':
467             if (m_next1 == '=') {
468                 shift2();
469                 token = MODEQUAL;
470                 break;
471             }
472             shift1();
473             token = '%';
474             break;
475         case '|':
476             if (m_next1 == '=') {
477                 shift2();
478                 token = OREQUAL;
479                 break;
480             }
481             if (m_next1 == '|') {
482                 shift2();
483                 token = OR;
484                 break;
485             }
486             shift1();
487             token = '|';
488             break;
489         case '.':
                // A dot directly followed by a digit starts a numeric literal.
490             if (isASCIIDigit(m_next1)) {
491                 record8('.');
492                 shift1();
493                 goto inNumberAfterDecimalPoint;
494             }
495             token = '.';
496             shift1();
497             break;
498         case ',':
499         case '~':
500         case '?':
501         case ':':
502         case '(':
503         case ')':
504         case '[':
505         case ']':
506             token = m_current;
507             shift1();
508             break;
509         case ';':
510             shift1();
511             m_delimited = true;
512             token = ';';
513             break;
            // Braces carry their own source offset as the token value.
514         case '{':
515             lvalp->intValue = currentOffset();
516             shift1();
517             token = OPENBRACE;
518             break;
519         case '}':
520             lvalp->intValue = currentOffset();
521             shift1();
522             m_delimited = true;
523             token = CLOSEBRACE;
524             break;
525         case '\\':
526             goto startIdentifierWithBackslash;
527         case '0':
528             goto startNumberWithZeroDigit;
529         case '1':
530         case '2':
531         case '3':
532         case '4':
533         case '5':
534         case '6':
535         case '7':
536         case '8':
537         case '9':
538             goto startNumber;
539         case '"':
540         case '\'':
541             goto startString;
542         default:
543             if (isIdentStart(m_current))
544                 goto startIdentifierOrKeyword;
545             if (isLineTerminator(m_current)) {
546                 shiftLineTerminator();
547                 m_atLineStart = true;
548                 m_terminator = true;
                    // A line terminator directly after continue, break, return
                    // or throw produces a semicolon token.
549                 if (lastTokenWasRestrKeyword()) {
550                     token = ';';
551                     goto doneSemicolon;
552                 }
553                 goto start;
554             }
555             goto returnError;
556     }
557 
558     m_atLineStart = false;
559     goto returnToken;
560 
    // Fast path for strings: scan the body in place and build the identifier
    // straight from the source characters. The first character that needs
    // special handling diverts to the buffered path at inString.
561 startString: {
562     int stringQuoteCharacter = m_current;
563     shift1();
564 
565     const UChar* stringStart = currentCharacter();
566     while (m_current != stringQuoteCharacter) {
567         // Fast check for characters that require special handling.
568         // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
569         // as possible, and lets through all common ASCII characters.
570         if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
571             m_buffer16.append(stringStart, currentCharacter() - stringStart);
572             goto inString;
573         }
574         shift1();
575     }
576     lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
577     shift1();
578     m_atLineStart = false;
579     m_delimited = false;
580     token = STRING;
581     goto returnToken;
582 
    // Buffered path for strings: characters are accumulated one at a time in
    // m_buffer16. A line terminator or the end of input inside the string is
    // an error.
583 inString:
584     while (m_current != stringQuoteCharacter) {
585         if (m_current == '\\')
586             goto inStringEscapeSequence;
587         if (UNLIKELY(isLineTerminator(m_current)))
588             goto returnError;
589         if (UNLIKELY(m_current == -1))
590             goto returnError;
591         record16(m_current);
592         shift1();
593     }
594     goto doneString;
595 
596 inStringEscapeSequence:
597     shift1();
    // Backslash-x followed by two hex digits yields that character; otherwise
    // a literal 'x' is recorded and scanning continues.
598     if (m_current == 'x') {
599         shift1();
600         if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
601             record16(convertHex(m_current, m_next1));
602             shift2();
603             goto inString;
604         }
605         record16('x');
606         if (m_current == stringQuoteCharacter)
607             goto doneString;
608         goto inString;
609     }
    // Backslash-u must be followed by four hex digits. The only other accepted
    // form is backslash-u directly before the closing quote, which records a
    // literal 'u'; anything else is an error.
610     if (m_current == 'u') {
611         shift1();
612         if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
613             record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
614             shift4();
615             goto inString;
616         }
617         if (m_current == stringQuoteCharacter) {
618             record16('u');
619             goto doneString;
620         }
621         goto returnError;
622     }
    // Octal escape of one to three digits; three digits are only consumed
    // when the first digit is in the range 0-3.
623     if (isASCIIOctalDigit(m_current)) {
624         if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
625             record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
626             shift3();
627             goto inString;
628         }
629         if (isASCIIOctalDigit(m_next1)) {
630             record16((m_current - '0') * 8 + m_next1 - '0');
631             shift2();
632             goto inString;
633         }
634         record16(m_current - '0');
635         shift1();
636         goto inString;
637     }
    // A backslash followed by a line terminator: the terminator is consumed
    // and nothing is added to the string.
638     if (isLineTerminator(m_current)) {
639         shiftLineTerminator();
640         goto inString;
641     }
642     if (m_current == -1)
643         goto returnError;
644     record16(singleEscape(m_current));
645     shift1();
646     goto inString;
647 }
648 
    // Identifier whose first character is written as a backslash-u escape.
    // The decoded character must be a valid identifier start.
649 startIdentifierWithBackslash:
650     shift1();
651     if (UNLIKELY(m_current != 'u'))
652         goto returnError;
653     shift1();
654     if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
655         goto returnError;
656     token = convertUnicode(m_current, m_next1, m_next2, m_next3);
657     if (UNLIKELY(!isIdentStart(token)))
658         goto returnError;
659     goto inIdentifierAfterCharacterCheck;
660 
    // Fast path for identifiers: when no backslash is encountered, the
    // identifier is built directly from the source characters and then
    // looked up in the keyword table.
661 startIdentifierOrKeyword: {
662     const UChar* identifierStart = currentCharacter();
663     shift1();
664     while (isIdentPart(m_current))
665         shift1();
666     if (LIKELY(m_current != '\\')) {
667         lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
668         goto doneIdentifierOrKeyword;
669     }
670     m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
671 }
672 
    // Identifier containing backslash-u escapes: built up in m_buffer16.
    // Each pass decodes one escape and then copies the plain identifier
    // characters that follow it. startIdentifierWithBackslash jumps into the
    // middle of this loop at inIdentifierAfterCharacterCheck, using token as
    // scratch storage for the decoded character.
673     do {
674         shift1();
675         if (UNLIKELY(m_current != 'u'))
676             goto returnError;
677         shift1();
678         if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
679             goto returnError;
680         token = convertUnicode(m_current, m_next1, m_next2, m_next3);
681         if (UNLIKELY(!isIdentPart(token)))
682             goto returnError;
683 inIdentifierAfterCharacterCheck:
684         record16(token);
685         shift4();
686 
687         while (isIdentPart(m_current)) {
688             record16(m_current);
689             shift1();
690         }
691     } while (UNLIKELY(m_current == '\\'));
692     goto doneIdentifier;
693 
    // Skips to the end of the current line. Reaching the end of input inside
    // the comment returns 0 immediately.
694 inSingleLineComment:
695     while (!isLineTerminator(m_current)) {
696         if (UNLIKELY(m_current == -1))
697             return 0;
698         shift1();
699     }
700     shiftLineTerminator();
701     m_atLineStart = true;
702     m_terminator = true;
703     if (lastTokenWasRestrKeyword())
704         goto doneSemicolon;
705     goto start;
706 
    // Skips a multi-line comment while keeping the line count up to date.
    // Reaching the end of input before the comment is closed is an error.
707 inMultiLineComment:
708     shift2();
709     while (m_current != '*' || m_next1 != '/') {
710         if (isLineTerminator(m_current))
711             shiftLineTerminator();
712         else {
713             shift1();
714             if (UNLIKELY(m_current == -1))
715                 goto returnError;
716         }
717     }
718     shift2();
719     m_atLineStart = false;
720     goto start;
721 
    // A leading '0' may begin a hex literal, a decimal fraction, an exponent,
    // an octal literal, a decimal number, or stand alone as zero.
722 startNumberWithZeroDigit:
723     shift1();
724     if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
725         shift1();
726         goto inHex;
727     }
728     if (m_current == '.') {
729         record8('0');
730         record8('.');
731         shift1();
732         goto inNumberAfterDecimalPoint;
733     }
734     if ((m_current | 0x20) == 'e') {
735         record8('0');
736         record8('e');
737         shift1();
738         goto inExponentIndicator;
739     }
740     if (isASCIIOctalDigit(m_current))
741         goto inOctal;
742     if (isASCIIDigit(m_current))
743         goto startNumber;
744     lvalp->doubleValue = 0;
745     goto doneNumeric;
746 
747 inNumberAfterDecimalPoint:
748     while (isASCIIDigit(m_current)) {
749         record8(m_current);
750         shift1();
751     }
752     if ((m_current | 0x20) == 'e') {
753         record8('e');
754         shift1();
755         goto inExponentIndicator;
756     }
757     goto doneNumber;
758 
    // Optional sign followed by at least one digit; a missing digit is an error.
759 inExponentIndicator:
760     if (m_current == '+' || m_current == '-') {
761         record8(m_current);
762         shift1();
763     }
764     if (!isASCIIDigit(m_current))
765         goto returnError;
766     do {
767         record8(m_current);
768         shift1();
769     } while (isASCIIDigit(m_current));
770     goto doneNumber;
771 
772 inOctal: {
773     do {
774         record8(m_current);
775         shift1();
776     } while (isASCIIOctalDigit(m_current));
    // A non-octal digit here means the literal continues as a decimal number,
    // keeping the digits recorded so far.
777     if (isASCIIDigit(m_current))
778         goto startNumber;
779 
780     double dval = 0;
781 
782     const char* end = m_buffer8.end();
783     for (const char* p = m_buffer8.data(); p < end; ++p) {
784         dval *= 8;
785         dval += *p - '0';
786     }
    // Values at or above mantissaOverflowLowerBound are recomputed by
    // parseIntOverflow().
787     if (dval >= mantissaOverflowLowerBound)
788         dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
789 
790     m_buffer8.resize(0);
791 
792     lvalp->doubleValue = dval;
793     goto doneNumeric;
794 }
795 
796 inHex: {
797     do {
798         record8(m_current);
799         shift1();
800     } while (isASCIIHexDigit(m_current));
801 
802     double dval = 0;
803 
804     const char* end = m_buffer8.end();
805     for (const char* p = m_buffer8.data(); p < end; ++p) {
806         dval *= 16;
807         dval += toASCIIHexValue(*p);
808     }
809     if (dval >= mantissaOverflowLowerBound)
810         dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
811 
812     m_buffer8.resize(0);
813 
814     lvalp->doubleValue = dval;
815     goto doneNumeric;
816 }
817 
    // Decimal literal: the digits are collected in m_buffer8 and converted
    // at doneNumber.
818 startNumber:
819     record8(m_current);
820     shift1();
821     while (isASCIIDigit(m_current)) {
822         record8(m_current);
823         shift1();
824     }
825     if (m_current == '.') {
826         record8('.');
827         shift1();
828         goto inNumberAfterDecimalPoint;
829     }
830     if ((m_current | 0x20) == 'e') {
831         record8('e');
832         shift1();
833         goto inExponentIndicator;
834     }
835 
836     // Fall through into doneNumber.
837 
838 doneNumber:
839     // Null-terminate string for strtod.
840     m_buffer8.append('\0');
841     lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
842     m_buffer8.resize(0);
843 
844     // Fall through into doneNumeric.
845 
846 doneNumeric:
847     // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
848     if (UNLIKELY(isIdentStart(m_current)))
849         goto returnError;
850 
851     m_atLineStart = false;
852     m_delimited = false;
853     token = NUMBER;
854     goto returnToken;
855 
856 doneSemicolon:
857     token = ';';
858     m_delimited = true;
859     goto returnToken;
860 
    // Identifiers that were spelled with escapes are always returned as IDENT;
    // no keyword lookup is performed for them.
861 doneIdentifier:
862     m_atLineStart = false;
863     m_delimited = false;
864     lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
865     m_buffer16.resize(0);
866     token = IDENT;
867     goto returnToken;
868 
    // Identifiers without escapes are looked up in the keyword table; a hit
    // yields the keyword's token, otherwise IDENT.
869 doneIdentifierOrKeyword: {
870     m_atLineStart = false;
871     m_delimited = false;
872     m_buffer16.resize(0);
873     const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
874     token = entry ? entry->lexerValue() : IDENT;
875     goto returnToken;
876 }
877 
878 doneString:
879     // Atomize constant strings in case they're later used in property lookup.
880     shift1();
881     m_atLineStart = false;
882     m_delimited = false;
883     lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
884     m_buffer16.resize(0);
885     token = STRING;
886 
887     // Fall through into returnToken.
888 
    // Records the token's location. Both line fields receive the line number
    // current at the end of the token; the column fields receive character
    // offsets rather than columns.
889 returnToken: {
890     int lineNumber = m_lineNumber;
891     llocp->first_line = lineNumber;
892     llocp->last_line = lineNumber;
893     llocp->first_column = startOffset;
894     llocp->last_column = currentOffset();
895 
896     m_lastToken = token;
897     return token;
898 }
899 
900 returnError:
901     m_error = true;
902     return -1;
903 }
904 
scanRegExp(const Identifier * & pattern,const Identifier * & flags,UChar patternPrefix)905 bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
906 {
907     ASSERT(m_buffer16.isEmpty());
908 
909     bool lastWasEscape = false;
910     bool inBrackets = false;
911 
912     if (patternPrefix) {
913         ASSERT(!isLineTerminator(patternPrefix));
914         ASSERT(patternPrefix != '/');
915         ASSERT(patternPrefix != '[');
916         record16(patternPrefix);
917     }
918 
919     while (true) {
920         int current = m_current;
921 
922         if (isLineTerminator(current) || current == -1) {
923             m_buffer16.resize(0);
924             return false;
925         }
926 
927         shift1();
928 
929         if (current == '/' && !lastWasEscape && !inBrackets)
930             break;
931 
932         record16(current);
933 
934         if (lastWasEscape) {
935             lastWasEscape = false;
936             continue;
937         }
938 
939         switch (current) {
940         case '[':
941             inBrackets = true;
942             break;
943         case ']':
944             inBrackets = false;
945             break;
946         case '\\':
947             lastWasEscape = true;
948             break;
949         }
950     }
951 
952     pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
953     m_buffer16.resize(0);
954 
955     while (isIdentPart(m_current)) {
956         record16(m_current);
957         shift1();
958     }
959 
960     flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
961     m_buffer16.resize(0);
962 
963     return true;
964 }
965 
skipRegExp()966 bool Lexer::skipRegExp()
967 {
968     bool lastWasEscape = false;
969     bool inBrackets = false;
970 
971     while (true) {
972         int current = m_current;
973 
974         if (isLineTerminator(current) || current == -1)
975             return false;
976 
977         shift1();
978 
979         if (current == '/' && !lastWasEscape && !inBrackets)
980             break;
981 
982         if (lastWasEscape) {
983             lastWasEscape = false;
984             continue;
985         }
986 
987         switch (current) {
988         case '[':
989             inBrackets = true;
990             break;
991         case ']':
992             inBrackets = false;
993             break;
994         case '\\':
995             lastWasEscape = true;
996             break;
997         }
998     }
999 
1000     while (isIdentPart(m_current))
1001         shift1();
1002 
1003     return true;
1004 }
1005 
clear()1006 void Lexer::clear()
1007 {
1008     m_arena = 0;
1009     m_codeWithoutBOMs.clear();
1010 
1011     Vector<char> newBuffer8;
1012     newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
1013     m_buffer8.swap(newBuffer8);
1014 
1015     Vector<UChar> newBuffer16;
1016     newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
1017     m_buffer16.swap(newBuffer16);
1018 
1019     m_isReparsing = false;
1020 }
1021 
sourceCode(int openBrace,int closeBrace,int firstLine)1022 SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
1023 {
1024     if (m_codeWithoutBOMs.isEmpty())
1025         return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1026 
1027     const UChar* data = m_source->provider()->data();
1028 
1029     ASSERT(openBrace < closeBrace);
1030 
1031     int numBOMsBeforeOpenBrace = 0;
1032     int numBOMsBetweenBraces = 0;
1033 
1034     int i;
1035     for (i = m_source->startOffset(); i < openBrace; ++i)
1036         numBOMsBeforeOpenBrace += data[i] == byteOrderMark;
1037     for (; i < closeBrace; ++i)
1038         numBOMsBetweenBraces += data[i] == byteOrderMark;
1039 
1040     return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace,
1041         closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine);
1042 }
1043 
1044 } // namespace JSC
1045