• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5  *
6  *  This library is free software; you can redistribute it and/or
7  *  modify it under the terms of the GNU Library General Public
8  *  License as published by the Free Software Foundation; either
9  *  version 2 of the License, or (at your option) any later version.
10  *
11  *  This library is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  *  Library General Public License for more details.
15  *
16  *  You should have received a copy of the GNU Library General Public License
17  *  along with this library; see the file COPYING.LIB.  If not, write to
18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  *  Boston, MA 02110-1301, USA.
20  *
21  */
22 
23 #include "config.h"
24 #include "Lexer.h"
25 
26 #include "JSFunction.h"
27 #include "JSGlobalObjectFunctions.h"
28 #include "NodeInfo.h"
29 #include "Nodes.h"
30 #include "dtoa.h"
31 #include <ctype.h>
32 #include <limits.h>
33 #include <string.h>
34 #include <wtf/Assertions.h>
35 
36 using namespace WTF;
37 using namespace Unicode;
38 
39 // We can't specify the namespace in yacc's C output, so do it here instead.
40 using namespace JSC;
41 
42 #ifndef KDE_USE_FINAL
43 #include "Grammar.h"
44 #endif
45 
46 #include "Lookup.h"
47 #include "Lexer.lut.h"
48 
49 // A bridge for yacc from the C world to the C++ world.
jscyylex(void * lvalp,void * llocp,void * globalData)50 int jscyylex(void* lvalp, void* llocp, void* globalData)
51 {
52     return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
53 }
54 
55 namespace JSC {
56 
57 static const UChar byteOrderMark = 0xFEFF;
58 
Lexer(JSGlobalData * globalData)59 Lexer::Lexer(JSGlobalData* globalData)
60     : m_isReparsing(false)
61     , m_globalData(globalData)
62     , m_keywordTable(JSC::mainTable)
63 {
64     m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
65     m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
66 }
67 
~Lexer()68 Lexer::~Lexer()
69 {
70     m_keywordTable.deleteTable();
71 }
72 
currentCharacter() const73 inline const UChar* Lexer::currentCharacter() const
74 {
75     return m_code - 4;
76 }
77 
currentOffset() const78 inline int Lexer::currentOffset() const
79 {
80     return currentCharacter() - m_codeStart;
81 }
82 
shift1()83 ALWAYS_INLINE void Lexer::shift1()
84 {
85     m_current = m_next1;
86     m_next1 = m_next2;
87     m_next2 = m_next3;
88     if (LIKELY(m_code < m_codeEnd))
89         m_next3 = m_code[0];
90     else
91         m_next3 = -1;
92 
93     ++m_code;
94 }
95 
shift2()96 ALWAYS_INLINE void Lexer::shift2()
97 {
98     m_current = m_next2;
99     m_next1 = m_next3;
100     if (LIKELY(m_code + 1 < m_codeEnd)) {
101         m_next2 = m_code[0];
102         m_next3 = m_code[1];
103     } else {
104         m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
105         m_next3 = -1;
106     }
107 
108     m_code += 2;
109 }
110 
shift3()111 ALWAYS_INLINE void Lexer::shift3()
112 {
113     m_current = m_next3;
114     if (LIKELY(m_code + 2 < m_codeEnd)) {
115         m_next1 = m_code[0];
116         m_next2 = m_code[1];
117         m_next3 = m_code[2];
118     } else {
119         m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
120         m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
121         m_next3 = -1;
122     }
123 
124     m_code += 3;
125 }
126 
shift4()127 ALWAYS_INLINE void Lexer::shift4()
128 {
129     if (LIKELY(m_code + 3 < m_codeEnd)) {
130         m_current = m_code[0];
131         m_next1 = m_code[1];
132         m_next2 = m_code[2];
133         m_next3 = m_code[3];
134     } else {
135         m_current = m_code < m_codeEnd ? m_code[0] : -1;
136         m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
137         m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
138         m_next3 = -1;
139     }
140 
141     m_code += 4;
142 }
143 
setCode(const SourceCode & source)144 void Lexer::setCode(const SourceCode& source)
145 {
146     m_lineNumber = source.firstLine();
147     m_delimited = false;
148     m_lastToken = -1;
149 
150     const UChar* data = source.provider()->data();
151 
152     m_source = &source;
153     m_codeStart = data;
154     m_code = data + source.startOffset();
155     m_codeEnd = data + source.endOffset();
156     m_error = false;
157     m_atLineStart = true;
158 
159     // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
160     // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
161     if (source.provider()->hasBOMs()) {
162         for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
163             if (UNLIKELY(*p == byteOrderMark)) {
164                 copyCodeWithoutBOMs();
165                 break;
166             }
167         }
168     }
169 
170     // Read the first characters into the 4-character buffer.
171     shift4();
172     ASSERT(currentOffset() == source.startOffset());
173 }
174 
copyCodeWithoutBOMs()175 void Lexer::copyCodeWithoutBOMs()
176 {
177     // Note: In this case, the character offset data for debugging will be incorrect.
178     // If it's important to correctly debug code with extraneous BOMs, then the caller
179     // should strip the BOMs when creating the SourceProvider object and do its own
180     // mapping of offsets within the stripped text to original text offset.
181 
182     m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
183     for (const UChar* p = m_code; p < m_codeEnd; ++p) {
184         UChar c = *p;
185         if (c != byteOrderMark)
186             m_codeWithoutBOMs.append(c);
187     }
188     ptrdiff_t startDelta = m_codeStart - m_code;
189     m_code = m_codeWithoutBOMs.data();
190     m_codeStart = m_code + startDelta;
191     m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
192 }
193 
shiftLineTerminator()194 void Lexer::shiftLineTerminator()
195 {
196     ASSERT(isLineTerminator(m_current));
197 
198     // Allow both CRLF and LFCR.
199     if (m_current + m_next1 == '\n' + '\r')
200         shift2();
201     else
202         shift1();
203 
204     ++m_lineNumber;
205 }
206 
makeIdentifier(const UChar * characters,size_t length)207 ALWAYS_INLINE Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
208 {
209     m_identifiers.append(Identifier(m_globalData, characters, length));
210     return &m_identifiers.last();
211 }
212 
lastTokenWasRestrKeyword() const213 inline bool Lexer::lastTokenWasRestrKeyword() const
214 {
215     return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
216 }
217 
isNonASCIIIdentStart(int c)218 static NEVER_INLINE bool isNonASCIIIdentStart(int c)
219 {
220     return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
221 }
222 
isIdentStart(int c)223 static inline bool isIdentStart(int c)
224 {
225     return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
226 }
227 
isNonASCIIIdentPart(int c)228 static NEVER_INLINE bool isNonASCIIIdentPart(int c)
229 {
230     return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
231         | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
232 }
233 
isIdentPart(int c)234 static inline bool isIdentPart(int c)
235 {
236     return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
237 }
238 
// Maps the character following a backslash to the character it denotes. The six
// control-character escapes are translated; every other value stands for itself.
static inline int singleEscape(int c)
{
    if (c == 'b')
        return 0x08;
    if (c == 't')
        return 0x09;
    if (c == 'n')
        return 0x0A;
    if (c == 'v')
        return 0x0B;
    if (c == 'f')
        return 0x0C;
    if (c == 'r')
        return 0x0D;
    return c;
}
258 
record8(int c)259 inline void Lexer::record8(int c)
260 {
261     ASSERT(c >= 0);
262     ASSERT(c <= 0xFF);
263     m_buffer8.append(static_cast<char>(c));
264 }
265 
record16(UChar c)266 inline void Lexer::record16(UChar c)
267 {
268     m_buffer16.append(c);
269 }
270 
record16(int c)271 inline void Lexer::record16(int c)
272 {
273     ASSERT(c >= 0);
274     ASSERT(c <= USHRT_MAX);
275     record16(UChar(static_cast<unsigned short>(c)));
276 }
277 
lex(void * p1,void * p2)278 int Lexer::lex(void* p1, void* p2)
279 {
280     ASSERT(!m_error);
281     ASSERT(m_buffer8.isEmpty());
282     ASSERT(m_buffer16.isEmpty());
283 
284     YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
285     YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
286     int token = 0;
287     m_terminator = false;
288 
289 start:
290     while (isWhiteSpace(m_current))
291         shift1();
292 
293     int startOffset = currentOffset();
294 
295     if (m_current == -1) {
296         if (!m_terminator && !m_delimited && !m_isReparsing) {
297             // automatic semicolon insertion if program incomplete
298             token = ';';
299             goto doneSemicolon;
300         }
301         return 0;
302     }
303 
304     m_delimited = false;
305     switch (m_current) {
306         case '>':
307             if (m_next1 == '>' && m_next2 == '>') {
308                 if (m_next3 == '=') {
309                     shift4();
310                     token = URSHIFTEQUAL;
311                     break;
312                 }
313                 shift3();
314                 token = URSHIFT;
315                 break;
316             }
317             if (m_next1 == '>') {
318                 if (m_next2 == '=') {
319                     shift3();
320                     token = RSHIFTEQUAL;
321                     break;
322                 }
323                 shift2();
324                 token = RSHIFT;
325                 break;
326             }
327             if (m_next1 == '=') {
328                 shift2();
329                 token = GE;
330                 break;
331             }
332             shift1();
333             token = '>';
334             break;
335         case '=':
336             if (m_next1 == '=') {
337                 if (m_next2 == '=') {
338                     shift3();
339                     token = STREQ;
340                     break;
341                 }
342                 shift2();
343                 token = EQEQ;
344                 break;
345             }
346             shift1();
347             token = '=';
348             break;
349         case '!':
350             if (m_next1 == '=') {
351                 if (m_next2 == '=') {
352                     shift3();
353                     token = STRNEQ;
354                     break;
355                 }
356                 shift2();
357                 token = NE;
358                 break;
359             }
360             shift1();
361             token = '!';
362             break;
363         case '<':
364             if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
365                 // <!-- marks the beginning of a line comment (for www usage)
366                 shift4();
367                 goto inSingleLineComment;
368             }
369             if (m_next1 == '<') {
370                 if (m_next2 == '=') {
371                     shift3();
372                     token = LSHIFTEQUAL;
373                     break;
374                 }
375                 shift2();
376                 token = LSHIFT;
377                 break;
378             }
379             if (m_next1 == '=') {
380                 shift2();
381                 token = LE;
382                 break;
383             }
384             shift1();
385             token = '<';
386             break;
387         case '+':
388             if (m_next1 == '+') {
389                 shift2();
390                 if (m_terminator) {
391                     token = AUTOPLUSPLUS;
392                     break;
393                 }
394                 token = PLUSPLUS;
395                 break;
396             }
397             if (m_next1 == '=') {
398                 shift2();
399                 token = PLUSEQUAL;
400                 break;
401             }
402             shift1();
403             token = '+';
404             break;
405         case '-':
406             if (m_next1 == '-') {
407                 if (m_atLineStart && m_next2 == '>') {
408                     shift3();
409                     goto inSingleLineComment;
410                 }
411                 shift2();
412                 if (m_terminator) {
413                     token = AUTOMINUSMINUS;
414                     break;
415                 }
416                 token = MINUSMINUS;
417                 break;
418             }
419             if (m_next1 == '=') {
420                 shift2();
421                 token = MINUSEQUAL;
422                 break;
423             }
424             shift1();
425             token = '-';
426             break;
427         case '*':
428             if (m_next1 == '=') {
429                 shift2();
430                 token = MULTEQUAL;
431                 break;
432             }
433             shift1();
434             token = '*';
435             break;
436         case '/':
437             if (m_next1 == '/') {
438                 shift2();
439                 goto inSingleLineComment;
440             }
441             if (m_next1 == '*')
442                 goto inMultiLineComment;
443             if (m_next1 == '=') {
444                 shift2();
445                 token = DIVEQUAL;
446                 break;
447             }
448             shift1();
449             token = '/';
450             break;
451         case '&':
452             if (m_next1 == '&') {
453                 shift2();
454                 token = AND;
455                 break;
456             }
457             if (m_next1 == '=') {
458                 shift2();
459                 token = ANDEQUAL;
460                 break;
461             }
462             shift1();
463             token = '&';
464             break;
465         case '^':
466             if (m_next1 == '=') {
467                 shift2();
468                 token = XOREQUAL;
469                 break;
470             }
471             shift1();
472             token = '^';
473             break;
474         case '%':
475             if (m_next1 == '=') {
476                 shift2();
477                 token = MODEQUAL;
478                 break;
479             }
480             shift1();
481             token = '%';
482             break;
483         case '|':
484             if (m_next1 == '=') {
485                 shift2();
486                 token = OREQUAL;
487                 break;
488             }
489             if (m_next1 == '|') {
490                 shift2();
491                 token = OR;
492                 break;
493             }
494             shift1();
495             token = '|';
496             break;
497         case '.':
498             if (isASCIIDigit(m_next1)) {
499                 record8('.');
500                 shift1();
501                 goto inNumberAfterDecimalPoint;
502             }
503             token = '.';
504             shift1();
505             break;
506         case ',':
507         case '~':
508         case '?':
509         case ':':
510         case '(':
511         case ')':
512         case '[':
513         case ']':
514             token = m_current;
515             shift1();
516             break;
517         case ';':
518             shift1();
519             m_delimited = true;
520             token = ';';
521             break;
522         case '{':
523             lvalp->intValue = currentOffset();
524             shift1();
525             token = OPENBRACE;
526             break;
527         case '}':
528             lvalp->intValue = currentOffset();
529             shift1();
530             m_delimited = true;
531             token = CLOSEBRACE;
532             break;
533         case '\\':
534             goto startIdentifierWithBackslash;
535         case '0':
536             goto startNumberWithZeroDigit;
537         case '1':
538         case '2':
539         case '3':
540         case '4':
541         case '5':
542         case '6':
543         case '7':
544         case '8':
545         case '9':
546             goto startNumber;
547         case '"':
548         case '\'':
549             goto startString;
550         default:
551             if (isIdentStart(m_current))
552                 goto startIdentifierOrKeyword;
553             if (isLineTerminator(m_current)) {
554                 shiftLineTerminator();
555                 m_atLineStart = true;
556                 m_terminator = true;
557                 if (lastTokenWasRestrKeyword()) {
558                     token = ';';
559                     goto doneSemicolon;
560                 }
561                 goto start;
562             }
563             goto returnError;
564     }
565 
566     m_atLineStart = false;
567     goto returnToken;
568 
569 startString: {
570     int stringQuoteCharacter = m_current;
571     shift1();
572 
573     const UChar* stringStart = currentCharacter();
574     while (m_current != stringQuoteCharacter) {
575         // Fast check for characters that require special handling.
576         // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
577         // as possible, and lets through all common ASCII characters.
578         if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
579             m_buffer16.append(stringStart, currentCharacter() - stringStart);
580             goto inString;
581         }
582         shift1();
583     }
584     lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
585     shift1();
586     m_atLineStart = false;
587     m_delimited = false;
588     token = STRING;
589     goto returnToken;
590 
591 inString:
592     while (m_current != stringQuoteCharacter) {
593         if (m_current == '\\')
594             goto inStringEscapeSequence;
595         if (UNLIKELY(isLineTerminator(m_current)))
596             goto returnError;
597         if (UNLIKELY(m_current == -1))
598             goto returnError;
599         record16(m_current);
600         shift1();
601     }
602     goto doneString;
603 
604 inStringEscapeSequence:
605     shift1();
606     if (m_current == 'x') {
607         shift1();
608         if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
609             record16(convertHex(m_current, m_next1));
610             shift2();
611             goto inString;
612         }
613         record16('x');
614         if (m_current == stringQuoteCharacter)
615             goto doneString;
616         goto inString;
617     }
618     if (m_current == 'u') {
619         shift1();
620         if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
621             record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
622             shift4();
623             goto inString;
624         }
625         if (m_current == stringQuoteCharacter) {
626             record16('u');
627             goto doneString;
628         }
629         goto returnError;
630     }
631     if (isASCIIOctalDigit(m_current)) {
632         if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
633             record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
634             shift3();
635             goto inString;
636         }
637         if (isASCIIOctalDigit(m_next1)) {
638             record16((m_current - '0') * 8 + m_next1 - '0');
639             shift2();
640             goto inString;
641         }
642         record16(m_current - '0');
643         shift1();
644         goto inString;
645     }
646     if (isLineTerminator(m_current)) {
647         shiftLineTerminator();
648         goto inString;
649     }
650     record16(singleEscape(m_current));
651     shift1();
652     goto inString;
653 }
654 
655 startIdentifierWithBackslash:
656     shift1();
657     if (UNLIKELY(m_current != 'u'))
658         goto returnError;
659     shift1();
660     if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
661         goto returnError;
662     token = convertUnicode(m_current, m_next1, m_next2, m_next3);
663     if (UNLIKELY(!isIdentStart(token)))
664         goto returnError;
665     goto inIdentifierAfterCharacterCheck;
666 
667 startIdentifierOrKeyword: {
668     const UChar* identifierStart = currentCharacter();
669     shift1();
670     while (isIdentPart(m_current))
671         shift1();
672     if (LIKELY(m_current != '\\')) {
673         lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
674         goto doneIdentifierOrKeyword;
675     }
676     m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
677 }
678 
679     do {
680         shift1();
681         if (UNLIKELY(m_current != 'u'))
682             goto returnError;
683         shift1();
684         if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
685             goto returnError;
686         token = convertUnicode(m_current, m_next1, m_next2, m_next3);
687         if (UNLIKELY(!isIdentPart(token)))
688             goto returnError;
689 inIdentifierAfterCharacterCheck:
690         record16(token);
691         shift4();
692 
693         while (isIdentPart(m_current)) {
694             record16(m_current);
695             shift1();
696         }
697     } while (UNLIKELY(m_current == '\\'));
698     goto doneIdentifier;
699 
700 inSingleLineComment:
701     while (!isLineTerminator(m_current)) {
702         if (UNLIKELY(m_current == -1))
703             return 0;
704         shift1();
705     }
706     shiftLineTerminator();
707     m_atLineStart = true;
708     m_terminator = true;
709     if (lastTokenWasRestrKeyword())
710         goto doneSemicolon;
711     goto start;
712 
713 inMultiLineComment:
714     shift2();
715     while (m_current != '*' || m_next1 != '/') {
716         if (isLineTerminator(m_current))
717             shiftLineTerminator();
718         else {
719             shift1();
720             if (UNLIKELY(m_current == -1))
721                 goto returnError;
722         }
723     }
724     shift2();
725     m_atLineStart = false;
726     goto start;
727 
728 startNumberWithZeroDigit:
729     shift1();
730     if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
731         shift1();
732         goto inHex;
733     }
734     if (m_current == '.') {
735         record8('0');
736         record8('.');
737         shift1();
738         goto inNumberAfterDecimalPoint;
739     }
740     if ((m_current | 0x20) == 'e') {
741         record8('0');
742         record8('e');
743         shift1();
744         goto inExponentIndicator;
745     }
746     if (isASCIIOctalDigit(m_current))
747         goto inOctal;
748     if (isASCIIDigit(m_current))
749         goto startNumber;
750     lvalp->doubleValue = 0;
751     goto doneNumeric;
752 
753 inNumberAfterDecimalPoint:
754     while (isASCIIDigit(m_current)) {
755         record8(m_current);
756         shift1();
757     }
758     if ((m_current | 0x20) == 'e') {
759         record8('e');
760         shift1();
761         goto inExponentIndicator;
762     }
763     goto doneNumber;
764 
765 inExponentIndicator:
766     if (m_current == '+' || m_current == '-') {
767         record8(m_current);
768         shift1();
769     }
770     if (!isASCIIDigit(m_current))
771         goto returnError;
772     do {
773         record8(m_current);
774         shift1();
775     } while (isASCIIDigit(m_current));
776     goto doneNumber;
777 
778 inOctal: {
779     do {
780         record8(m_current);
781         shift1();
782     } while (isASCIIOctalDigit(m_current));
783     if (isASCIIDigit(m_current))
784         goto startNumber;
785 
786     double dval = 0;
787 
788     const char* end = m_buffer8.end();
789     for (const char* p = m_buffer8.data(); p < end; ++p) {
790         dval *= 8;
791         dval += *p - '0';
792     }
793     if (dval >= mantissaOverflowLowerBound)
794         dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
795 
796     m_buffer8.resize(0);
797 
798     lvalp->doubleValue = dval;
799     goto doneNumeric;
800 }
801 
802 inHex: {
803     do {
804         record8(m_current);
805         shift1();
806     } while (isASCIIHexDigit(m_current));
807 
808     double dval = 0;
809 
810     const char* end = m_buffer8.end();
811     for (const char* p = m_buffer8.data(); p < end; ++p) {
812         dval *= 16;
813         dval += toASCIIHexValue(*p);
814     }
815     if (dval >= mantissaOverflowLowerBound)
816         dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
817 
818     m_buffer8.resize(0);
819 
820     lvalp->doubleValue = dval;
821     goto doneNumeric;
822 }
823 
824 startNumber:
825     record8(m_current);
826     shift1();
827     while (isASCIIDigit(m_current)) {
828         record8(m_current);
829         shift1();
830     }
831     if (m_current == '.') {
832         record8('.');
833         shift1();
834         goto inNumberAfterDecimalPoint;
835     }
836     if ((m_current | 0x20) == 'e') {
837         record8('e');
838         shift1();
839         goto inExponentIndicator;
840     }
841 
842     // Fall through into doneNumber.
843 
844 doneNumber:
845     // Null-terminate string for strtod.
846     m_buffer8.append('\0');
847     lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
848     m_buffer8.resize(0);
849 
850     // Fall through into doneNumeric.
851 
852 doneNumeric:
853     // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
854     if (UNLIKELY(isIdentStart(m_current)))
855         goto returnError;
856 
857     m_atLineStart = false;
858     m_delimited = false;
859     token = NUMBER;
860     goto returnToken;
861 
862 doneSemicolon:
863     token = ';';
864     m_delimited = true;
865     goto returnToken;
866 
867 doneIdentifier:
868     m_atLineStart = false;
869     m_delimited = false;
870     lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
871     m_buffer16.resize(0);
872     token = IDENT;
873     goto returnToken;
874 
875 doneIdentifierOrKeyword: {
876     m_atLineStart = false;
877     m_delimited = false;
878     m_buffer16.resize(0);
879     const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
880     token = entry ? entry->lexerValue() : IDENT;
881     goto returnToken;
882 }
883 
884 doneString:
885     // Atomize constant strings in case they're later used in property lookup.
886     shift1();
887     m_atLineStart = false;
888     m_delimited = false;
889     lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
890     m_buffer16.resize(0);
891     token = STRING;
892 
893     // Fall through into returnToken.
894 
895 returnToken: {
896     int lineNumber = m_lineNumber;
897     llocp->first_line = lineNumber;
898     llocp->last_line = lineNumber;
899     llocp->first_column = startOffset;
900     llocp->last_column = currentOffset();
901 
902     m_lastToken = token;
903     return token;
904 }
905 
906 returnError:
907     m_error = true;
908     return -1;
909 }
910 
scanRegExp()911 bool Lexer::scanRegExp()
912 {
913     ASSERT(m_buffer16.isEmpty());
914 
915     bool lastWasEscape = false;
916     bool inBrackets = false;
917 
918     while (true) {
919         if (isLineTerminator(m_current) || m_current == -1)
920             return false;
921         if (m_current != '/' || lastWasEscape || inBrackets) {
922             // keep track of '[' and ']'
923             if (!lastWasEscape) {
924                 if (m_current == '[' && !inBrackets)
925                     inBrackets = true;
926                 if (m_current == ']' && inBrackets)
927                     inBrackets = false;
928             }
929             record16(m_current);
930             lastWasEscape = !lastWasEscape && m_current == '\\';
931         } else { // end of regexp
932             m_pattern = UString(m_buffer16);
933             m_buffer16.resize(0);
934             shift1();
935             break;
936         }
937         shift1();
938     }
939 
940     while (isIdentPart(m_current)) {
941         record16(m_current);
942         shift1();
943     }
944     m_flags = UString(m_buffer16);
945     m_buffer16.resize(0);
946 
947     return true;
948 }
949 
clear()950 void Lexer::clear()
951 {
952     m_identifiers.clear();
953     m_codeWithoutBOMs.clear();
954 
955     Vector<char> newBuffer8;
956     newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
957     m_buffer8.swap(newBuffer8);
958 
959     Vector<UChar> newBuffer16;
960     newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
961     m_buffer16.swap(newBuffer16);
962 
963     m_isReparsing = false;
964 
965     m_pattern = UString();
966     m_flags = UString();
967 }
968 
sourceCode(int openBrace,int closeBrace,int firstLine)969 SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
970 {
971     if (m_codeWithoutBOMs.isEmpty())
972         return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
973 
974     const UChar* data = m_source->provider()->data();
975 
976     ASSERT(openBrace < closeBrace);
977 
978     int numBOMsBeforeOpenBrace = 0;
979     int numBOMsBetweenBraces = 0;
980 
981     int i;
982     for (i = m_source->startOffset(); i < openBrace; ++i)
983         numBOMsBeforeOpenBrace += data[i] == byteOrderMark;
984     for (; i < closeBrace; ++i)
985         numBOMsBetweenBraces += data[i] == byteOrderMark;
986 
987     return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace,
988         closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine);
989 }
990 
991 } // namespace JSC
992