• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5  *
6  *  This library is free software; you can redistribute it and/or
7  *  modify it under the terms of the GNU Library General Public
8  *  License as published by the Free Software Foundation; either
9  *  version 2 of the License, or (at your option) any later version.
10  *
11  *  This library is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  *  Library General Public License for more details.
15  *
16  *  You should have received a copy of the GNU Library General Public License
17  *  along with this library; see the file COPYING.LIB.  If not, write to
18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  *  Boston, MA 02110-1301, USA.
20  *
21  */
22 
23 #include "config.h"
24 #include "Lexer.h"
25 
26 #include "JSFunction.h"
27 #include "JSGlobalObjectFunctions.h"
28 #include "NodeInfo.h"
29 #include "Nodes.h"
30 #include "dtoa.h"
31 #include <ctype.h>
32 #include <limits.h>
33 #include <string.h>
34 #include <wtf/ASCIICType.h>
35 #include <wtf/Assertions.h>
36 #include <wtf/unicode/Unicode.h>
37 
38 using namespace WTF;
39 using namespace Unicode;
40 
41 // we can't specify the namespace in yacc's C output, so do it here
42 using namespace JSC;
43 
44 #ifndef KDE_USE_FINAL
45 #include "Grammar.h"
46 #endif
47 
48 #include "Lookup.h"
49 #include "Lexer.lut.h"
50 
51 // a bridge for yacc from the C world to C++
jscyylex(void * lvalp,void * llocp,void * globalData)52 int jscyylex(void* lvalp, void* llocp, void* globalData)
53 {
54     return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
55 }
56 
57 namespace JSC {
58 
59 static bool isDecimalDigit(int);
60 
Lexer(JSGlobalData * globalData)61 Lexer::Lexer(JSGlobalData* globalData)
62     : yylineno(1)
63     , m_restrKeyword(false)
64     , m_eatNextIdentifier(false)
65     , m_stackToken(-1)
66     , m_lastToken(-1)
67     , m_position(0)
68     , m_code(0)
69     , m_length(0)
70     , m_isReparsing(false)
71     , m_atLineStart(true)
72     , m_current(0)
73     , m_next1(0)
74     , m_next2(0)
75     , m_next3(0)
76     , m_currentOffset(0)
77     , m_nextOffset1(0)
78     , m_nextOffset2(0)
79     , m_nextOffset3(0)
80     , m_globalData(globalData)
81     , m_mainTable(JSC::mainTable)
82 {
83     m_buffer8.reserveCapacity(initialReadBufferCapacity);
84     m_buffer16.reserveCapacity(initialReadBufferCapacity);
85 }
86 
~Lexer()87 Lexer::~Lexer()
88 {
89     m_mainTable.deleteTable();
90 }
91 
setCode(const SourceCode & source)92 void Lexer::setCode(const SourceCode& source)
93 {
94     yylineno = source.firstLine();
95     m_restrKeyword = false;
96     m_delimited = false;
97     m_eatNextIdentifier = false;
98     m_stackToken = -1;
99     m_lastToken = -1;
100 
101     m_position = source.startOffset();
102     m_source = &source;
103     m_code = source.provider()->data();
104     m_length = source.endOffset();
105     m_skipLF = false;
106     m_skipCR = false;
107     m_error = false;
108     m_atLineStart = true;
109 
110     // read first characters
111     shift(4);
112 }
113 
shift(unsigned p)114 void Lexer::shift(unsigned p)
115 {
116     // ECMA-262 calls for stripping Cf characters here, but we only do this for BOM,
117     // see <https://bugs.webkit.org/show_bug.cgi?id=4931>.
118 
119     while (p--) {
120         m_current = m_next1;
121         m_next1 = m_next2;
122         m_next2 = m_next3;
123         m_currentOffset = m_nextOffset1;
124         m_nextOffset1 = m_nextOffset2;
125         m_nextOffset2 = m_nextOffset3;
126         do {
127             if (m_position >= m_length) {
128                 m_nextOffset3 = m_position;
129                 m_position++;
130                 m_next3 = -1;
131                 break;
132             }
133             m_nextOffset3 = m_position;
134             m_next3 = m_code[m_position++];
135         } while (m_next3 == 0xFEFF);
136     }
137 }
138 
139 // called on each new line
nextLine()140 void Lexer::nextLine()
141 {
142     yylineno++;
143     m_atLineStart = true;
144 }
145 
setDone(State s)146 void Lexer::setDone(State s)
147 {
148     m_state = s;
149     m_done = true;
150 }
151 
lex(void * p1,void * p2)152 int Lexer::lex(void* p1, void* p2)
153 {
154     YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
155     YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
156     int token = 0;
157     m_state = Start;
158     unsigned short stringType = 0; // either single or double quotes
159     m_buffer8.clear();
160     m_buffer16.clear();
161     m_done = false;
162     m_terminator = false;
163     m_skipLF = false;
164     m_skipCR = false;
165 
166     // did we push a token on the stack previously ?
167     // (after an automatic semicolon insertion)
168     if (m_stackToken >= 0) {
169         setDone(Other);
170         token = m_stackToken;
171         m_stackToken = 0;
172     }
173     int startOffset = m_currentOffset;
174     while (!m_done) {
175         if (m_skipLF && m_current != '\n') // found \r but not \n afterwards
176             m_skipLF = false;
177         if (m_skipCR && m_current != '\r') // found \n but not \r afterwards
178             m_skipCR = false;
179         if (m_skipLF || m_skipCR) { // found \r\n or \n\r -> eat the second one
180             m_skipLF = false;
181             m_skipCR = false;
182             shift(1);
183         }
184         switch (m_state) {
185             case Start:
186                 startOffset = m_currentOffset;
187                 if (isWhiteSpace()) {
188                     // do nothing
189                 } else if (m_current == '/' && m_next1 == '/') {
190                     shift(1);
191                     m_state = InSingleLineComment;
192                 } else if (m_current == '/' && m_next1 == '*') {
193                     shift(1);
194                     m_state = InMultiLineComment;
195                 } else if (m_current == -1) {
196                     if (!m_terminator && !m_delimited && !m_isReparsing) {
197                         // automatic semicolon insertion if program incomplete
198                         token = ';';
199                         m_stackToken = 0;
200                         setDone(Other);
201                     } else
202                         setDone(Eof);
203                 } else if (isLineTerminator()) {
204                     nextLine();
205                     m_terminator = true;
206                     if (m_restrKeyword) {
207                         token = ';';
208                         setDone(Other);
209                     }
210                 } else if (m_current == '"' || m_current == '\'') {
211                     m_state = InString;
212                     stringType = static_cast<unsigned short>(m_current);
213                 } else if (isIdentStart(m_current)) {
214                     record16(m_current);
215                     m_state = InIdentifierOrKeyword;
216                 } else if (m_current == '\\')
217                     m_state = InIdentifierStartUnicodeEscapeStart;
218                 else if (m_current == '0') {
219                     record8(m_current);
220                     m_state = InNum0;
221                 } else if (isDecimalDigit(m_current)) {
222                     record8(m_current);
223                     m_state = InNum;
224                 } else if (m_current == '.' && isDecimalDigit(m_next1)) {
225                     record8(m_current);
226                     m_state = InDecimal;
227                     // <!-- marks the beginning of a line comment (for www usage)
228                 } else if (m_current == '<' && m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
229                     shift(3);
230                     m_state = InSingleLineComment;
231                     // same for -->
232                 } else if (m_atLineStart && m_current == '-' && m_next1 == '-' &&  m_next2 == '>') {
233                     shift(2);
234                     m_state = InSingleLineComment;
235                 } else {
236                     token = matchPunctuator(lvalp->intValue, m_current, m_next1, m_next2, m_next3);
237                     if (token != -1)
238                         setDone(Other);
239                     else
240                         setDone(Bad);
241                 }
242                 break;
243             case InString:
244                 if (m_current == stringType) {
245                     shift(1);
246                     setDone(String);
247                 } else if (isLineTerminator() || m_current == -1)
248                     setDone(Bad);
249                 else if (m_current == '\\')
250                     m_state = InEscapeSequence;
251                 else
252                     record16(m_current);
253                 break;
254             // Escape Sequences inside of strings
255             case InEscapeSequence:
256                 if (isOctalDigit(m_current)) {
257                     if (m_current >= '0' && m_current <= '3' &&
258                         isOctalDigit(m_next1) && isOctalDigit(m_next2)) {
259                         record16(convertOctal(m_current, m_next1, m_next2));
260                         shift(2);
261                         m_state = InString;
262                     } else if (isOctalDigit(m_current) && isOctalDigit(m_next1)) {
263                         record16(convertOctal('0', m_current, m_next1));
264                         shift(1);
265                         m_state = InString;
266                     } else if (isOctalDigit(m_current)) {
267                         record16(convertOctal('0', '0', m_current));
268                         m_state = InString;
269                     } else
270                         setDone(Bad);
271                 } else if (m_current == 'x')
272                     m_state = InHexEscape;
273                 else if (m_current == 'u')
274                     m_state = InUnicodeEscape;
275                 else if (isLineTerminator()) {
276                     nextLine();
277                     m_state = InString;
278                 } else {
279                     record16(singleEscape(static_cast<unsigned short>(m_current)));
280                     m_state = InString;
281                 }
282                 break;
283             case InHexEscape:
284                 if (isHexDigit(m_current) && isHexDigit(m_next1)) {
285                     m_state = InString;
286                     record16(convertHex(m_current, m_next1));
287                     shift(1);
288                 } else if (m_current == stringType) {
289                     record16('x');
290                     shift(1);
291                     setDone(String);
292                 } else {
293                     record16('x');
294                     record16(m_current);
295                     m_state = InString;
296                 }
297                 break;
298             case InUnicodeEscape:
299                 if (isHexDigit(m_current) && isHexDigit(m_next1) && isHexDigit(m_next2) && isHexDigit(m_next3)) {
300                     record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
301                     shift(3);
302                     m_state = InString;
303                 } else if (m_current == stringType) {
304                     record16('u');
305                     shift(1);
306                     setDone(String);
307                 } else
308                     setDone(Bad);
309                 break;
310             case InSingleLineComment:
311                 if (isLineTerminator()) {
312                     nextLine();
313                     m_terminator = true;
314                     if (m_restrKeyword) {
315                         token = ';';
316                         setDone(Other);
317                     } else
318                         m_state = Start;
319                 } else if (m_current == -1)
320                     setDone(Eof);
321                 break;
322             case InMultiLineComment:
323                 if (m_current == -1)
324                     setDone(Bad);
325                 else if (isLineTerminator())
326                     nextLine();
327                 else if (m_current == '*' && m_next1 == '/') {
328                     m_state = Start;
329                     shift(1);
330                 }
331                 break;
332             case InIdentifierOrKeyword:
333             case InIdentifier:
334                 if (isIdentPart(m_current))
335                     record16(m_current);
336                 else if (m_current == '\\')
337                     m_state = InIdentifierPartUnicodeEscapeStart;
338                 else
339                     setDone(m_state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
340                 break;
341             case InNum0:
342                 if (m_current == 'x' || m_current == 'X') {
343                     record8(m_current);
344                     m_state = InHex;
345                 } else if (m_current == '.') {
346                     record8(m_current);
347                     m_state = InDecimal;
348                 } else if (m_current == 'e' || m_current == 'E') {
349                     record8(m_current);
350                     m_state = InExponentIndicator;
351                 } else if (isOctalDigit(m_current)) {
352                     record8(m_current);
353                     m_state = InOctal;
354                 } else if (isDecimalDigit(m_current)) {
355                     record8(m_current);
356                     m_state = InDecimal;
357                 } else
358                     setDone(Number);
359                 break;
360             case InHex:
361                 if (isHexDigit(m_current))
362                     record8(m_current);
363                 else
364                     setDone(Hex);
365                 break;
366             case InOctal:
367                 if (isOctalDigit(m_current))
368                     record8(m_current);
369                 else if (isDecimalDigit(m_current)) {
370                     record8(m_current);
371                     m_state = InDecimal;
372                 } else
373                     setDone(Octal);
374                 break;
375             case InNum:
376                 if (isDecimalDigit(m_current))
377                     record8(m_current);
378                 else if (m_current == '.') {
379                     record8(m_current);
380                     m_state = InDecimal;
381                 } else if (m_current == 'e' || m_current == 'E') {
382                     record8(m_current);
383                     m_state = InExponentIndicator;
384                 } else
385                     setDone(Number);
386                 break;
387             case InDecimal:
388                 if (isDecimalDigit(m_current))
389                     record8(m_current);
390                 else if (m_current == 'e' || m_current == 'E') {
391                     record8(m_current);
392                     m_state = InExponentIndicator;
393                 } else
394                     setDone(Number);
395                 break;
396             case InExponentIndicator:
397                 if (m_current == '+' || m_current == '-')
398                     record8(m_current);
399                 else if (isDecimalDigit(m_current)) {
400                     record8(m_current);
401                     m_state = InExponent;
402                 } else
403                     setDone(Bad);
404                 break;
405             case InExponent:
406                 if (isDecimalDigit(m_current))
407                     record8(m_current);
408                 else
409                     setDone(Number);
410                 break;
411             case InIdentifierStartUnicodeEscapeStart:
412                 if (m_current == 'u')
413                     m_state = InIdentifierStartUnicodeEscape;
414                 else
415                     setDone(Bad);
416                 break;
417             case InIdentifierPartUnicodeEscapeStart:
418                 if (m_current == 'u')
419                     m_state = InIdentifierPartUnicodeEscape;
420                 else
421                     setDone(Bad);
422                 break;
423             case InIdentifierStartUnicodeEscape:
424                 if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
425                     setDone(Bad);
426                     break;
427                 }
428                 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
429                 shift(3);
430                 if (!isIdentStart(token)) {
431                     setDone(Bad);
432                     break;
433                 }
434                 record16(token);
435                 m_state = InIdentifier;
436                 break;
437             case InIdentifierPartUnicodeEscape:
438                 if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
439                     setDone(Bad);
440                     break;
441                 }
442                 token = convertUnicode(m_current, m_next1, m_next2, m_next3);
443                 shift(3);
444                 if (!isIdentPart(token)) {
445                     setDone(Bad);
446                     break;
447                 }
448                 record16(token);
449                 m_state = InIdentifier;
450                 break;
451             default:
452                 ASSERT(!"Unhandled state in switch statement");
453         }
454 
455         // move on to the next character
456         if (!m_done)
457             shift(1);
458         if (m_state != Start && m_state != InSingleLineComment)
459             m_atLineStart = false;
460     }
461 
462     // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
463     if ((m_state == Number || m_state == Octal || m_state == Hex) && isIdentStart(m_current))
464         m_state = Bad;
465 
466     // terminate string
467     m_buffer8.append('\0');
468 
469 #ifdef JSC_DEBUG_LEX
470     fprintf(stderr, "line: %d ", lineNo());
471     fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
472     fprintf(stderr, "%s ", m_buffer8.data());
473 #endif
474 
475     double dval = 0;
476     if (m_state == Number)
477         dval = WTF::strtod(m_buffer8.data(), 0L);
478     else if (m_state == Hex) { // scan hex numbers
479         const char* p = m_buffer8.data() + 2;
480         while (char c = *p++) {
481             dval *= 16;
482             dval += convertHex(c);
483         }
484 
485         if (dval >= mantissaOverflowLowerBound)
486             dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
487 
488         m_state = Number;
489     } else if (m_state == Octal) {   // scan octal number
490         const char* p = m_buffer8.data() + 1;
491         while (char c = *p++) {
492             dval *= 8;
493             dval += c - '0';
494         }
495 
496         if (dval >= mantissaOverflowLowerBound)
497             dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
498 
499         m_state = Number;
500     }
501 
502 #ifdef JSC_DEBUG_LEX
503     switch (m_state) {
504         case Eof:
505             printf("(EOF)\n");
506             break;
507         case Other:
508             printf("(Other)\n");
509             break;
510         case Identifier:
511             printf("(Identifier)/(Keyword)\n");
512             break;
513         case String:
514             printf("(String)\n");
515             break;
516         case Number:
517             printf("(Number)\n");
518             break;
519         default:
520             printf("(unknown)");
521     }
522 #endif
523 
524     if (m_state != Identifier)
525         m_eatNextIdentifier = false;
526 
527     m_restrKeyword = false;
528     m_delimited = false;
529     llocp->first_line = yylineno;
530     llocp->last_line = yylineno;
531     llocp->first_column = startOffset;
532     llocp->last_column = m_currentOffset;
533     switch (m_state) {
534         case Eof:
535             token = 0;
536             break;
537         case Other:
538             if (token == '}' || token == ';')
539                 m_delimited = true;
540             break;
541         case Identifier:
542             // Apply anonymous-function hack below (eat the identifier).
543             if (m_eatNextIdentifier) {
544                 m_eatNextIdentifier = false;
545                 token = lex(lvalp, llocp);
546                 break;
547             }
548             lvalp->ident = makeIdentifier(m_buffer16);
549             token = IDENT;
550             break;
551         case IdentifierOrKeyword: {
552             lvalp->ident = makeIdentifier(m_buffer16);
553             const HashEntry* entry = m_mainTable.entry(m_globalData, *lvalp->ident);
554             if (!entry) {
555                 // Lookup for keyword failed, means this is an identifier.
556                 token = IDENT;
557                 break;
558             }
559             token = entry->lexerValue();
560             // Hack for "f = function somename() { ... }"; too hard to get into the grammar.
561             m_eatNextIdentifier = token == FUNCTION && m_lastToken == '=';
562             if (token == CONTINUE || token == BREAK || token == RETURN || token == THROW)
563                 m_restrKeyword = true;
564             break;
565         }
566         case String:
567             // Atomize constant strings in case they're later used in property lookup.
568             lvalp->ident = makeIdentifier(m_buffer16);
569             token = STRING;
570             break;
571         case Number:
572             lvalp->doubleValue = dval;
573             token = NUMBER;
574             break;
575         case Bad:
576 #ifdef JSC_DEBUG_LEX
577             fprintf(stderr, "yylex: ERROR.\n");
578 #endif
579             m_error = true;
580             return -1;
581         default:
582             ASSERT(!"unhandled numeration value in switch");
583             m_error = true;
584             return -1;
585     }
586     m_lastToken = token;
587     return token;
588 }
589 
isWhiteSpace() const590 bool Lexer::isWhiteSpace() const
591 {
592     return m_current == '\t' || m_current == 0x0b || m_current == 0x0c || isSeparatorSpace(m_current);
593 }
594 
isLineTerminator()595 bool Lexer::isLineTerminator()
596 {
597     bool cr = (m_current == '\r');
598     bool lf = (m_current == '\n');
599     if (cr)
600         m_skipLF = true;
601     else if (lf)
602         m_skipCR = true;
603     return cr || lf || m_current == 0x2028 || m_current == 0x2029;
604 }
605 
isIdentStart(int c)606 bool Lexer::isIdentStart(int c)
607 {
608     return isASCIIAlpha(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other)));
609 }
610 
isIdentPart(int c)611 bool Lexer::isIdentPart(int c)
612 {
613     return isASCIIAlphanumeric(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
614                             | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)));
615 }
616 
isDecimalDigit(int c)617 static bool isDecimalDigit(int c)
618 {
619     return isASCIIDigit(c);
620 }
621 
isHexDigit(int c)622 bool Lexer::isHexDigit(int c)
623 {
624     return isASCIIHexDigit(c);
625 }
626 
isOctalDigit(int c)627 bool Lexer::isOctalDigit(int c)
628 {
629     return isASCIIOctalDigit(c);
630 }
631 
matchPunctuator(int & charPos,int c1,int c2,int c3,int c4)632 int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4)
633 {
634     if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
635         shift(4);
636         return URSHIFTEQUAL;
637     }
638     if (c1 == '=' && c2 == '=' && c3 == '=') {
639         shift(3);
640         return STREQ;
641     }
642     if (c1 == '!' && c2 == '=' && c3 == '=') {
643         shift(3);
644         return STRNEQ;
645     }
646     if (c1 == '>' && c2 == '>' && c3 == '>') {
647         shift(3);
648         return URSHIFT;
649     }
650     if (c1 == '<' && c2 == '<' && c3 == '=') {
651         shift(3);
652         return LSHIFTEQUAL;
653     }
654     if (c1 == '>' && c2 == '>' && c3 == '=') {
655         shift(3);
656         return RSHIFTEQUAL;
657     }
658     if (c1 == '<' && c2 == '=') {
659         shift(2);
660         return LE;
661     }
662     if (c1 == '>' && c2 == '=') {
663         shift(2);
664         return GE;
665     }
666     if (c1 == '!' && c2 == '=') {
667         shift(2);
668         return NE;
669     }
670     if (c1 == '+' && c2 == '+') {
671         shift(2);
672         if (m_terminator)
673             return AUTOPLUSPLUS;
674         return PLUSPLUS;
675     }
676     if (c1 == '-' && c2 == '-') {
677         shift(2);
678         if (m_terminator)
679             return AUTOMINUSMINUS;
680         return MINUSMINUS;
681     }
682     if (c1 == '=' && c2 == '=') {
683         shift(2);
684         return EQEQ;
685     }
686     if (c1 == '+' && c2 == '=') {
687         shift(2);
688         return PLUSEQUAL;
689     }
690     if (c1 == '-' && c2 == '=') {
691         shift(2);
692         return MINUSEQUAL;
693     }
694     if (c1 == '*' && c2 == '=') {
695         shift(2);
696         return MULTEQUAL;
697     }
698     if (c1 == '/' && c2 == '=') {
699         shift(2);
700         return DIVEQUAL;
701     }
702     if (c1 == '&' && c2 == '=') {
703         shift(2);
704         return ANDEQUAL;
705     }
706     if (c1 == '^' && c2 == '=') {
707         shift(2);
708         return XOREQUAL;
709     }
710     if (c1 == '%' && c2 == '=') {
711         shift(2);
712         return MODEQUAL;
713     }
714     if (c1 == '|' && c2 == '=') {
715         shift(2);
716         return OREQUAL;
717     }
718     if (c1 == '<' && c2 == '<') {
719         shift(2);
720         return LSHIFT;
721     }
722     if (c1 == '>' && c2 == '>') {
723         shift(2);
724         return RSHIFT;
725     }
726     if (c1 == '&' && c2 == '&') {
727         shift(2);
728         return AND;
729     }
730     if (c1 == '|' && c2 == '|') {
731         shift(2);
732         return OR;
733     }
734 
735     switch (c1) {
736         case '=':
737         case '>':
738         case '<':
739         case ',':
740         case '!':
741         case '~':
742         case '?':
743         case ':':
744         case '.':
745         case '+':
746         case '-':
747         case '*':
748         case '/':
749         case '&':
750         case '|':
751         case '^':
752         case '%':
753         case '(':
754         case ')':
755         case '[':
756         case ']':
757         case ';':
758             shift(1);
759             return static_cast<int>(c1);
760         case '{':
761             charPos = m_currentOffset;
762             shift(1);
763             return OPENBRACE;
764         case '}':
765             charPos = m_currentOffset;
766             shift(1);
767             return CLOSEBRACE;
768         default:
769             return -1;
770     }
771 }
772 
singleEscape(unsigned short c)773 unsigned short Lexer::singleEscape(unsigned short c)
774 {
775     switch (c) {
776         case 'b':
777             return 0x08;
778         case 't':
779             return 0x09;
780         case 'n':
781             return 0x0A;
782         case 'v':
783             return 0x0B;
784         case 'f':
785             return 0x0C;
786         case 'r':
787             return 0x0D;
788         case '"':
789             return 0x22;
790         case '\'':
791             return 0x27;
792         case '\\':
793             return 0x5C;
794         default:
795             return c;
796     }
797 }
798 
convertOctal(int c1,int c2,int c3)799 unsigned short Lexer::convertOctal(int c1, int c2, int c3)
800 {
801     return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
802 }
803 
convertHex(int c)804 unsigned char Lexer::convertHex(int c)
805 {
806     if (c >= '0' && c <= '9')
807         return static_cast<unsigned char>(c - '0');
808     if (c >= 'a' && c <= 'f')
809         return static_cast<unsigned char>(c - 'a' + 10);
810     return static_cast<unsigned char>(c - 'A' + 10);
811 }
812 
convertHex(int c1,int c2)813 unsigned char Lexer::convertHex(int c1, int c2)
814 {
815     return ((convertHex(c1) << 4) + convertHex(c2));
816 }
817 
convertUnicode(int c1,int c2,int c3,int c4)818 UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
819 {
820     unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2);
821     unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4);
822     return (highByte << 8 | lowByte);
823 }
824 
record8(int c)825 void Lexer::record8(int c)
826 {
827     ASSERT(c >= 0);
828     ASSERT(c <= 0xff);
829     m_buffer8.append(static_cast<char>(c));
830 }
831 
record16(int c)832 void Lexer::record16(int c)
833 {
834     ASSERT(c >= 0);
835     ASSERT(c <= USHRT_MAX);
836     record16(UChar(static_cast<unsigned short>(c)));
837 }
838 
record16(UChar c)839 void Lexer::record16(UChar c)
840 {
841     m_buffer16.append(c);
842 }
843 
scanRegExp()844 bool Lexer::scanRegExp()
845 {
846     m_buffer16.clear();
847     bool lastWasEscape = false;
848     bool inBrackets = false;
849 
850     while (1) {
851         if (isLineTerminator() || m_current == -1)
852             return false;
853         else if (m_current != '/' || lastWasEscape == true || inBrackets == true) {
854             // keep track of '[' and ']'
855             if (!lastWasEscape) {
856                 if ( m_current == '[' && !inBrackets )
857                     inBrackets = true;
858                 if ( m_current == ']' && inBrackets )
859                     inBrackets = false;
860             }
861             record16(m_current);
862             lastWasEscape =
863             !lastWasEscape && (m_current == '\\');
864         } else { // end of regexp
865             m_pattern = UString(m_buffer16);
866             m_buffer16.clear();
867             shift(1);
868             break;
869         }
870         shift(1);
871     }
872 
873     while (isIdentPart(m_current)) {
874         record16(m_current);
875         shift(1);
876     }
877     m_flags = UString(m_buffer16);
878 
879     return true;
880 }
881 
clear()882 void Lexer::clear()
883 {
884     m_identifiers.clear();
885 
886     Vector<char> newBuffer8;
887     newBuffer8.reserveCapacity(initialReadBufferCapacity);
888     m_buffer8.swap(newBuffer8);
889 
890     Vector<UChar> newBuffer16;
891     newBuffer16.reserveCapacity(initialReadBufferCapacity);
892     m_buffer16.swap(newBuffer16);
893 
894     m_isReparsing = false;
895 
896     m_pattern = 0;
897     m_flags = 0;
898 }
899 
900 } // namespace JSC
901