• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
 *  Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 *
 */
21 
22 #ifndef Lexer_h
23 #define Lexer_h
24 
25 #include "Lookup.h"
26 #include "SourceCode.h"
27 #include <wtf/ASCIICType.h>
28 #include <wtf/SegmentedVector.h>
29 #include <wtf/Vector.h>
30 #include <wtf/unicode/Unicode.h>
31 
32 namespace JSC {
33 
34     class RegExp;
35 
36     class Lexer : public Noncopyable {
37     public:
38         // Character manipulation functions.
39         static bool isWhiteSpace(int character);
40         static bool isLineTerminator(int character);
41         static unsigned char convertHex(int c1, int c2);
42         static UChar convertUnicode(int c1, int c2, int c3, int c4);
43 
44         // Functions to set up parsing.
45         void setCode(const SourceCode&);
setIsReparsing()46         void setIsReparsing() { m_isReparsing = true; }
47 
48         // Functions for the parser itself.
49         int lex(void* lvalp, void* llocp);
lineNumber()50         int lineNumber() const { return m_lineNumber; }
prevTerminator()51         bool prevTerminator() const { return m_terminator; }
52         SourceCode sourceCode(int openBrace, int closeBrace, int firstLine);
53         bool scanRegExp();
pattern()54         const UString& pattern() const { return m_pattern; }
flags()55         const UString& flags() const { return m_flags; }
56 
57         // Functions for use after parsing.
sawError()58         bool sawError() const { return m_error; }
59         void clear();
60 
61     private:
62         friend class JSGlobalData;
63 
64         Lexer(JSGlobalData*);
65         ~Lexer();
66 
67         void shift1();
68         void shift2();
69         void shift3();
70         void shift4();
71         void shiftLineTerminator();
72 
73         void record8(int);
74         void record16(int);
75         void record16(UChar);
76 
77         void copyCodeWithoutBOMs();
78 
79         int currentOffset() const;
80         const UChar* currentCharacter() const;
81 
82         JSC::Identifier* makeIdentifier(const UChar* buffer, size_t length);
83 
84         bool lastTokenWasRestrKeyword() const;
85 
86         static const size_t initialReadBufferCapacity = 32;
87         static const size_t initialIdentifierTableCapacity = 64;
88 
89         int m_lineNumber;
90 
91         Vector<char> m_buffer8;
92         Vector<UChar> m_buffer16;
93         bool m_terminator;
94         bool m_delimited; // encountered delimiter like "'" and "}" on last run
95         int m_lastToken;
96 
97         const SourceCode* m_source;
98         const UChar* m_code;
99         const UChar* m_codeStart;
100         const UChar* m_codeEnd;
101         bool m_isReparsing;
102         bool m_atLineStart;
103         bool m_error;
104 
105         // current and following unicode characters (int to allow for -1 for end-of-file marker)
106         int m_current;
107         int m_next1;
108         int m_next2;
109         int m_next3;
110 
111         WTF::SegmentedVector<JSC::Identifier, initialIdentifierTableCapacity> m_identifiers;
112 
113         JSGlobalData* m_globalData;
114 
115         UString m_pattern;
116         UString m_flags;
117 
118         const HashTable m_keywordTable;
119 
120         Vector<UChar> m_codeWithoutBOMs;
121     };
122 
isWhiteSpace(int ch)123     inline bool Lexer::isWhiteSpace(int ch)
124     {
125         return isASCII(ch) ? (ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC) : WTF::Unicode::isSeparatorSpace(ch);
126     }
127 
isLineTerminator(int ch)128     inline bool Lexer::isLineTerminator(int ch)
129     {
130         return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028;
131     }
132 
convertHex(int c1,int c2)133     inline unsigned char Lexer::convertHex(int c1, int c2)
134     {
135         return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2);
136     }
137 
convertUnicode(int c1,int c2,int c3,int c4)138     inline UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
139     {
140         return (convertHex(c1, c2) << 8) | convertHex(c3, c4);
141     }
142 
143 } // namespace JSC
144 
145 #endif // Lexer_h
146