1 /* 2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) 3 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 4 * 5 * This library is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either 8 * version 2 of the License, or (at your option) any later version. 9 * 10 * This library is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public License 16 * along with this library; see the file COPYING.LIB. If not, write to 17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 * Boston, MA 02110-1301, USA. 19 * 20 */ 21 22 #ifndef Lexer_h 23 #define Lexer_h 24 25 #include "Lookup.h" 26 #include "ParserArena.h" 27 #include "SourceCode.h" 28 #include <wtf/ASCIICType.h> 29 #include <wtf/SegmentedVector.h> 30 #include <wtf/Vector.h> 31 #include <wtf/unicode/Unicode.h> 32 33 namespace JSC { 34 35 class RegExp; 36 37 class Lexer : public Noncopyable { 38 public: 39 // Character manipulation functions. 40 static bool isWhiteSpace(int character); 41 static bool isLineTerminator(int character); 42 static unsigned char convertHex(int c1, int c2); 43 static UChar convertUnicode(int c1, int c2, int c3, int c4); 44 45 // Functions to set up parsing. 46 void setCode(const SourceCode&, ParserArena&); setIsReparsing()47 void setIsReparsing() { m_isReparsing = true; } 48 49 // Functions for the parser itself. 50 int lex(void* lvalp, void* llocp); lineNumber()51 int lineNumber() const { return m_lineNumber; } prevTerminator()52 bool prevTerminator() const { return m_terminator; } 53 SourceCode sourceCode(int openBrace, int closeBrace, int firstLine); 54 bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0); 55 bool skipRegExp(); 56 57 // Functions for use after parsing. sawError()58 bool sawError() const { return m_error; } 59 void clear(); 60 61 private: 62 friend class JSGlobalData; 63 64 Lexer(JSGlobalData*); 65 ~Lexer(); 66 67 void shift1(); 68 void shift2(); 69 void shift3(); 70 void shift4(); 71 void shiftLineTerminator(); 72 73 void record8(int); 74 void record16(int); 75 void record16(UChar); 76 77 void copyCodeWithoutBOMs(); 78 79 int currentOffset() const; 80 const UChar* currentCharacter() const; 81 82 const Identifier* makeIdentifier(const UChar* characters, size_t length); 83 84 bool lastTokenWasRestrKeyword() const; 85 86 static const size_t initialReadBufferCapacity = 32; 87 88 int m_lineNumber; 89 90 Vector<char> m_buffer8; 91 Vector<UChar> m_buffer16; 92 bool m_terminator; 93 bool m_delimited; // encountered delimiter like "'" and "}" on last run 94 int m_lastToken; 95 96 const SourceCode* m_source; 97 const UChar* m_code; 98 const UChar* m_codeStart; 99 const UChar* m_codeEnd; 100 bool m_isReparsing; 101 bool m_atLineStart; 102 bool m_error; 103 104 // current and following unicode characters (int to allow for -1 for end-of-file marker) 105 int m_current; 106 int m_next1; 107 int m_next2; 108 int m_next3; 109 110 IdentifierArena* m_arena; 111 112 JSGlobalData* m_globalData; 113 114 const HashTable m_keywordTable; 115 116 Vector<UChar> m_codeWithoutBOMs; 117 }; 118 isWhiteSpace(int ch)119 inline bool Lexer::isWhiteSpace(int ch) 120 { 121 return isASCII(ch) ? (ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC) : WTF::Unicode::isSeparatorSpace(ch); 122 } 123 isLineTerminator(int ch)124 inline bool Lexer::isLineTerminator(int ch) 125 { 126 return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028; 127 } 128 convertHex(int c1,int c2)129 inline unsigned char Lexer::convertHex(int c1, int c2) 130 { 131 return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2); 132 } 133 convertUnicode(int c1,int c2,int c3,int c4)134 inline UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4) 135 { 136 return (convertHex(c1, c2) << 8) | convertHex(c3, c4); 137 } 138 139 // A bridge for yacc from the C world to the C++ world. jscyylex(void * lvalp,void * llocp,void * globalData)140 inline int jscyylex(void* lvalp, void* llocp, void* globalData) 141 { 142 return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp); 143 } 144 145 } // namespace JSC 146 147 #endif // Lexer_h 148