1 /* Copyright JS Foundation and other contributors, http://js.foundation 2 * 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef JS_LEXER_H 17 #define JS_LEXER_H 18 19 /** \addtogroup parser Parser 20 * @{ 21 * 22 * \addtogroup jsparser JavaScript 23 * @{ 24 * 25 * \addtogroup jsparser_lexer Lexer 26 * @{ 27 */ 28 29 /** 30 * Lexer token types. 31 */ 32 typedef enum 33 { 34 LEXER_EOS, /**< end of source */ 35 36 /* Primary expressions */ 37 LEXER_LITERAL, /**< literal token */ 38 LEXER_KEYW_THIS, /**< this */ 39 LEXER_LIT_TRUE, /**< true (not a keyword!) */ 40 LEXER_LIT_FALSE, /**< false (not a keyword!) */ 41 LEXER_LIT_NULL, /**< null (not a keyword!) */ 42 #if ENABLED (JERRY_ES2015) 43 LEXER_TEMPLATE_LITERAL, /**< multi segment template literal */ 44 LEXER_THREE_DOTS, /**< ... (rest or spread operator) */ 45 #endif /* ENABLED (JERRY_ES2015) */ 46 47 /* Unary operators 48 * IMPORTANT: update CBC_UNARY_OP_TOKEN_TO_OPCODE and 49 * CBC_UNARY_LVALUE_OP_TOKEN_TO_OPCODE after changes. */ 50 #define LEXER_IS_UNARY_OP_TOKEN(token_type) \ 51 ((token_type) >= LEXER_PLUS && (token_type) <= LEXER_DECREASE) 52 #define LEXER_IS_UNARY_LVALUE_OP_TOKEN(token_type) \ 53 ((token_type) >= LEXER_KEYW_DELETE && (token_type) <= LEXER_DECREASE) 54 55 LEXER_PLUS, /**< "+" */ 56 LEXER_NEGATE, /**< "-" */ 57 LEXER_LOGICAL_NOT, /**< "!" */ 58 LEXER_BIT_NOT, /**< "~" */ 59 LEXER_KEYW_VOID, /**< void */ 60 LEXER_KEYW_TYPEOF, /**< typeof */ 61 #if ENABLED (JERRY_ES2015) 62 LEXER_KEYW_AWAIT, /**< await */ 63 #endif /* ENABLED (JERRY_ES2015) */ 64 LEXER_KEYW_DELETE, /**< delete */ 65 LEXER_INCREASE, /**< "++" */ 66 LEXER_DECREASE, /**< "--" */ 67 68 /* Binary operators 69 * IMPORTANT: update CBC_BINARY_OP_TOKEN_TO_OPCODE, 70 * CBC_BINARY_LVALUE_OP_TOKEN_TO_OPCODE and 71 * parser_binary_precedence_table after changes. */ 72 #if ENABLED (JERRY_ES2015) 73 #define LEXER_IS_BINARY_OP_TOKEN(token_type) \ 74 ((token_type) >= LEXER_ASSIGN && (token_type) <= LEXER_EXPONENTIATION) 75 #else /* !ENABLED (JERRY_ES2015) */ 76 #define LEXER_IS_BINARY_OP_TOKEN(token_type) \ 77 ((token_type) >= LEXER_ASSIGN && (token_type) <= LEXER_MODULO) 78 #endif /* ENABLED (JERRY_ES2015) */ 79 80 #define LEXER_IS_BINARY_LVALUE_TOKEN(token_type) \ 81 ((token_type) >= LEXER_ASSIGN && (token_type) <= LEXER_ASSIGN_BIT_XOR) 82 83 #define LEXER_FIRST_BINARY_OP LEXER_ASSIGN 84 85 LEXER_ASSIGN, /**< "=" (prec: 3) */ 86 LEXER_ASSIGN_ADD, /**< "+=" (prec: 3) */ 87 LEXER_ASSIGN_SUBTRACT, /**< "-=" (prec: 3) */ 88 LEXER_ASSIGN_MULTIPLY, /**< "*=" (prec: 3) */ 89 LEXER_ASSIGN_DIVIDE, /**< "/=" (prec: 3) */ 90 LEXER_ASSIGN_MODULO, /**< "%=" (prec: 3) */ 91 #if ENABLED (JERRY_ES2015) 92 LEXER_ASSIGN_EXPONENTIATION, /**< "**=" (prec: 3) */ 93 #endif /* ENABLED (JERRY_ES2015) */ 94 LEXER_ASSIGN_LEFT_SHIFT, /**< "<<=" (prec: 3) */ 95 LEXER_ASSIGN_RIGHT_SHIFT, /**< ">>=" (prec: 3) */ 96 LEXER_ASSIGN_UNS_RIGHT_SHIFT, /**< ">>>=" (prec: 3) */ 97 LEXER_ASSIGN_BIT_AND, /**< "&=" (prec: 3) */ 98 LEXER_ASSIGN_BIT_OR, /**< "|=" (prec: 3) */ 99 LEXER_ASSIGN_BIT_XOR, /**< "^=" (prec: 3) */ 100 LEXER_QUESTION_MARK, /**< "?" (prec: 4) */ 101 LEXER_LOGICAL_OR, /**< "||" (prec: 5) */ 102 LEXER_LOGICAL_AND, /**< "&&" (prec: 6) */ 103 LEXER_BIT_OR, /**< "|" (prec: 7) */ 104 LEXER_BIT_XOR, /**< "^" (prec: 8) */ 105 LEXER_BIT_AND, /**< "&" (prec: 9) */ 106 LEXER_EQUAL, /**< "==" (prec: 10) */ 107 LEXER_NOT_EQUAL, /**< "!=" (prec: 10) */ 108 LEXER_STRICT_EQUAL, /**< "===" (prec: 10) */ 109 LEXER_STRICT_NOT_EQUAL, /**< "!==" (prec: 10) */ 110 LEXER_LESS, /**< "<" (prec: 11) */ 111 LEXER_GREATER, /**< ">" (prec: 11) */ 112 LEXER_LESS_EQUAL, /**< "<=" (prec: 11) */ 113 LEXER_GREATER_EQUAL, /**< ">=" (prec: 11) */ 114 LEXER_KEYW_IN, /**< in (prec: 11) */ 115 LEXER_KEYW_INSTANCEOF, /**< instanceof (prec: 11) */ 116 LEXER_LEFT_SHIFT, /**< "<<" (prec: 12) */ 117 LEXER_RIGHT_SHIFT, /**< ">>" (prec: 12) */ 118 LEXER_UNS_RIGHT_SHIFT, /**< ">>>" (prec: 12) */ 119 LEXER_ADD, /**< "+" (prec: 13) */ 120 LEXER_SUBTRACT, /**< "-" (prec: 13) */ 121 LEXER_MULTIPLY, /**< "*" (prec: 14) */ 122 LEXER_DIVIDE, /**< "/" (prec: 14) */ 123 LEXER_MODULO, /**< "%" (prec: 14) */ 124 #if ENABLED (JERRY_ES2015) 125 LEXER_EXPONENTIATION, /**< "**" (prec: 15) */ 126 #endif /* ENABLED (JERRY_ES2015) */ 127 128 LEXER_LEFT_BRACE, /**< "{" */ 129 LEXER_LEFT_PAREN, /**< "(" */ 130 LEXER_LEFT_SQUARE, /**< "[" */ 131 LEXER_RIGHT_BRACE, /**< "}" */ 132 LEXER_RIGHT_PAREN, /**< ")" */ 133 LEXER_RIGHT_SQUARE, /**< "]" */ 134 LEXER_DOT, /**< "." */ 135 LEXER_SEMICOLON, /**< ";" */ 136 LEXER_COLON, /**< ":" */ 137 LEXER_COMMA, /**< "," */ 138 #if ENABLED (JERRY_ES2015) 139 LEXER_ARROW, /**< "=>" */ 140 #endif /* ENABLED (JERRY_ES2015) */ 141 142 LEXER_KEYW_BREAK, /**< break */ 143 LEXER_KEYW_DO, /**< do */ 144 LEXER_KEYW_CASE, /**< case */ 145 LEXER_KEYW_ELSE, /**< else */ 146 LEXER_KEYW_NEW, /**< new */ 147 LEXER_KEYW_VAR, /**< var */ 148 LEXER_KEYW_CATCH, /**< catch */ 149 LEXER_KEYW_FINALLY, /**< finally */ 150 LEXER_KEYW_RETURN, /**< return */ 151 LEXER_KEYW_CONTINUE, /**< continue */ 152 LEXER_KEYW_FOR, /**< for */ 153 LEXER_KEYW_SWITCH, /**< switch */ 154 LEXER_KEYW_WHILE, /**< while */ 155 LEXER_KEYW_DEBUGGER, /**< debugger */ 156 LEXER_KEYW_FUNCTION, /**< function */ 157 LEXER_KEYW_WITH, /**< with */ 158 LEXER_KEYW_DEFAULT, /**< default */ 159 LEXER_KEYW_IF, /**< if */ 160 LEXER_KEYW_THROW, /**< throw */ 161 LEXER_KEYW_TRY, /**< try */ 162 163 LEXER_KEYW_CLASS, /**< class */ 164 LEXER_KEYW_EXTENDS, /**< extends */ 165 LEXER_KEYW_SUPER, /**< super */ 166 LEXER_KEYW_CONST, /**< const */ 167 LEXER_KEYW_EXPORT, /**< export */ 168 LEXER_KEYW_IMPORT, /**< import */ 169 LEXER_KEYW_ENUM, /**< enum */ 170 171 /* These are virtual tokens. */ 172 LEXER_EXPRESSION_START, /**< expression start */ 173 LEXER_PROPERTY_GETTER, /**< property getter function */ 174 LEXER_PROPERTY_SETTER, /**< property setter function */ 175 LEXER_COMMA_SEP_LIST, /**< comma separated bracketed expression list */ 176 #if ENABLED (JERRY_ES2015) 177 LEXER_ASSIGN_GROUP_EXPR, /**< indetifier for the assignment is located in a group expression */ 178 LEXER_ASSIGN_CONST, /**< a const binding is reassigned */ 179 LEXER_CLASS_CONSTRUCTOR, /**< special value for class constructor method */ 180 LEXER_INVALID_PATTERN, /**< special value for invalid destructuring pattern */ 181 #endif /* ENABLED (JERRY_ES2015) */ 182 183 #if ENABLED (JERRY_ES2015) 184 /* Keywords which are not keyword tokens. */ 185 #define LEXER_FIRST_NON_RESERVED_KEYWORD LEXER_KEYW_ASYNC 186 LEXER_KEYW_ASYNC, /**< async */ 187 #else /* !ENABLED (JERRY_ES2015) */ 188 /* Keywords which are not keyword tokens. */ 189 #define LEXER_FIRST_NON_RESERVED_KEYWORD LEXER_KEYW_EVAL 190 #endif /* ENABLED (JERRY_ES2015) */ 191 192 /* Keywords which cannot be assigned in strict mode. */ 193 #define LEXER_FIRST_NON_STRICT_ARGUMENTS LEXER_KEYW_EVAL 194 LEXER_KEYW_EVAL, /**< eval */ 195 LEXER_KEYW_ARGUMENTS, /**< arguments */ 196 197 /* Future strict reserved words: these keywords 198 * must form a group after non-reserved keywords. */ 199 #define LEXER_FIRST_FUTURE_STRICT_RESERVED_WORD LEXER_KEYW_IMPLEMENTS 200 LEXER_KEYW_IMPLEMENTS, /**< implements */ 201 LEXER_KEYW_PRIVATE, /**< private */ 202 LEXER_KEYW_PUBLIC, /**< public */ 203 LEXER_KEYW_INTERFACE, /**< interface */ 204 LEXER_KEYW_PACKAGE, /**< package */ 205 LEXER_KEYW_PROTECTED, /**< protected */ 206 207 /* Context dependent future strict reserved words: 208 * See also: ECMA-262 v6, 11.6.2.1 */ 209 LEXER_KEYW_LET, /**< let */ 210 LEXER_KEYW_YIELD, /**< yield */ 211 LEXER_KEYW_STATIC, /**< static */ 212 } lexer_token_type_t; 213 214 #define LEXER_NEWLINE_LS_PS_BYTE_1 0xe2 215 #define LEXER_NEWLINE_LS_PS_BYTE_23(source) \ 216 ((source)[1] == LIT_UTF8_2_BYTE_CODE_POINT_MIN && ((source)[2] | 0x1) == 0xa9) 217 218 #define LEXER_IS_LEFT_BRACKET(type) \ 219 ((type) == LEXER_LEFT_BRACE || (type) == LEXER_LEFT_PAREN || (type) == LEXER_LEFT_SQUARE) 220 221 #define LEXER_IS_RIGHT_BRACKET(type) \ 222 ((type) == LEXER_RIGHT_BRACE || (type) == LEXER_RIGHT_PAREN || (type) == LEXER_RIGHT_SQUARE) 223 224 #define LEXER_UNARY_OP_TOKEN_TO_OPCODE(token_type) \ 225 ((((token_type) - LEXER_PLUS) * 2) + CBC_PLUS) 226 227 #define LEXER_UNARY_LVALUE_OP_TOKEN_TO_OPCODE(token_type) \ 228 ((((token_type) - LEXER_INCREASE) * 6) + CBC_PRE_INCR) 229 230 #define LEXER_BINARY_OP_TOKEN_TO_OPCODE(token_type) \ 231 ((cbc_opcode_t) ((((token_type) - LEXER_BIT_OR) * 3) + CBC_BIT_OR)) 232 233 #define LEXER_BINARY_LVALUE_OP_TOKEN_TO_OPCODE(token_type) \ 234 ((cbc_opcode_t) ((((token_type) - LEXER_ASSIGN_ADD) * 2) + CBC_ASSIGN_ADD)) 235 236 /** 237 * Maximum local buffer size for identifiers which contains escape sequences. 238 */ 239 #define LEXER_MAX_LITERAL_LOCAL_BUFFER_SIZE 48 240 241 /** 242 * Lexer newline flags. 243 */ 244 typedef enum 245 { 246 LEXER_WAS_NEWLINE = (1u << 0), /**< newline was seen */ 247 LEXER_NO_SKIP_SPACES = (1u << 1) /**< ignore skip spaces */ 248 } lexer_newline_flags_t; 249 250 /** 251 * Lexer object identifier parse options. 252 */ 253 typedef enum 254 { 255 LEXER_OBJ_IDENT_NO_OPTS = (1u << 0), /**< no options */ 256 LEXER_OBJ_IDENT_ONLY_IDENTIFIERS = (1u << 1), /**< only identifiers are accepted */ 257 LEXER_OBJ_IDENT_CLASS_METHOD = (1u << 2), /**< expect identifier inside a class body */ 258 LEXER_OBJ_IDENT_OBJECT_PATTERN = (1u << 3), /**< parse "get"/"set" as string literal in object pattern */ 259 } lexer_obj_ident_opts_t; 260 261 /** 262 * Lexer string options. 263 */ 264 typedef enum 265 { 266 LEXER_STRING_NO_OPTS = (1u << 0), /**< no options */ 267 LEXER_STRING_RAW = (1u << 1), /**< raw string ECMAScript v6, 11.8.6.1: TVR */ 268 } lexer_string_options_t; 269 270 /** 271 * Lexer number types. 272 */ 273 typedef enum 274 { 275 LEXER_NUMBER_DECIMAL, /**< decimal number */ 276 LEXER_NUMBER_HEXADECIMAL, /**< hexadecimal number */ 277 LEXER_NUMBER_OCTAL, /**< octal number */ 278 LEXER_NUMBER_BINARY, /**< binary number */ 279 } lexer_number_type_t; 280 281 /** 282 * Lexer character (string / identifier) literal data. 283 */ 284 typedef struct 285 { 286 const uint8_t *char_p; /**< start of identifier or string token */ 287 prop_length_t length; /**< length or index of a literal */ 288 uint8_t type; /**< type of the current literal */ 289 uint8_t has_escape; /**< has escape sequences */ 290 } lexer_lit_location_t; 291 292 /** 293 * Lexer token. 294 */ 295 typedef struct 296 { 297 uint8_t type; /**< token type */ 298 uint8_t keyword_type; /**< keyword type for identifiers */ 299 uint8_t extra_value; /**< helper value for different purposes */ 300 uint8_t flags; /**< flag bits for the current token */ 301 parser_line_counter_t line; /**< token start line */ 302 parser_line_counter_t column; /**< token start column */ 303 lexer_lit_location_t lit_location; /**< extra data for character literals */ 304 } lexer_token_t; 305 306 /** 307 * Literal data set by lexer_construct_literal_object. 308 */ 309 typedef struct 310 { 311 lexer_literal_t *literal_p; /**< pointer to the literal object */ 312 uint16_t index; /**< literal index */ 313 } lexer_lit_object_t; 314 315 /** 316 * @} 317 * @} 318 * @} 319 */ 320 321 #endif /* !JS_LEXER_H */ 322