• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright JS Foundation and other contributors, http://js.foundation
2  *
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef JS_LEXER_H
17 #define JS_LEXER_H
18 
19 /** \addtogroup parser Parser
20  * @{
21  *
22  * \addtogroup jsparser JavaScript
23  * @{
24  *
25  * \addtogroup jsparser_lexer Lexer
26  * @{
27  */
28 
29 /**
30  * Lexer token types.
31  */
32 typedef enum
33 {
34   LEXER_EOS,                     /**< end of source */
35 
36   /* Primary expressions */
37   LEXER_LITERAL,                 /**< literal token */
38   LEXER_KEYW_THIS,               /**< this */
39   LEXER_LIT_TRUE,                /**< true (not a keyword!) */
40   LEXER_LIT_FALSE,               /**< false (not a keyword!) */
41   LEXER_LIT_NULL,                /**< null (not a keyword!) */
42 #if ENABLED (JERRY_ES2015)
43   LEXER_TEMPLATE_LITERAL,        /**< multi segment template literal */
44   LEXER_THREE_DOTS,              /**< ... (rest or spread operator) */
45 #endif /* ENABLED (JERRY_ES2015) */
46 
47   /* Unary operators
48    * IMPORTANT: update CBC_UNARY_OP_TOKEN_TO_OPCODE and
49    *            CBC_UNARY_LVALUE_OP_TOKEN_TO_OPCODE after changes. */
50 #define LEXER_IS_UNARY_OP_TOKEN(token_type) \
51   ((token_type) >= LEXER_PLUS && (token_type) <= LEXER_DECREASE)
52 #define LEXER_IS_UNARY_LVALUE_OP_TOKEN(token_type) \
53   ((token_type) >= LEXER_KEYW_DELETE && (token_type) <= LEXER_DECREASE)
54 
55   LEXER_PLUS,                    /**< "+" */
56   LEXER_NEGATE,                  /**< "-" */
57   LEXER_LOGICAL_NOT,             /**< "!" */
58   LEXER_BIT_NOT,                 /**< "~" */
59   LEXER_KEYW_VOID,               /**< void */
60   LEXER_KEYW_TYPEOF,             /**< typeof */
61 #if ENABLED (JERRY_ES2015)
62   LEXER_KEYW_AWAIT,              /**< await */
63 #endif /* ENABLED (JERRY_ES2015) */
64   LEXER_KEYW_DELETE,             /**< delete */
65   LEXER_INCREASE,                /**< "++" */
66   LEXER_DECREASE,                /**< "--" */
67 
68   /* Binary operators
69    * IMPORTANT: update CBC_BINARY_OP_TOKEN_TO_OPCODE,
70    *            CBC_BINARY_LVALUE_OP_TOKEN_TO_OPCODE and
71    *            parser_binary_precedence_table after changes. */
72 #if ENABLED (JERRY_ES2015)
73 #define LEXER_IS_BINARY_OP_TOKEN(token_type) \
74   ((token_type) >= LEXER_ASSIGN && (token_type) <= LEXER_EXPONENTIATION)
75 #else /* !ENABLED (JERRY_ES2015) */
76 #define LEXER_IS_BINARY_OP_TOKEN(token_type) \
77   ((token_type) >= LEXER_ASSIGN && (token_type) <= LEXER_MODULO)
78 #endif /* ENABLED (JERRY_ES2015) */
79 
80 #define LEXER_IS_BINARY_LVALUE_TOKEN(token_type) \
81   ((token_type) >= LEXER_ASSIGN && (token_type) <= LEXER_ASSIGN_BIT_XOR)
82 
83 #define LEXER_FIRST_BINARY_OP LEXER_ASSIGN
84 
85   LEXER_ASSIGN,                  /**< "=" (prec: 3) */
86   LEXER_ASSIGN_ADD,              /**< "+=" (prec: 3) */
87   LEXER_ASSIGN_SUBTRACT,         /**< "-=" (prec: 3) */
88   LEXER_ASSIGN_MULTIPLY,         /**< "*=" (prec: 3) */
89   LEXER_ASSIGN_DIVIDE,           /**< "/=" (prec: 3) */
90   LEXER_ASSIGN_MODULO,           /**< "%=" (prec: 3) */
91 #if ENABLED (JERRY_ES2015)
92   LEXER_ASSIGN_EXPONENTIATION,   /**< "**=" (prec: 3) */
93 #endif /* ENABLED (JERRY_ES2015) */
94   LEXER_ASSIGN_LEFT_SHIFT,       /**< "<<=" (prec: 3) */
95   LEXER_ASSIGN_RIGHT_SHIFT,      /**< ">>=" (prec: 3) */
96   LEXER_ASSIGN_UNS_RIGHT_SHIFT,  /**< ">>>=" (prec: 3) */
97   LEXER_ASSIGN_BIT_AND,          /**< "&=" (prec: 3) */
98   LEXER_ASSIGN_BIT_OR,           /**< "|=" (prec: 3) */
99   LEXER_ASSIGN_BIT_XOR,          /**< "^=" (prec: 3) */
100   LEXER_QUESTION_MARK,           /**< "?" (prec: 4) */
101   LEXER_LOGICAL_OR,              /**< "||" (prec: 5) */
102   LEXER_LOGICAL_AND,             /**< "&&" (prec: 6) */
103   LEXER_BIT_OR,                  /**< "|" (prec: 7) */
104   LEXER_BIT_XOR,                 /**< "^" (prec: 8) */
105   LEXER_BIT_AND,                 /**< "&" (prec: 9) */
106   LEXER_EQUAL,                   /**< "==" (prec: 10) */
107   LEXER_NOT_EQUAL,               /**< "!=" (prec: 10) */
108   LEXER_STRICT_EQUAL,            /**< "===" (prec: 10) */
109   LEXER_STRICT_NOT_EQUAL,        /**< "!==" (prec: 10) */
110   LEXER_LESS,                    /**< "<" (prec: 11) */
111   LEXER_GREATER,                 /**< ">" (prec: 11) */
112   LEXER_LESS_EQUAL,              /**< "<=" (prec: 11) */
113   LEXER_GREATER_EQUAL,           /**< ">=" (prec: 11) */
114   LEXER_KEYW_IN,                 /**< in (prec: 11) */
115   LEXER_KEYW_INSTANCEOF,         /**< instanceof (prec: 11) */
116   LEXER_LEFT_SHIFT,              /**< "<<" (prec: 12) */
117   LEXER_RIGHT_SHIFT,             /**< ">>" (prec: 12) */
118   LEXER_UNS_RIGHT_SHIFT,         /**< ">>>" (prec: 12) */
119   LEXER_ADD,                     /**< "+" (prec: 13) */
120   LEXER_SUBTRACT,                /**< "-" (prec: 13) */
121   LEXER_MULTIPLY,                /**< "*" (prec: 14) */
122   LEXER_DIVIDE,                  /**< "/" (prec: 14) */
123   LEXER_MODULO,                  /**< "%" (prec: 14) */
124 #if ENABLED (JERRY_ES2015)
125   LEXER_EXPONENTIATION,          /**< "**" (prec: 15) */
126 #endif /* ENABLED (JERRY_ES2015) */
127 
128   LEXER_LEFT_BRACE,              /**< "{" */
129   LEXER_LEFT_PAREN,              /**< "(" */
130   LEXER_LEFT_SQUARE,             /**< "[" */
131   LEXER_RIGHT_BRACE,             /**< "}" */
132   LEXER_RIGHT_PAREN,             /**< ")" */
133   LEXER_RIGHT_SQUARE,            /**< "]" */
134   LEXER_DOT,                     /**< "." */
135   LEXER_SEMICOLON,               /**< ";" */
136   LEXER_COLON,                   /**< ":" */
137   LEXER_COMMA,                   /**< "," */
138 #if ENABLED (JERRY_ES2015)
139   LEXER_ARROW,                   /**< "=>" */
140 #endif /* ENABLED (JERRY_ES2015) */
141 
142   LEXER_KEYW_BREAK,              /**< break */
143   LEXER_KEYW_DO,                 /**< do */
144   LEXER_KEYW_CASE,               /**< case  */
145   LEXER_KEYW_ELSE,               /**< else */
146   LEXER_KEYW_NEW,                /**< new */
147   LEXER_KEYW_VAR,                /**< var */
148   LEXER_KEYW_CATCH,              /**< catch */
149   LEXER_KEYW_FINALLY,            /**< finally */
150   LEXER_KEYW_RETURN,             /**< return */
151   LEXER_KEYW_CONTINUE,           /**< continue */
152   LEXER_KEYW_FOR,                /**< for */
153   LEXER_KEYW_SWITCH,             /**< switch */
154   LEXER_KEYW_WHILE,              /**< while */
155   LEXER_KEYW_DEBUGGER,           /**< debugger */
156   LEXER_KEYW_FUNCTION,           /**< function */
157   LEXER_KEYW_WITH,               /**< with */
158   LEXER_KEYW_DEFAULT,            /**< default */
159   LEXER_KEYW_IF,                 /**< if */
160   LEXER_KEYW_THROW,              /**< throw */
161   LEXER_KEYW_TRY,                /**< try */
162 
163   LEXER_KEYW_CLASS,              /**< class */
164   LEXER_KEYW_EXTENDS,            /**< extends */
165   LEXER_KEYW_SUPER,              /**< super */
166   LEXER_KEYW_CONST,              /**< const */
167   LEXER_KEYW_EXPORT,             /**< export */
168   LEXER_KEYW_IMPORT,             /**< import */
169   LEXER_KEYW_ENUM,               /**< enum */
170 
171   /* These are virtual tokens. */
172   LEXER_EXPRESSION_START,        /**< expression start */
173   LEXER_PROPERTY_GETTER,         /**< property getter function */
174   LEXER_PROPERTY_SETTER,         /**< property setter function */
175   LEXER_COMMA_SEP_LIST,          /**< comma separated bracketed expression list */
176 #if ENABLED (JERRY_ES2015)
177   LEXER_ASSIGN_GROUP_EXPR,       /**< indetifier for the assignment is located in a group expression */
178   LEXER_ASSIGN_CONST,            /**< a const binding is reassigned */
179   LEXER_CLASS_CONSTRUCTOR,       /**< special value for class constructor method */
180   LEXER_INVALID_PATTERN,         /**< special value for invalid destructuring pattern */
181 #endif /* ENABLED (JERRY_ES2015) */
182 
183 #if ENABLED (JERRY_ES2015)
184   /* Keywords which are not keyword tokens. */
185 #define LEXER_FIRST_NON_RESERVED_KEYWORD LEXER_KEYW_ASYNC
186   LEXER_KEYW_ASYNC,              /**< async */
187 #else /* !ENABLED (JERRY_ES2015) */
188   /* Keywords which are not keyword tokens. */
189 #define LEXER_FIRST_NON_RESERVED_KEYWORD LEXER_KEYW_EVAL
190 #endif /* ENABLED (JERRY_ES2015) */
191 
192   /* Keywords which cannot be assigned in strict mode. */
193 #define LEXER_FIRST_NON_STRICT_ARGUMENTS LEXER_KEYW_EVAL
194   LEXER_KEYW_EVAL,               /**< eval */
195   LEXER_KEYW_ARGUMENTS,          /**< arguments */
196 
197   /* Future strict reserved words: these keywords
198    * must form a group after non-reserved keywords. */
199 #define LEXER_FIRST_FUTURE_STRICT_RESERVED_WORD LEXER_KEYW_IMPLEMENTS
200   LEXER_KEYW_IMPLEMENTS,         /**< implements */
201   LEXER_KEYW_PRIVATE,            /**< private */
202   LEXER_KEYW_PUBLIC,             /**< public */
203   LEXER_KEYW_INTERFACE,          /**< interface */
204   LEXER_KEYW_PACKAGE,            /**< package */
205   LEXER_KEYW_PROTECTED,          /**< protected */
206 
207   /* Context dependent future strict reserved words:
208    * See also: ECMA-262 v6, 11.6.2.1 */
209   LEXER_KEYW_LET,                /**< let */
210   LEXER_KEYW_YIELD,              /**< yield */
211   LEXER_KEYW_STATIC,             /**< static */
212 } lexer_token_type_t;
213 
/**
 * First byte of the UTF-8 sequences of the line separator (U+2028,
 * E2 80 A8) and paragraph separator (U+2029, E2 80 A9) characters.
 */
#define LEXER_NEWLINE_LS_PS_BYTE_1 0xe2

/**
 * Checks the second and third bytes of a sequence whose first byte
 * (source[0]) is expected to be LEXER_NEWLINE_LS_PS_BYTE_1.
 *
 * The third byte may be either 0xa8 or 0xa9: forcing its lowest bit
 * to one maps both values to 0xa9.
 *
 * NOTE(review): the second byte is compared to
 * LIT_UTF8_2_BYTE_CODE_POINT_MIN, which is defined elsewhere and
 * presumably equals 0x80 - confirm against its definition.
 */
#define LEXER_NEWLINE_LS_PS_BYTE_23(source) \
  (LIT_UTF8_2_BYTE_CODE_POINT_MIN == (source)[1] && 0xa9 == ((source)[2] | 0x1))
217 
/**
 * True when the token type is one of the opening brackets: "{", "(" or "[".
 */
#define LEXER_IS_LEFT_BRACKET(type) \
  (LEXER_LEFT_BRACE == (type) \
   || LEXER_LEFT_PAREN == (type) \
   || LEXER_LEFT_SQUARE == (type))

/**
 * True when the token type is one of the closing brackets: "}", ")" or "]".
 */
#define LEXER_IS_RIGHT_BRACKET(type) \
  (LEXER_RIGHT_BRACE == (type) \
   || LEXER_RIGHT_PAREN == (type) \
   || LEXER_RIGHT_SQUARE == (type))
223 
/*
 * Token-to-opcode conversions. Each macro takes the distance of the token
 * from an anchor token, scales it by a fixed stride and adds the result to
 * an anchor opcode. None of them validates its argument.
 *
 * NOTE(review): the strides (2, 6, 3, 2) presumably mirror the layout of
 * the CBC opcode list, which is defined elsewhere - confirm there before
 * changing either side.
 */

/**
 * Unary operator token to opcode: anchored at LEXER_PLUS / CBC_PLUS, stride 2.
 */
#define LEXER_UNARY_OP_TOKEN_TO_OPCODE(token_type) \
   (CBC_PLUS + (2 * ((token_type) - LEXER_PLUS)))

/**
 * Unary lvalue operator token to opcode: anchored at LEXER_INCREASE /
 * CBC_PRE_INCR, stride 6.
 */
#define LEXER_UNARY_LVALUE_OP_TOKEN_TO_OPCODE(token_type) \
   (CBC_PRE_INCR + (6 * ((token_type) - LEXER_INCREASE)))

/**
 * Binary operator token to opcode: anchored at LEXER_BIT_OR / CBC_BIT_OR,
 * stride 3. The result is cast to cbc_opcode_t.
 */
#define LEXER_BINARY_OP_TOKEN_TO_OPCODE(token_type) \
   ((cbc_opcode_t) (CBC_BIT_OR + (3 * ((token_type) - LEXER_BIT_OR))))

/**
 * Binary lvalue (compound assignment) token to opcode: anchored at
 * LEXER_ASSIGN_ADD / CBC_ASSIGN_ADD, stride 2. The result is cast to
 * cbc_opcode_t.
 */
#define LEXER_BINARY_LVALUE_OP_TOKEN_TO_OPCODE(token_type) \
   ((cbc_opcode_t) (CBC_ASSIGN_ADD + (2 * ((token_type) - LEXER_ASSIGN_ADD))))
235 
/**
 * Upper limit of the local buffer size for identifiers
 * that contain escape sequences.
 */
#define LEXER_MAX_LITERAL_LOCAL_BUFFER_SIZE 48
240 
/**
 * Newline related flag bits of the lexer.
 *
 * Each value occupies its own bit.
 */
typedef enum
{
  LEXER_WAS_NEWLINE = 0x01u,                 /**< a newline was seen */
  LEXER_NO_SKIP_SPACES = 0x02u               /**< ignore skip spaces */
} lexer_newline_flags_t;
249 
/**
 * Option bits for parsing an object identifier.
 *
 * NOTE(review): LEXER_OBJ_IDENT_NO_OPTS is a non-zero bit (0x01) rather
 * than 0, so it is not an "empty" mask - confirm that no caller tests it
 * with a bitwise AND before changing any of these values.
 */
typedef enum
{
  LEXER_OBJ_IDENT_NO_OPTS = 0x01u,              /**< no options */
  LEXER_OBJ_IDENT_ONLY_IDENTIFIERS = 0x02u,     /**< only identifiers are accepted */
  LEXER_OBJ_IDENT_CLASS_METHOD = 0x04u,         /**< expect identifier inside a class body */
  LEXER_OBJ_IDENT_OBJECT_PATTERN = 0x08u,       /**< parse "get"/"set" as string literal in object pattern */
} lexer_obj_ident_opts_t;
260 
/**
 * Option bits for lexer string handling.
 *
 * NOTE(review): LEXER_STRING_NO_OPTS is a non-zero bit (0x01) rather
 * than 0 - confirm how callers test it before changing the values.
 */
typedef enum
{
  LEXER_STRING_NO_OPTS = 0x01u,           /**< no options */
  LEXER_STRING_RAW = 0x02u,               /**< raw string ECMAScript v6, 11.8.6.1: TVR */
} lexer_string_options_t;
269 
/**
 * Number literal types distinguished by the lexer.
 */
typedef enum
{
  LEXER_NUMBER_DECIMAL = 0,                 /**< decimal number */
  LEXER_NUMBER_HEXADECIMAL = 1,             /**< hexadecimal number */
  LEXER_NUMBER_OCTAL = 2,                   /**< octal number */
  LEXER_NUMBER_BINARY = 3,                  /**< binary number */
} lexer_number_type_t;
280 
281 /**
282  * Lexer character (string / identifier) literal data.
283  */
284 typedef struct
285 {
286   const uint8_t *char_p;                     /**< start of identifier or string token */
287   prop_length_t length;                      /**< length or index of a literal */
288   uint8_t type;                              /**< type of the current literal */
289   uint8_t has_escape;                        /**< has escape sequences */
290 } lexer_lit_location_t;
291 
292 /**
293  * Lexer token.
294  */
295 typedef struct
296 {
297   uint8_t type;                              /**< token type */
298   uint8_t keyword_type;                      /**< keyword type for identifiers */
299   uint8_t extra_value;                       /**< helper value for different purposes */
300   uint8_t flags;                             /**< flag bits for the current token */
301   parser_line_counter_t line;                /**< token start line */
302   parser_line_counter_t column;              /**< token start column */
303   lexer_lit_location_t lit_location;         /**< extra data for character literals */
304 } lexer_token_t;
305 
306 /**
307  * Literal data set by lexer_construct_literal_object.
308  */
309 typedef struct
310 {
311   lexer_literal_t *literal_p;                /**< pointer to the literal object */
312   uint16_t index;                            /**< literal index */
313 } lexer_lit_object_t;
314 
315 /**
316  * @}
317  * @}
318  * @}
319  */
320 
321 #endif /* !JS_LEXER_H */
322