• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright JS Foundation and other contributors, http://js.foundation
2  *
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecma-alloc.h"
17 #include "ecma-helpers.h"
18 #include "ecma-function-object.h"
19 #include "ecma-literal-storage.h"
20 #include "js-parser-internal.h"
21 #include "lit-char-helpers.h"
22 #include "jcontext.h"
23 
24 #if ENABLED (JERRY_PARSER)
25 
26 /** \addtogroup parser Parser
27  * @{
28  *
29  * \addtogroup jsparser JavaScript
30  * @{
31  *
32  * \addtogroup jsparser_lexer Lexer
33  * @{
34  */
35 
/* Compile-time guarantee that LEXER_NUMBER_BINARY is ordered after LEXER_NUMBER_OCTAL. */
JERRY_STATIC_ASSERT (LEXER_NUMBER_BINARY > LEXER_NUMBER_OCTAL,
                     lexer_number_binary_must_be_greater_than_lexer_number_octal);

/**
 * Check whether a byte is an intermediate octet of a UTF-8 encoded character
 *
 * The byte is masked with LIT_UTF8_EXTRA_BYTE_MASK and the result is compared
 * to LIT_UTF8_2_BYTE_CODE_POINT_MIN.
 */
#define IS_UTF8_INTERMEDIATE_OCTET(byte) (((byte) & LIT_UTF8_EXTRA_BYTE_MASK) == LIT_UTF8_2_BYTE_CODE_POINT_MIN)
43 
44 /**
45  * Align column to the next tab position.
46  *
47  * @return aligned position
48  */
49 static parser_line_counter_t
align_column_to_tab(parser_line_counter_t column)50 align_column_to_tab (parser_line_counter_t column) /**< current column */
51 {
52   /* Tab aligns to zero column start position. */
53   return (parser_line_counter_t) (((column + (8u - 1u)) & ~ECMA_STRING_CONTAINER_MASK) + 1u);
54 } /* align_column_to_tab */
55 
56 /**
57  * Parse hexadecimal character sequence
58  *
59  * @return character value or UINT32_MAX on error
60  */
61 static lit_code_point_t
lexer_hex_to_code_point(const uint8_t * source_p,parser_line_counter_t length)62 lexer_hex_to_code_point (const uint8_t *source_p, /**< current source position */
63                          parser_line_counter_t length) /**< source length */
64 {
65   lit_code_point_t result = 0;
66 
67   do
68   {
69     uint32_t byte = *source_p++;
70 
71     result <<= 4;
72 
73     if (byte >= LIT_CHAR_0 && byte <= LIT_CHAR_9)
74     {
75       result += byte - LIT_CHAR_0;
76     }
77     else
78     {
79       byte = LEXER_TO_ASCII_LOWERCASE (byte);
80       if (byte >= LIT_CHAR_LOWERCASE_A && byte <= LIT_CHAR_LOWERCASE_F)
81       {
82         result += byte - (LIT_CHAR_LOWERCASE_A - 10);
83       }
84       else
85       {
86         return UINT32_MAX;
87       }
88     }
89   }
90   while (--length > 0);
91 
92   return result;
93 } /* lexer_hex_to_code_point */
94 
95 #if ENABLED (JERRY_ES2015)
96 
97 /**
98  * Parse hexadecimal character sequence enclosed in braces
99  *
100  * @return character value or UINT32_MAX on error
101  */
102 static lit_code_point_t
lexer_hex_in_braces_to_code_point(const uint8_t * source_p,const uint8_t * source_end_p,uint32_t * length_p)103 lexer_hex_in_braces_to_code_point (const uint8_t *source_p, /**< current source position */
104                                    const uint8_t *source_end_p, /**< source end */
105                                    uint32_t *length_p) /**< [out] length of the sequence */
106 {
107   lit_code_point_t result = 0;
108   /* Four is the size of \u{} sequence. */
109   uint32_t length = 4;
110 
111   JERRY_ASSERT (source_p[-1] == LIT_CHAR_LEFT_BRACE);
112   JERRY_ASSERT (source_p < source_end_p);
113 
114   do
115   {
116     uint32_t byte = *source_p++;
117 
118     result <<= 4;
119 
120     if (byte >= LIT_CHAR_0 && byte <= LIT_CHAR_9)
121     {
122       result += byte - LIT_CHAR_0;
123     }
124     else
125     {
126       byte = LEXER_TO_ASCII_LOWERCASE (byte);
127       if (byte >= LIT_CHAR_LOWERCASE_A && byte <= LIT_CHAR_LOWERCASE_F)
128       {
129         result += byte - (LIT_CHAR_LOWERCASE_A - 10);
130       }
131       else
132       {
133         return UINT32_MAX;
134       }
135     }
136 
137     if (result >= (LIT_UNICODE_CODE_POINT_MAX + 1) || source_p >= source_end_p)
138     {
139       return UINT32_MAX;
140     }
141     length++;
142   }
143   while (*source_p != LIT_CHAR_RIGHT_BRACE);
144 
145   *length_p = length;
146   return result;
147 } /* lexer_hex_in_braces_to_code_point */
148 
149 #endif /* ENABLED (JERRY_ES2015) */
150 
151 /**
152  * Parse hexadecimal character sequence
153  *
154  * @return character value
155  */
156 static lit_code_point_t
lexer_unchecked_hex_to_character(const uint8_t ** source_p)157 lexer_unchecked_hex_to_character (const uint8_t **source_p) /**< [in, out] current source position */
158 {
159   lit_code_point_t result = 0;
160   const uint8_t *char_p = *source_p;
161   uint32_t length = (char_p[-1] == LIT_CHAR_LOWERCASE_U) ? 4 : 2;
162 
163 #if ENABLED (JERRY_ES2015)
164   if (char_p[0] == LIT_CHAR_LEFT_BRACE)
165   {
166     length = 0;
167     char_p++;
168   }
169 #endif /* ENABLED (JERRY_ES2015) */
170 
171   while (true)
172   {
173     uint32_t byte = *char_p++;
174 
175     result <<= 4;
176 
177     if (byte >= LIT_CHAR_0 && byte <= LIT_CHAR_9)
178     {
179       result += byte - LIT_CHAR_0;
180     }
181     else
182     {
183       JERRY_ASSERT ((byte >= LIT_CHAR_LOWERCASE_A && byte <= LIT_CHAR_LOWERCASE_F)
184                     || (byte >= LIT_CHAR_UPPERCASE_A && byte <= LIT_CHAR_UPPERCASE_F));
185 
186       result += LEXER_TO_ASCII_LOWERCASE (byte) - (LIT_CHAR_LOWERCASE_A - 10);
187     }
188 
189     JERRY_ASSERT (result <= LIT_UNICODE_CODE_POINT_MAX);
190 
191 #if ENABLED (JERRY_ES2015)
192     if (length == 0)
193     {
194       if (*char_p != LIT_CHAR_RIGHT_BRACE)
195       {
196         continue;
197       }
198       *source_p = char_p + 1;
199       return result;
200     }
201 #endif /* ENABLED (JERRY_ES2015) */
202 
203     if (--length == 0)
204     {
205       *source_p = char_p;
206       return result;
207     }
208   }
209 } /* lexer_unchecked_hex_to_character */
210 
/**
 * Skip space mode
 *
 * Tracks what lexer_skip_spaces is currently skipping.
 */
typedef enum
{
  LEXER_SKIP_SPACES,                 /**< skip spaces mode */
  LEXER_SKIP_SINGLE_LINE_COMMENT,    /**< parse single line comment (ends at the next newline) */
  LEXER_SKIP_MULTI_LINE_COMMENT,     /**< parse multi line comment (ends at the closing delimiter) */
} skip_mode_t;
220 
221 /**
222  * Skip spaces.
223  */
224 static void
lexer_skip_spaces(parser_context_t * context_p)225 lexer_skip_spaces (parser_context_t *context_p) /**< context */
226 {
227   skip_mode_t mode = LEXER_SKIP_SPACES;
228   const uint8_t *source_end_p = context_p->source_end_p;
229 
230   if (context_p->token.flags & LEXER_NO_SKIP_SPACES)
231   {
232     context_p->token.flags &= (uint8_t) ~LEXER_NO_SKIP_SPACES;
233     return;
234   }
235 
236   context_p->token.flags = 0;
237 
238   while (true)
239   {
240     if (context_p->source_p >= source_end_p)
241     {
242       if (mode == LEXER_SKIP_MULTI_LINE_COMMENT)
243       {
244         parser_raise_error (context_p, PARSER_ERR_UNTERMINATED_MULTILINE_COMMENT);
245       }
246       return;
247     }
248 
249     switch (context_p->source_p[0])
250     {
251       case LIT_CHAR_CR:
252       {
253         if (context_p->source_p + 1 < source_end_p
254             && context_p->source_p[1] == LIT_CHAR_LF)
255         {
256           context_p->source_p++;
257         }
258         /* FALLTHRU */
259       }
260 
261       case LIT_CHAR_LF:
262       {
263         context_p->line++;
264         context_p->column = 0;
265         context_p->token.flags = LEXER_WAS_NEWLINE;
266 
267         if (mode == LEXER_SKIP_SINGLE_LINE_COMMENT)
268         {
269           mode = LEXER_SKIP_SPACES;
270         }
271         /* FALLTHRU */
272       }
273 
274       case LIT_CHAR_VTAB:
275       case LIT_CHAR_FF:
276       case LIT_CHAR_SP:
277       {
278         context_p->source_p++;
279         context_p->column++;
280         continue;
281       }
282 
283       case LIT_CHAR_TAB:
284       {
285         context_p->column = align_column_to_tab (context_p->column);
286         context_p->source_p++;
287         continue;
288       }
289 
290       case LIT_CHAR_SLASH:
291       {
292         if (mode == LEXER_SKIP_SPACES
293             && context_p->source_p + 1 < source_end_p)
294         {
295           if (context_p->source_p[1] == LIT_CHAR_SLASH)
296           {
297             mode = LEXER_SKIP_SINGLE_LINE_COMMENT;
298           }
299           else if (context_p->source_p[1] == LIT_CHAR_ASTERISK)
300           {
301             mode = LEXER_SKIP_MULTI_LINE_COMMENT;
302             context_p->token.line = context_p->line;
303             context_p->token.column = context_p->column;
304           }
305 
306           if (mode != LEXER_SKIP_SPACES)
307           {
308             context_p->source_p += 2;
309             PARSER_PLUS_EQUAL_LC (context_p->column, 2);
310             continue;
311           }
312         }
313         break;
314       }
315 
316       case LIT_CHAR_ASTERISK:
317       {
318         if (mode == LEXER_SKIP_MULTI_LINE_COMMENT
319             && context_p->source_p + 1 < source_end_p
320             && context_p->source_p[1] == LIT_CHAR_SLASH)
321         {
322           mode = LEXER_SKIP_SPACES;
323           context_p->source_p += 2;
324           PARSER_PLUS_EQUAL_LC (context_p->column, 2);
325           continue;
326         }
327         break;
328       }
329 
330       case 0xc2:
331       {
332         if (context_p->source_p + 1 < source_end_p
333             && context_p->source_p[1] == 0xa0)
334         {
335           /* Codepoint \u00A0 */
336           context_p->source_p += 2;
337           context_p->column++;
338           continue;
339         }
340         break;
341       }
342 
343       case LEXER_NEWLINE_LS_PS_BYTE_1:
344       {
345         JERRY_ASSERT (context_p->source_p + 2 < source_end_p);
346         if (LEXER_NEWLINE_LS_PS_BYTE_23 (context_p->source_p))
347         {
348           /* Codepoint \u2028 and \u2029 */
349           context_p->source_p += 3;
350           context_p->line++;
351           context_p->column = 1;
352           context_p->token.flags = LEXER_WAS_NEWLINE;
353 
354           if (mode == LEXER_SKIP_SINGLE_LINE_COMMENT)
355           {
356             mode = LEXER_SKIP_SPACES;
357           }
358           continue;
359         }
360         break;
361       }
362 
363       case 0xef:
364       {
365         if (context_p->source_p + 2 < source_end_p
366             && context_p->source_p[1] == 0xbb
367             && context_p->source_p[2] == 0xbf)
368         {
369           /* Codepoint \uFEFF */
370           context_p->source_p += 3;
371           context_p->column++;
372           continue;
373         }
374         break;
375       }
376 
377       default:
378       {
379         break;
380       }
381     }
382 
383     if (mode == LEXER_SKIP_SPACES)
384     {
385       return;
386     }
387 
388     context_p->source_p++;
389 
390     if (context_p->source_p < source_end_p
391         && !IS_UTF8_INTERMEDIATE_OCTET (context_p->source_p[0]))
392     {
393       context_p->column++;
394     }
395   }
396 } /* lexer_skip_spaces */
397 
398 #if ENABLED (JERRY_ES2015)
399 /**
400  * Skip all the continuous empty statements.
401  */
402 void
lexer_skip_empty_statements(parser_context_t * context_p)403 lexer_skip_empty_statements (parser_context_t *context_p) /**< context */
404 {
405   lexer_skip_spaces (context_p);
406 
407   while (context_p->source_p < context_p->source_end_p
408          && *context_p->source_p == LIT_CHAR_SEMICOLON)
409   {
410     lexer_consume_next_character (context_p);
411     lexer_skip_spaces (context_p);
412   }
413 
414   context_p->token.flags = (uint8_t) (context_p->token.flags | LEXER_NO_SKIP_SPACES);
415 } /* lexer_skip_empty_statements */
416 #endif /* ENABLED (JERRY_ES2015) */
417 
/**
 * Keyword data.
 *
 * One entry of a keyword table: the keyword text and the token type
 * assigned to it.
 */
typedef struct
{
  const uint8_t *keyword_p; /**< keyword string */
  lexer_token_type_t type;  /**< keyword token type */
} keyword_string_t;
426 
/**
 * @{
 * Keyword defines
 *
 * LEXER_KEYWORD creates a keyword_string_t initializer,
 * LEXER_KEYWORD_LIST_LENGTH computes the number of items of a keyword table.
 * Both macros are undefined again after the keyword tables.
 */
#define LEXER_KEYWORD(name, type) { (const uint8_t *) (name), (type) }
#define LEXER_KEYWORD_LIST_LENGTH(name) (const uint8_t) (sizeof ((name)) / sizeof ((name)[0]))
/** @} */

/**
 * Length of the shortest keyword.
 *
 * Identifiers shorter than this value are never compared to the keyword tables.
 */
#define LEXER_KEYWORD_MIN_LENGTH 2

/**
 * Length of the longest keyword.
 *
 * Identifiers longer than this value are never compared to the keyword tables.
 */
#define LEXER_KEYWORD_MAX_LENGTH 10
444 
/**
 * Keywords with 2 characters.
 *
 * Note: the items of every keyword table must be kept in ascending byte
 *       order, because lexer_parse_identifier uses binary search on them.
 */
static const keyword_string_t keywords_with_length_2[] =
{
  LEXER_KEYWORD ("do", LEXER_KEYW_DO),
  LEXER_KEYWORD ("if", LEXER_KEYW_IF),
  LEXER_KEYWORD ("in", LEXER_KEYW_IN),
};

/**
 * Keywords with 3 characters (sorted, see keywords_with_length_2).
 */
static const keyword_string_t keywords_with_length_3[] =
{
  LEXER_KEYWORD ("for", LEXER_KEYW_FOR),
  LEXER_KEYWORD ("let", LEXER_KEYW_LET),
  LEXER_KEYWORD ("new", LEXER_KEYW_NEW),
  LEXER_KEYWORD ("try", LEXER_KEYW_TRY),
  LEXER_KEYWORD ("var", LEXER_KEYW_VAR),
};

/**
 * Keywords with 4 characters (sorted, see keywords_with_length_2).
 *
 * The null and true literals are stored with their literal token types.
 */
static const keyword_string_t keywords_with_length_4[] =
{
  LEXER_KEYWORD ("case", LEXER_KEYW_CASE),
  LEXER_KEYWORD ("else", LEXER_KEYW_ELSE),
  LEXER_KEYWORD ("enum", LEXER_KEYW_ENUM),
  LEXER_KEYWORD ("eval", LEXER_KEYW_EVAL),
  LEXER_KEYWORD ("null", LEXER_LIT_NULL),
  LEXER_KEYWORD ("this", LEXER_KEYW_THIS),
  LEXER_KEYWORD ("true", LEXER_LIT_TRUE),
  LEXER_KEYWORD ("void", LEXER_KEYW_VOID),
  LEXER_KEYWORD ("with", LEXER_KEYW_WITH),
};

/**
 * Keywords with 5 characters (sorted, see keywords_with_length_2).
 *
 * The async and await items are available only when ES2015 is enabled.
 * The false literal is stored with its literal token type.
 */
static const keyword_string_t keywords_with_length_5[] =
{
#if ENABLED (JERRY_ES2015)
  LEXER_KEYWORD ("async", LEXER_KEYW_ASYNC),
  LEXER_KEYWORD ("await", LEXER_KEYW_AWAIT),
#endif /* ENABLED (JERRY_ES2015) */
  LEXER_KEYWORD ("break", LEXER_KEYW_BREAK),
  LEXER_KEYWORD ("catch", LEXER_KEYW_CATCH),
  LEXER_KEYWORD ("class", LEXER_KEYW_CLASS),
  LEXER_KEYWORD ("const", LEXER_KEYW_CONST),
  LEXER_KEYWORD ("false", LEXER_LIT_FALSE),
  LEXER_KEYWORD ("super", LEXER_KEYW_SUPER),
  LEXER_KEYWORD ("throw", LEXER_KEYW_THROW),
  LEXER_KEYWORD ("while", LEXER_KEYW_WHILE),
  LEXER_KEYWORD ("yield", LEXER_KEYW_YIELD),
};

/**
 * Keywords with 6 characters (sorted, see keywords_with_length_2).
 */
static const keyword_string_t keywords_with_length_6[] =
{
  LEXER_KEYWORD ("delete", LEXER_KEYW_DELETE),
  LEXER_KEYWORD ("export", LEXER_KEYW_EXPORT),
  LEXER_KEYWORD ("import", LEXER_KEYW_IMPORT),
  LEXER_KEYWORD ("public", LEXER_KEYW_PUBLIC),
  LEXER_KEYWORD ("return", LEXER_KEYW_RETURN),
  LEXER_KEYWORD ("static", LEXER_KEYW_STATIC),
  LEXER_KEYWORD ("switch", LEXER_KEYW_SWITCH),
  LEXER_KEYWORD ("typeof", LEXER_KEYW_TYPEOF),
};

/**
 * Keywords with 7 characters (sorted, see keywords_with_length_2).
 */
static const keyword_string_t keywords_with_length_7[] =
{
  LEXER_KEYWORD ("default", LEXER_KEYW_DEFAULT),
  LEXER_KEYWORD ("extends", LEXER_KEYW_EXTENDS),
  LEXER_KEYWORD ("finally", LEXER_KEYW_FINALLY),
  LEXER_KEYWORD ("package", LEXER_KEYW_PACKAGE),
  LEXER_KEYWORD ("private", LEXER_KEYW_PRIVATE),
};

/**
 * Keywords with 8 characters (sorted, see keywords_with_length_2).
 */
static const keyword_string_t keywords_with_length_8[] =
{
  LEXER_KEYWORD ("continue", LEXER_KEYW_CONTINUE),
  LEXER_KEYWORD ("debugger", LEXER_KEYW_DEBUGGER),
  LEXER_KEYWORD ("function", LEXER_KEYW_FUNCTION),
};

/**
 * Keywords with 9 characters (sorted, see keywords_with_length_2).
 */
static const keyword_string_t keywords_with_length_9[] =
{
  LEXER_KEYWORD ("arguments", LEXER_KEYW_ARGUMENTS),
  LEXER_KEYWORD ("interface", LEXER_KEYW_INTERFACE),
  LEXER_KEYWORD ("protected", LEXER_KEYW_PROTECTED),
};

/**
 * Keywords with 10 characters (sorted, see keywords_with_length_2).
 */
static const keyword_string_t keywords_with_length_10[] =
{
  LEXER_KEYWORD ("implements", LEXER_KEYW_IMPLEMENTS),
  LEXER_KEYWORD ("instanceof", LEXER_KEYW_INSTANCEOF),
};
558 
/**
 * List of the keyword groups.
 *
 * Indexed by (keyword length - LEXER_KEYWORD_MIN_LENGTH).
 */
static const keyword_string_t * const keyword_strings_list[] =
{
  keywords_with_length_2,
  keywords_with_length_3,
  keywords_with_length_4,
  keywords_with_length_5,
  keywords_with_length_6,
  keywords_with_length_7,
  keywords_with_length_8,
  keywords_with_length_9,
  keywords_with_length_10
};

/* There must be exactly one group for each length between the minimum and maximum keyword length. */
JERRY_STATIC_ASSERT (sizeof (keyword_strings_list) / sizeof (const keyword_string_t *)
                     == (LEXER_KEYWORD_MAX_LENGTH - LEXER_KEYWORD_MIN_LENGTH) + 1,
                     keyword_strings_list_size_must_equal_to_keyword_max_length_difference);
578 
/**
 * List of the keyword groups length.
 *
 * Contains the number of items of each keyword group, in the same order
 * (and with the same indexing) as keyword_strings_list.
 */
static const uint8_t keyword_lengths_list[] =
{
  LEXER_KEYWORD_LIST_LENGTH (keywords_with_length_2),
  LEXER_KEYWORD_LIST_LENGTH (keywords_with_length_3),
  LEXER_KEYWORD_LIST_LENGTH (keywords_with_length_4),
  LEXER_KEYWORD_LIST_LENGTH (keywords_with_length_5),
  LEXER_KEYWORD_LIST_LENGTH (keywords_with_length_6),
  LEXER_KEYWORD_LIST_LENGTH (keywords_with_length_7),
  LEXER_KEYWORD_LIST_LENGTH (keywords_with_length_8),
  LEXER_KEYWORD_LIST_LENGTH (keywords_with_length_9),
  LEXER_KEYWORD_LIST_LENGTH (keywords_with_length_10)
};

/* The helper macros are needed only for building the tables above. */
#undef LEXER_KEYWORD
#undef LEXER_KEYWORD_LIST_LENGTH
597 
/**
 * Flags for lexer_parse_identifier.
 *
 * The values are bit flags. When either of the *_AND_RETURN flags is set,
 * lexer_parse_identifier only inspects the first character and returns.
 */
typedef enum
{
  LEXER_PARSE_NO_OPTS = 0, /**< no options */
  LEXER_PARSE_CHECK_KEYWORDS = (1 << 0), /**< check keywords */
  LEXER_PARSE_CHECK_START_AND_RETURN = (1 << 1), /**< check identifier start and return */
  LEXER_PARSE_CHECK_PART_AND_RETURN = (1 << 2), /**< check identifier part and return */
} lexer_parse_options_t;

/* The keyword classification in lexer_parse_identifier depends on this ordering of the token types. */
JERRY_STATIC_ASSERT (LEXER_FIRST_NON_RESERVED_KEYWORD < LEXER_FIRST_FUTURE_STRICT_RESERVED_WORD,
                     lexer_first_non_reserved_keyword_must_be_before_lexer_first_future_strict_reserved_word);
611 
/**
 * Parse identifier.
 *
 * Scans the identifier which starts at the current source position. Unicode
 * escape sequences are accepted in the identifier (the brace enclosed form
 * only when ES2015 is enabled). On success the token of the context is
 * filled as an identifier literal (or as a keyword if keyword checking is
 * requested and the identifier matches one), and the source position and
 * column are moved after the identifier.
 *
 * When LEXER_PARSE_CHECK_START_AND_RETURN or LEXER_PARSE_CHECK_PART_AND_RETURN
 * is set, only the first character is inspected: the function returns true
 * if it is a backslash, otherwise it returns the result of the identifier
 * start / identifier part check, and the context is not modified.
 *
 * Invalid escape sequences, too long identifiers and keywords which are not
 * allowed in the current context are reported by parser_raise_error.
 *
 * @return true, if an identifier is parsed (or the requested check passes),
 *         false otherwise
 */
static bool
lexer_parse_identifier (parser_context_t *context_p, /**< context */
                        lexer_parse_options_t options) /**< parse options (lexer_parse_options_t flags) */
{
  /* Only very few identifiers contains \u escape sequences. */
  const uint8_t *source_p = context_p->source_p;
  /* Note: newline or tab cannot be part of an identifier. */
  parser_line_counter_t column = context_p->column;
  const uint8_t *source_end_p = context_p->source_end_p;
  /* Length of the identifier after decoding, not the number of source bytes. */
  size_t length = 0;
  uint8_t has_escape = false;

  do
  {
    if (*source_p == LIT_CHAR_BACKSLASH)
    {
      /* After a backslash an identifier must start. */
      lit_code_point_t code_point = UINT32_MAX;
      /* Size of the \uXXXX form, the brace enclosed form overwrites it. */
      uint32_t escape_length = 6;

      if (options & (LEXER_PARSE_CHECK_START_AND_RETURN | LEXER_PARSE_CHECK_PART_AND_RETURN))
      {
        /* The escape sequence itself is not validated in check-only mode. */
        return true;
      }

      has_escape = true;

#if ENABLED (JERRY_ES2015)
      if (source_p + 5 <= source_end_p && source_p[1] == LIT_CHAR_LOWERCASE_U)
      {
        if (source_p[2] == LIT_CHAR_LEFT_BRACE)
        {
          code_point = lexer_hex_in_braces_to_code_point (source_p + 3, source_end_p, &escape_length);
        }
        else if (source_p + 6 <= source_end_p)
        {
          code_point = lexer_hex_to_code_point (source_p + 2, 4);
        }
      }
#else /* !ENABLED (JERRY_ES2015) */
      if (source_p + 6 <= source_end_p && source_p[1] == LIT_CHAR_LOWERCASE_U)
      {
        code_point = lexer_hex_to_code_point (source_p + 2, 4);
      }
#endif /* ENABLED (JERRY_ES2015) */

      /* code_point is still UINT32_MAX if the sequence is too short, malformed or not a \u escape. */
      if (code_point == UINT32_MAX)
      {
        context_p->source_p = source_p;
        context_p->token.column = column;
        parser_raise_error (context_p, PARSER_ERR_INVALID_UNICODE_ESCAPE_SEQUENCE);
      }

      if (length == 0)
      {
        if (!lit_code_point_is_identifier_start (code_point))
        {
          parser_raise_error (context_p, PARSER_ERR_INVALID_IDENTIFIER_START);
        }
      }
      else
      {
        if (!lit_code_point_is_identifier_part (code_point))
        {
          parser_raise_error (context_p, PARSER_ERR_INVALID_IDENTIFIER_PART);
        }
      }

      length += lit_code_point_get_cesu8_length (code_point);
      source_p += escape_length;
      PARSER_PLUS_EQUAL_LC (column, escape_length);
      continue;
    }

    lit_code_point_t code_point = *source_p;
    /* utf8_length: consumed source bytes, decoded_length: bytes added to the identifier length,
     * char_count: columns consumed. */
    lit_utf8_size_t utf8_length = 1, decoded_length = 1, char_count = 1;

    if (JERRY_UNLIKELY (code_point >= LIT_UTF8_2_BYTE_MARKER))
    {
#if ENABLED (JERRY_ES2015)
      utf8_length = lit_read_code_point_from_utf8 (source_p,
                                                   (lit_utf8_size_t) (source_end_p - source_p),
                                                   &code_point);
      decoded_length = utf8_length;

      /* Only ES2015 supports code points outside of the basic plane which can be part of an identifier. */
      if ((code_point >= LIT_UTF16_HIGH_SURROGATE_MIN && code_point <= LIT_UTF16_HIGH_SURROGATE_MAX)
          && source_p + 3 < source_end_p)
      {
        /* A high surrogate may be followed by a low surrogate encoded in the next three bytes. */
        lit_code_point_t low_surrogate;
        lit_read_code_point_from_utf8 (source_p + 3,
                                       (lit_utf8_size_t) (source_end_p - (source_p + 3)),
                                       &low_surrogate);

        if (low_surrogate >= LIT_UTF16_LOW_SURROGATE_MIN && low_surrogate <= LIT_UTF16_LOW_SURROGATE_MAX)
        {
          code_point = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) code_point,
                                                                 (ecma_char_t) low_surrogate);
          utf8_length = 2 * 3;
          decoded_length = 2 * 3;
          char_count = 2;
        }
      }
      else if (source_p[0] >= LIT_UTF8_4_BYTE_MARKER)
      {
        /* NOTE(review): presumably the four byte sequence is stored as two three byte units,
         * so the identifier is marked as one which needs conversion - confirm. */
        decoded_length = 2 * 3;
        has_escape = true;
      }
#else /* !ENABLED (JERRY_ES2015) */
      if (code_point < LIT_UTF8_4_BYTE_MARKER)
      {
        utf8_length = lit_read_code_point_from_utf8 (source_p,
                                                     (lit_utf8_size_t) (source_end_p - source_p),
                                                     &code_point);
        decoded_length = utf8_length;
      }
      else
      {
        /* Four byte sequences are replaced by code point zero before the identifier checks below. */
        code_point = 0;
      }
#endif /* ENABLED (JERRY_ES2015) */
    }

    if (length == 0)
    {
      if (JERRY_UNLIKELY (options & (LEXER_PARSE_CHECK_START_AND_RETURN | LEXER_PARSE_CHECK_PART_AND_RETURN)))
      {
        /* Check-only mode: classify the first character and leave the context untouched. */
        if (options & LEXER_PARSE_CHECK_START_AND_RETURN)
        {
          return lit_code_point_is_identifier_start (code_point);
        }
        else
        {
          return lit_code_point_is_identifier_part (code_point);
        }
      }

      if (!lit_code_point_is_identifier_start (code_point))
      {
        return false;
      }
    }
    else if (!lit_code_point_is_identifier_part (code_point))
    {
      /* The identifier ends before this character. */
      break;
    }

    source_p += utf8_length;
    length += decoded_length;
    PARSER_PLUS_EQUAL_LC (column, char_count);
  }
  while (source_p < source_end_p);

  JERRY_ASSERT (length > 0);

  context_p->token.type = LEXER_LITERAL;
  context_p->token.keyword_type = LEXER_EOS;
  context_p->token.lit_location.type = LEXER_IDENT_LITERAL;
  context_p->token.lit_location.has_escape = has_escape;

  /* context_p->source_p and context_p->column still refer to the start of the identifier here. */
  context_p->token.column = context_p->column;
  context_p->token.lit_location.char_p = context_p->source_p;
  context_p->token.lit_location.length = (prop_length_t) length;

  if (JERRY_UNLIKELY (length > PARSER_MAXIMUM_IDENT_LENGTH))
  {
    parser_raise_error (context_p, PARSER_ERR_IDENTIFIER_TOO_LONG);
  }

  /* Check keywords. */
  if ((options & LEXER_PARSE_CHECK_KEYWORDS)
      && (length >= LEXER_KEYWORD_MIN_LENGTH && length <= LEXER_KEYWORD_MAX_LENGTH))
  {
    const uint8_t *ident_start_p = context_p->source_p;
    uint8_t buffer_p[LEXER_KEYWORD_MAX_LENGTH];

    if (JERRY_UNLIKELY (context_p->token.lit_location.has_escape))
    {
      /* Compare the converted form; ident_start_p == buffer_p marks this case below. */
      lexer_convert_ident_to_cesu8 (buffer_p, ident_start_p, (prop_length_t) length);
      ident_start_p = buffer_p;
    }

    const keyword_string_t *keyword_list_p = keyword_strings_list[length - LEXER_KEYWORD_MIN_LENGTH];

    /* Binary search in the keyword group of the same length. */
    int start = 0;
    int end = keyword_lengths_list[length - LEXER_KEYWORD_MIN_LENGTH];
    int middle = end / 2;

    do
    {
      const keyword_string_t *keyword_p = keyword_list_p + middle;
      /* Compare the first character before calling memcmp. */
      int compare_result = ident_start_p[0] - keyword_p->keyword_p[0];

      if (compare_result == 0)
      {
        compare_result = memcmp (ident_start_p, keyword_p->keyword_p, length);

        if (compare_result == 0)
        {
          context_p->token.keyword_type = (uint8_t) keyword_p->type;

          if (JERRY_LIKELY (keyword_p->type < LEXER_FIRST_NON_RESERVED_KEYWORD))
          {
#if ENABLED (JERRY_ES2015)
            if (JERRY_UNLIKELY (keyword_p->type == LEXER_KEYW_AWAIT))
            {
              if (!(context_p->status_flags & PARSER_IS_ASYNC_FUNCTION)
                  && !(context_p->global_status_flags & ECMA_PARSE_MODULE))
              {
                /* The token type remains LEXER_LITERAL outside of async functions and modules. */
                break;
              }

              if (context_p->status_flags & PARSER_DISALLOW_AWAIT_YIELD)
              {
                if (ident_start_p == buffer_p)
                {
                  parser_raise_error (context_p, PARSER_ERR_INVALID_KEYWORD);
                }
                parser_raise_error (context_p, PARSER_ERR_AWAIT_NOT_ALLOWED);
              }

              context_p->token.type = (uint8_t) LEXER_KEYW_AWAIT;
              break;
            }
#endif /* ENABLED (JERRY_ES2015) */

            if (ident_start_p == buffer_p)
            {
              /* Escape sequences are not allowed in a keyword. */
              parser_raise_error (context_p, PARSER_ERR_INVALID_KEYWORD);
            }

            context_p->token.type = (uint8_t) keyword_p->type;
            break;
          }

#if ENABLED (JERRY_ES2015)
          /* let is turned into a keyword token only in strict mode. */
          if (keyword_p->type == LEXER_KEYW_LET && (context_p->status_flags & PARSER_IS_STRICT))
          {
            if (ident_start_p == buffer_p)
            {
              parser_raise_error (context_p, PARSER_ERR_INVALID_KEYWORD);
            }

            context_p->token.type = (uint8_t) LEXER_KEYW_LET;
            break;
          }

          /* yield is turned into a keyword token only inside generator functions. */
          if (keyword_p->type == LEXER_KEYW_YIELD && (context_p->status_flags & PARSER_IS_GENERATOR_FUNCTION))
          {
            if (context_p->status_flags & PARSER_DISALLOW_AWAIT_YIELD)
            {
              if (ident_start_p == buffer_p)
              {
                parser_raise_error (context_p, PARSER_ERR_INVALID_KEYWORD);
              }
              parser_raise_error (context_p, PARSER_ERR_YIELD_NOT_ALLOWED);
            }

            context_p->token.type = (uint8_t) LEXER_KEYW_YIELD;
            break;
          }
#endif /* ENABLED (JERRY_ES2015) */

          /* Otherwise the token type remains LEXER_LITERAL, only keyword_type records the match. */
          if (keyword_p->type >= LEXER_FIRST_FUTURE_STRICT_RESERVED_WORD
              && (context_p->status_flags & PARSER_IS_STRICT))
          {
            parser_raise_error (context_p, PARSER_ERR_STRICT_IDENT_NOT_ALLOWED);
          }
          break;
        }
      }

      if (compare_result > 0)
      {
        start = middle + 1;
      }
      else
      {
        JERRY_ASSERT (compare_result < 0);
        end = middle;
      }

      middle = (start + end) / 2;
    }
    while (start < end);
  }

  /* Move the context after the identifier. */
  context_p->source_p = source_p;
  context_p->column = column;
  return true;
} /* lexer_parse_identifier */
909 
910 /**
911  * Parse string.
912  */
913 void
lexer_parse_string(parser_context_t * context_p,lexer_string_options_t opts)914 lexer_parse_string (parser_context_t *context_p, /**< context */
915                     lexer_string_options_t opts) /**< options */
916 {
917 #if ENABLED (JERRY_ES2015)
918   int32_t raw_length_adjust = 0;
919 #else /* ENABLED (JERRY_ES2015) */
920   JERRY_UNUSED (opts);
921 #endif /* ENABLED (JERRY_ES2015) */
922 
923   uint8_t str_end_character = context_p->source_p[0];
924   const uint8_t *source_p = context_p->source_p + 1;
925   const uint8_t *string_start_p = source_p;
926   const uint8_t *source_end_p = context_p->source_end_p;
927   parser_line_counter_t line = context_p->line;
928   parser_line_counter_t column = (parser_line_counter_t) (context_p->column + 1);
929   parser_line_counter_t original_line = line;
930   parser_line_counter_t original_column = column;
931   size_t length = 0;
932   uint8_t has_escape = false;
933 
934 #if ENABLED (JERRY_ES2015)
935   if (str_end_character == LIT_CHAR_RIGHT_BRACE)
936   {
937     str_end_character = LIT_CHAR_GRAVE_ACCENT;
938   }
939 #endif /* ENABLED (JERRY_ES2015) */
940 
941   while (true)
942   {
943     if (source_p >= source_end_p)
944     {
945       context_p->token.line = original_line;
946       context_p->token.column = (parser_line_counter_t) (original_column - 1);
947       parser_raise_error (context_p, PARSER_ERR_UNTERMINATED_STRING);
948     }
949 
950     if (*source_p == str_end_character)
951     {
952       break;
953     }
954 
955     if (*source_p == LIT_CHAR_BACKSLASH)
956     {
957       source_p++;
958       column++;
959       if (source_p >= source_end_p)
960       {
961         /* Will throw an unterminated string error. */
962         continue;
963       }
964 
965       has_escape = true;
966 
967       /* Newline is ignored. */
968       if (*source_p == LIT_CHAR_CR)
969       {
970         source_p++;
971         if (source_p < source_end_p
972             && *source_p == LIT_CHAR_LF)
973         {
974 #if ENABLED (JERRY_ES2015)
975           raw_length_adjust--;
976 #endif /* ENABLED (JERRY_ES2015) */
977           source_p++;
978         }
979 
980         line++;
981         column = 1;
982         continue;
983       }
984       else if (*source_p == LIT_CHAR_LF)
985       {
986         source_p++;
987         line++;
988         column = 1;
989         continue;
990       }
991       else if (*source_p == LEXER_NEWLINE_LS_PS_BYTE_1 && LEXER_NEWLINE_LS_PS_BYTE_23 (source_p))
992       {
993         source_p += 3;
994         line++;
995         column = 1;
996         continue;
997       }
998 
999 #if ENABLED (JERRY_ES2015)
1000       if (opts & LEXER_STRING_RAW)
1001       {
1002         if ((*source_p == LIT_CHAR_GRAVE_ACCENT) || (*source_p == LIT_CHAR_BACKSLASH))
1003         {
1004           source_p++;
1005           column++;
1006           length++;
1007         }
1008         continue;
1009       }
1010 #endif /* ENABLED (JERRY_ES2015) */
1011 
1012       if (*source_p == LIT_CHAR_0
1013           && source_p + 1 < source_end_p
1014           && (*(source_p + 1) < LIT_CHAR_0 || *(source_p + 1) > LIT_CHAR_9))
1015       {
1016         source_p++;
1017         column++;
1018         length++;
1019         continue;
1020       }
1021 
1022       /* Except \x, \u, and octal numbers, everything is
1023        * converted to a character which has the same byte length. */
1024       if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_3)
1025       {
1026         if (context_p->status_flags & PARSER_IS_STRICT)
1027         {
1028           parser_raise_error (context_p, PARSER_ERR_OCTAL_ESCAPE_NOT_ALLOWED);
1029         }
1030 
1031         source_p++;
1032         column++;
1033 
1034         if (source_p < source_end_p && *source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_7)
1035         {
1036           source_p++;
1037           column++;
1038 
1039           if (source_p < source_end_p && *source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_7)
1040           {
1041             /* Numbers >= 0x200 (0x80) requires
1042              * two bytes for encoding in UTF-8. */
1043             if (source_p[-2] >= LIT_CHAR_2)
1044             {
1045               length++;
1046             }
1047 
1048             source_p++;
1049             column++;
1050           }
1051         }
1052 
1053         length++;
1054         continue;
1055       }
1056 
1057       if (*source_p >= LIT_CHAR_4 && *source_p <= LIT_CHAR_7)
1058       {
1059         if (context_p->status_flags & PARSER_IS_STRICT)
1060         {
1061           parser_raise_error (context_p, PARSER_ERR_OCTAL_ESCAPE_NOT_ALLOWED);
1062         }
1063 
1064         source_p++;
1065         column++;
1066 
1067         if (source_p < source_end_p && *source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_7)
1068         {
1069           source_p++;
1070           column++;
1071         }
1072 
1073         /* The maximum number is 0x4d so the UTF-8
1074          * representation is always one byte. */
1075         length++;
1076         continue;
1077       }
1078 
1079       if (*source_p == LIT_CHAR_LOWERCASE_X || *source_p == LIT_CHAR_LOWERCASE_U)
1080       {
1081         uint32_t escape_length = (*source_p == LIT_CHAR_LOWERCASE_X) ? 3 : 5;
1082         lit_code_point_t code_point = UINT32_MAX;
1083 
1084         context_p->token.line = line;
1085         context_p->token.column = (parser_line_counter_t) (column - 1);
1086 
1087 #if ENABLED (JERRY_ES2015)
1088         if (source_p + 4 <= source_end_p
1089             && source_p[0] == LIT_CHAR_LOWERCASE_U
1090             && source_p[1] == LIT_CHAR_LEFT_BRACE)
1091         {
1092           code_point = lexer_hex_in_braces_to_code_point (source_p + 2, source_end_p, &escape_length);
1093           escape_length--;
1094         }
1095         else
1096         {
1097 #endif /* ENABLED (JERRY_ES2015) */
1098           if (source_p + escape_length <= source_end_p)
1099           {
1100             code_point = lexer_hex_to_code_point (source_p + 1, escape_length - 1);
1101           }
1102 #if ENABLED (JERRY_ES2015)
1103         }
1104 #endif /* ENABLED (JERRY_ES2015) */
1105 
1106         if (code_point == UINT32_MAX)
1107         {
1108           parser_raise_error (context_p, PARSER_ERR_INVALID_UNICODE_ESCAPE_SEQUENCE);
1109         }
1110 
1111         length += lit_code_point_get_cesu8_length (code_point);
1112 
1113         source_p += escape_length;
1114         PARSER_PLUS_EQUAL_LC (column, escape_length);
1115         continue;
1116       }
1117     }
1118 #if ENABLED (JERRY_ES2015)
1119     else if (str_end_character == LIT_CHAR_GRAVE_ACCENT &&
1120              source_p[0] == LIT_CHAR_DOLLAR_SIGN &&
1121              source_p + 1 < source_end_p &&
1122              source_p[1] == LIT_CHAR_LEFT_BRACE)
1123     {
1124       raw_length_adjust--;
1125       source_p++;
1126       break;
1127     }
1128 #endif /* ENABLED (JERRY_ES2015) */
1129 
1130     if (*source_p >= LIT_UTF8_4_BYTE_MARKER)
1131     {
1132       /* Processing 4 byte unicode sequence (even if it is
1133        * after a backslash). Always converted to two 3 byte
1134        * long sequence. */
1135       length += 2 * 3;
1136       has_escape = true;
1137       source_p += 4;
1138 #if ENABLED (JERRY_ES2015)
1139       raw_length_adjust += 2;
1140 #endif /* ENABLED (JERRY_ES2015) */
1141       column++;
1142       continue;
1143     }
1144     else if (*source_p == LIT_CHAR_TAB)
1145     {
1146       column = align_column_to_tab (column);
1147       /* Subtract -1 because column is increased below. */
1148       column--;
1149     }
1150 #if ENABLED (JERRY_ES2015)
1151     else if (str_end_character == LIT_CHAR_GRAVE_ACCENT)
1152     {
1153       /* Newline (without backslash) is part of the string.
1154          Note: ECMAScript v6, 11.8.6.1 <CR> or <CR><LF> are both normalized to <LF> */
1155       if (*source_p == LIT_CHAR_CR)
1156       {
1157         has_escape = true;
1158         source_p++;
1159         length++;
1160         if (source_p < source_end_p
1161             && *source_p == LIT_CHAR_LF)
1162         {
1163           source_p++;
1164           raw_length_adjust--;
1165         }
1166         line++;
1167         column = 1;
1168         continue;
1169       }
1170       else if (*source_p == LIT_CHAR_LF)
1171       {
1172         source_p++;
1173         length++;
1174         line++;
1175         column = 1;
1176         continue;
1177       }
1178       else if (*source_p == LEXER_NEWLINE_LS_PS_BYTE_1 && LEXER_NEWLINE_LS_PS_BYTE_23 (source_p))
1179       {
1180         source_p += 3;
1181         length += 3;
1182         line++;
1183         column = 1;
1184         continue;
1185       }
1186     }
1187 #endif /* ENABLED (JERRY_ES2015) */
1188     else if (*source_p == LIT_CHAR_CR
1189              || *source_p == LIT_CHAR_LF
1190              || (*source_p == LEXER_NEWLINE_LS_PS_BYTE_1 && LEXER_NEWLINE_LS_PS_BYTE_23 (source_p)))
1191     {
1192       context_p->token.line = line;
1193       context_p->token.column = column;
1194       parser_raise_error (context_p, PARSER_ERR_NEWLINE_NOT_ALLOWED);
1195     }
1196 
1197     source_p++;
1198     column++;
1199     length++;
1200 
1201     while (source_p < source_end_p
1202            && IS_UTF8_INTERMEDIATE_OCTET (*source_p))
1203     {
1204       source_p++;
1205       length++;
1206     }
1207   }
1208 
1209 #if ENABLED (JERRY_ES2015)
1210   if (opts & LEXER_STRING_RAW)
1211   {
1212     length = (size_t) ((source_p - string_start_p) + raw_length_adjust);
1213   }
1214 #endif /* ENABLED (JERRY_ES2015) */
1215 
1216   if (length > PARSER_MAXIMUM_STRING_LENGTH)
1217   {
1218     parser_raise_error (context_p, PARSER_ERR_STRING_TOO_LONG);
1219   }
1220 
1221 #if ENABLED (JERRY_ES2015)
1222   context_p->token.type = ((str_end_character != LIT_CHAR_GRAVE_ACCENT) ? LEXER_LITERAL
1223                                                                         : LEXER_TEMPLATE_LITERAL);
1224 #else /* !ENABLED (JERRY_ES2015) */
1225   context_p->token.type = LEXER_LITERAL;
1226 #endif /* ENABLED (JERRY_ES2015) */
1227 
1228   /* Fill literal data. */
1229   context_p->token.lit_location.char_p = string_start_p;
1230   context_p->token.lit_location.length = (prop_length_t) length;
1231   context_p->token.lit_location.type = LEXER_STRING_LITERAL;
1232   context_p->token.lit_location.has_escape = has_escape;
1233 
1234   context_p->source_p = source_p + 1;
1235   context_p->line = line;
1236   context_p->column = (parser_line_counter_t) (column + 1);
1237 } /* lexer_parse_string */
1238 
1239 /**
1240  * Parse octal number.
1241  */
1242 static inline void
lexer_parse_octal_number(parser_context_t * context_p,const uint8_t ** source_p)1243 lexer_parse_octal_number (parser_context_t *context_p, /** context */
1244                           const uint8_t **source_p) /**< current source position */
1245 {
1246   do
1247   {
1248     (*source_p)++;
1249   }
1250   while (*source_p < context_p->source_end_p
1251          && *source_p[0] >= LIT_CHAR_0
1252          && *source_p[0] <= LIT_CHAR_7);
1253 
1254   if (*source_p < context_p->source_end_p
1255       && (*source_p[0] == LIT_CHAR_8 || *source_p[0] == LIT_CHAR_9))
1256   {
1257     parser_raise_error (context_p, PARSER_ERR_INVALID_OCTAL_DIGIT);
1258   }
1259 } /* lexer_parse_octal_number */
1260 
1261 /**
1262  * Parse number.
1263  */
1264 static void
lexer_parse_number(parser_context_t * context_p)1265 lexer_parse_number (parser_context_t *context_p) /**< context */
1266 {
1267   const uint8_t *source_p = context_p->source_p;
1268   const uint8_t *source_end_p = context_p->source_end_p;
1269   bool can_be_float = false;
1270   size_t length;
1271 
1272   context_p->token.type = LEXER_LITERAL;
1273   context_p->token.keyword_type = LEXER_EOS;
1274   context_p->token.extra_value = LEXER_NUMBER_DECIMAL;
1275   context_p->token.lit_location.char_p = source_p;
1276   context_p->token.lit_location.type = LEXER_NUMBER_LITERAL;
1277   context_p->token.lit_location.has_escape = false;
1278 
1279   if (source_p[0] == LIT_CHAR_0
1280       && source_p + 1 < source_end_p)
1281   {
1282     if (LEXER_TO_ASCII_LOWERCASE (source_p[1]) == LIT_CHAR_LOWERCASE_X)
1283     {
1284       context_p->token.extra_value = LEXER_NUMBER_HEXADECIMAL;
1285       source_p += 2;
1286 
1287       if (source_p >= source_end_p
1288           || !lit_char_is_hex_digit (source_p[0]))
1289       {
1290         parser_raise_error (context_p, PARSER_ERR_INVALID_HEX_DIGIT);
1291       }
1292 
1293       do
1294       {
1295         source_p++;
1296       }
1297       while (source_p < source_end_p
1298              && lit_char_is_hex_digit (source_p[0]));
1299     }
1300 #if ENABLED (JERRY_ES2015)
1301     else if (LEXER_TO_ASCII_LOWERCASE (source_p[1]) == LIT_CHAR_LOWERCASE_O)
1302     {
1303       context_p->token.extra_value = LEXER_NUMBER_OCTAL;
1304       context_p->token.lit_location.char_p++;
1305       context_p->source_p++;
1306       source_p += 2;
1307 
1308       if (source_p >= source_end_p
1309           || !lit_char_is_octal_digit (source_p[0]))
1310       {
1311         parser_raise_error (context_p, PARSER_ERR_INVALID_OCTAL_DIGIT);
1312       }
1313 
1314       lexer_parse_octal_number (context_p, &source_p);
1315     }
1316 #endif /* ENABLED (JERRY_ES2015) */
1317     else if (source_p[1] >= LIT_CHAR_0
1318              && source_p[1] <= LIT_CHAR_7)
1319     {
1320       context_p->token.extra_value = LEXER_NUMBER_OCTAL;
1321 
1322       if (context_p->status_flags & PARSER_IS_STRICT)
1323       {
1324         parser_raise_error (context_p, PARSER_ERR_OCTAL_NUMBER_NOT_ALLOWED);
1325       }
1326 
1327       lexer_parse_octal_number (context_p, &source_p);
1328     }
1329     else if (source_p[1] >= LIT_CHAR_8
1330              && source_p[1] <= LIT_CHAR_9)
1331     {
1332       parser_raise_error (context_p, PARSER_ERR_INVALID_NUMBER);
1333     }
1334 #if ENABLED (JERRY_ES2015)
1335     else if (LEXER_TO_ASCII_LOWERCASE (source_p[1]) == LIT_CHAR_LOWERCASE_B)
1336     {
1337       context_p->token.extra_value = LEXER_NUMBER_BINARY;
1338       context_p->token.lit_location.char_p++;
1339       context_p->source_p++;
1340       source_p += 2;
1341 
1342       if (source_p >= source_end_p
1343           || !lit_char_is_binary_digit (source_p[0]))
1344       {
1345         parser_raise_error (context_p, PARSER_ERR_INVALID_BIN_DIGIT);
1346       }
1347 
1348       do
1349       {
1350         source_p++;
1351       }
1352       while (source_p < source_end_p
1353                && lit_char_is_binary_digit (source_p[0]));
1354     }
1355 #endif /* ENABLED (JERRY_ES2015) */
1356     else
1357     {
1358       can_be_float = true;
1359       source_p++;
1360     }
1361   }
1362   else
1363   {
1364     while (source_p < source_end_p
1365            && source_p[0] >= LIT_CHAR_0
1366            && source_p[0] <= LIT_CHAR_9)
1367     {
1368       source_p++;
1369     }
1370 
1371     can_be_float = true;
1372   }
1373 
1374   if (can_be_float)
1375   {
1376     if (source_p < source_end_p
1377         && source_p[0] == LIT_CHAR_DOT)
1378     {
1379       source_p++;
1380       while (source_p < source_end_p
1381              && source_p[0] >= LIT_CHAR_0
1382              && source_p[0] <= LIT_CHAR_9)
1383       {
1384         source_p++;
1385       }
1386     }
1387 
1388     if (source_p < source_end_p
1389         && LEXER_TO_ASCII_LOWERCASE (source_p[0]) == LIT_CHAR_LOWERCASE_E)
1390     {
1391       source_p++;
1392 
1393       if (source_p < source_end_p
1394           && (source_p[0] == LIT_CHAR_PLUS || source_p[0] == LIT_CHAR_MINUS))
1395       {
1396         source_p++;
1397       }
1398 
1399       if (source_p >= source_end_p
1400           || source_p[0] < LIT_CHAR_0
1401           || source_p[0] > LIT_CHAR_9)
1402       {
1403         parser_raise_error (context_p, PARSER_ERR_MISSING_EXPONENT);
1404       }
1405 
1406       do
1407       {
1408         source_p++;
1409       }
1410       while (source_p < source_end_p
1411              && source_p[0] >= LIT_CHAR_0
1412              && source_p[0] <= LIT_CHAR_9);
1413     }
1414   }
1415 
1416   length = (size_t) (source_p - context_p->source_p);
1417   if (length > PARSER_MAXIMUM_IDENT_LENGTH)
1418   {
1419     parser_raise_error (context_p, PARSER_ERR_NUMBER_TOO_LONG);
1420   }
1421 
1422   context_p->token.lit_location.length = (prop_length_t) length;
1423   PARSER_PLUS_EQUAL_LC (context_p->column, length);
1424   context_p->source_p = source_p;
1425 
1426   if (source_p < source_end_p && lexer_parse_identifier (context_p, LEXER_PARSE_CHECK_START_AND_RETURN))
1427   {
1428     parser_raise_error (context_p, PARSER_ERR_IDENTIFIER_AFTER_NUMBER);
1429   }
1430 } /* lexer_parse_number */
1431 
/**
 * Switch case for a token which is exactly one character long (e.g. comma).
 *
 * Must be expanded inside a switch where 'context_p' and 'length' are visible.
 *
 * @param char1 character
 * @param type1 token type assigned for the character
 */
#define LEXER_TYPE_A_TOKEN(char1, type1) \
  case (uint8_t) (char1): \
  { \
    length = 1; \
    context_p->token.type = (type1); \
    break; \
  }

/**
 * Switch case for a token pair, where the one character long token
 * is the prefix of the two character long token (e.g. % and %=).
 *
 * On entry 'length' must hold the number of available source bytes,
 * on exit it holds the length of the recognized token.
 *
 * @param char1 first character
 * @param type1 token type of the one character long form
 * @param char2 second character
 * @param type2 token type of the two character long form
 */
#define LEXER_TYPE_B_TOKEN(char1, type1, char2, type2) \
  case (uint8_t) (char1): \
  { \
    if (length < 2 || context_p->source_p[1] != (uint8_t) (char2)) \
    { \
      context_p->token.type = (type1); \
      length = 1; \
    } \
    else \
    { \
      context_p->token.type = (type2); \
      length = 2; \
    } \
    break; \
  }

/**
 * Switch case for three tokens, where the one character long token
 * is the prefix of the other two (e.g. &, &&, &=).
 *
 * On entry 'length' must hold the number of available source bytes,
 * on exit it holds the length of the recognized token.
 *
 * @param char1 first character
 * @param type1 token type of the one character long form
 * @param char2 second character of the first two character long form
 * @param type2 token type of the first two character long form
 * @param char3 second character of the other two character long form
 * @param type3 token type of the other two character long form
 */
#define LEXER_TYPE_C_TOKEN(char1, type1, char2, type2, char3, type3) \
  case (uint8_t) (char1): \
  { \
    if (length >= 2 && context_p->source_p[1] == (uint8_t) (char2)) \
    { \
      context_p->token.type = (type2); \
      length = 2; \
    } \
    else if (length >= 2 && context_p->source_p[1] == (uint8_t) (char3)) \
    { \
      context_p->token.type = (type3); \
      length = 2; \
    } \
    else \
    { \
      context_p->token.type = (type1); \
      length = 1; \
    } \
    break; \
  }
1503 
1504 /**
1505  * Get next token.
1506  */
1507 void
lexer_next_token(parser_context_t * context_p)1508 lexer_next_token (parser_context_t *context_p) /**< context */
1509 {
1510   size_t length;
1511 
1512   lexer_skip_spaces (context_p);
1513 
1514   context_p->token.line = context_p->line;
1515   context_p->token.column = context_p->column;
1516 
1517   length = (size_t) (context_p->source_end_p - context_p->source_p);
1518   if (length == 0)
1519   {
1520     context_p->token.type = LEXER_EOS;
1521     return;
1522   }
1523 
1524   if (lexer_parse_identifier (context_p, LEXER_PARSE_CHECK_KEYWORDS))
1525   {
1526     return;
1527   }
1528 
1529   if (context_p->source_p[0] >= LIT_CHAR_0 && context_p->source_p[0] <= LIT_CHAR_9)
1530   {
1531     lexer_parse_number (context_p);
1532     return;
1533   }
1534 
1535   switch (context_p->source_p[0])
1536   {
1537     LEXER_TYPE_A_TOKEN (LIT_CHAR_LEFT_BRACE, LEXER_LEFT_BRACE);
1538     LEXER_TYPE_A_TOKEN (LIT_CHAR_LEFT_PAREN, LEXER_LEFT_PAREN);
1539     LEXER_TYPE_A_TOKEN (LIT_CHAR_LEFT_SQUARE, LEXER_LEFT_SQUARE);
1540     LEXER_TYPE_A_TOKEN (LIT_CHAR_RIGHT_BRACE, LEXER_RIGHT_BRACE);
1541     LEXER_TYPE_A_TOKEN (LIT_CHAR_RIGHT_PAREN, LEXER_RIGHT_PAREN);
1542     LEXER_TYPE_A_TOKEN (LIT_CHAR_RIGHT_SQUARE, LEXER_RIGHT_SQUARE);
1543     LEXER_TYPE_A_TOKEN (LIT_CHAR_SEMICOLON, LEXER_SEMICOLON);
1544     LEXER_TYPE_A_TOKEN (LIT_CHAR_COMMA, LEXER_COMMA);
1545 
1546     case (uint8_t) LIT_CHAR_DOT:
1547     {
1548       if (length >= 2
1549           && (context_p->source_p[1] >= LIT_CHAR_0 && context_p->source_p[1] <= LIT_CHAR_9))
1550       {
1551         lexer_parse_number (context_p);
1552         return;
1553       }
1554 
1555 #if ENABLED (JERRY_ES2015)
1556       if (length >= 3
1557           && context_p->source_p[1] == LIT_CHAR_DOT
1558           && context_p->source_p[2] == LIT_CHAR_DOT)
1559       {
1560         context_p->token.type = LEXER_THREE_DOTS;
1561         length = 3;
1562         break;
1563       }
1564 #endif /* ENABLED (JERRY_ES2015) */
1565 
1566       context_p->token.type = LEXER_DOT;
1567       length = 1;
1568       break;
1569     }
1570 
1571     case (uint8_t) LIT_CHAR_LESS_THAN:
1572     {
1573       if (length >= 2)
1574       {
1575         if (context_p->source_p[1] == (uint8_t) LIT_CHAR_EQUALS)
1576         {
1577           context_p->token.type = LEXER_LESS_EQUAL;
1578           length = 2;
1579           break;
1580         }
1581 
1582         if (context_p->source_p[1] == (uint8_t) LIT_CHAR_LESS_THAN)
1583         {
1584           if (length >= 3 && context_p->source_p[2] == (uint8_t) LIT_CHAR_EQUALS)
1585           {
1586             context_p->token.type = LEXER_ASSIGN_LEFT_SHIFT;
1587             length = 3;
1588             break;
1589           }
1590 
1591           context_p->token.type = LEXER_LEFT_SHIFT;
1592           length = 2;
1593           break;
1594         }
1595       }
1596 
1597       context_p->token.type = LEXER_LESS;
1598       length = 1;
1599       break;
1600     }
1601 
1602     case (uint8_t) LIT_CHAR_GREATER_THAN:
1603     {
1604       if (length >= 2)
1605       {
1606         if (context_p->source_p[1] == (uint8_t) LIT_CHAR_EQUALS)
1607         {
1608           context_p->token.type = LEXER_GREATER_EQUAL;
1609           length = 2;
1610           break;
1611         }
1612 
1613         if (context_p->source_p[1] == (uint8_t) LIT_CHAR_GREATER_THAN)
1614         {
1615           if (length >= 3)
1616           {
1617             if (context_p->source_p[2] == (uint8_t) LIT_CHAR_EQUALS)
1618             {
1619               context_p->token.type = LEXER_ASSIGN_RIGHT_SHIFT;
1620               length = 3;
1621               break;
1622             }
1623 
1624             if (context_p->source_p[2] == (uint8_t) LIT_CHAR_GREATER_THAN)
1625             {
1626               if (length >= 4 && context_p->source_p[3] == (uint8_t) LIT_CHAR_EQUALS)
1627               {
1628                 context_p->token.type = LEXER_ASSIGN_UNS_RIGHT_SHIFT;
1629                 length = 4;
1630                 break;
1631               }
1632 
1633               context_p->token.type = LEXER_UNS_RIGHT_SHIFT;
1634               length = 3;
1635               break;
1636             }
1637           }
1638 
1639           context_p->token.type = LEXER_RIGHT_SHIFT;
1640           length = 2;
1641           break;
1642         }
1643       }
1644 
1645       context_p->token.type = LEXER_GREATER;
1646       length = 1;
1647       break;
1648     }
1649 
1650     case (uint8_t) LIT_CHAR_EQUALS:
1651     {
1652       if (length >= 2)
1653       {
1654         if (context_p->source_p[1] == (uint8_t) LIT_CHAR_EQUALS)
1655         {
1656           if (length >= 3 && context_p->source_p[2] == (uint8_t) LIT_CHAR_EQUALS)
1657           {
1658             context_p->token.type = LEXER_STRICT_EQUAL;
1659             length = 3;
1660             break;
1661           }
1662 
1663           context_p->token.type = LEXER_EQUAL;
1664           length = 2;
1665           break;
1666         }
1667 
1668 #if ENABLED (JERRY_ES2015)
1669         if (context_p->source_p[1] == (uint8_t) LIT_CHAR_GREATER_THAN)
1670         {
1671           context_p->token.type = LEXER_ARROW;
1672           length = 2;
1673           break;
1674         }
1675 #endif /* ENABLED (JERRY_ES2015) */
1676       }
1677 
1678       context_p->token.type = LEXER_ASSIGN;
1679       length = 1;
1680       break;
1681     }
1682 
1683     case (uint8_t) LIT_CHAR_EXCLAMATION:
1684     {
1685       if (length >= 2 && context_p->source_p[1] == (uint8_t) LIT_CHAR_EQUALS)
1686       {
1687         if (length >= 3 && context_p->source_p[2] == (uint8_t) LIT_CHAR_EQUALS)
1688         {
1689           context_p->token.type = LEXER_STRICT_NOT_EQUAL;
1690           length = 3;
1691           break;
1692         }
1693 
1694         context_p->token.type = LEXER_NOT_EQUAL;
1695         length = 2;
1696         break;
1697       }
1698 
1699       context_p->token.type = LEXER_LOGICAL_NOT;
1700       length = 1;
1701       break;
1702     }
1703 
1704     LEXER_TYPE_C_TOKEN (LIT_CHAR_PLUS, LEXER_ADD, LIT_CHAR_EQUALS,
1705                         LEXER_ASSIGN_ADD, LIT_CHAR_PLUS, LEXER_INCREASE)
1706     LEXER_TYPE_C_TOKEN (LIT_CHAR_MINUS, LEXER_SUBTRACT, LIT_CHAR_EQUALS,
1707                         LEXER_ASSIGN_SUBTRACT, LIT_CHAR_MINUS, LEXER_DECREASE)
1708 
1709     case (uint8_t) LIT_CHAR_ASTERISK:
1710     {
1711       if (length >= 2)
1712       {
1713         if (context_p->source_p[1] == (uint8_t) LIT_CHAR_EQUALS)
1714         {
1715           context_p->token.type = LEXER_ASSIGN_MULTIPLY;
1716           length = 2;
1717           break;
1718         }
1719 
1720 #if ENABLED (JERRY_ES2015)
1721         if (context_p->source_p[1] == (uint8_t) LIT_CHAR_ASTERISK)
1722         {
1723           if (length >= 3 && context_p->source_p[2] == (uint8_t) LIT_CHAR_EQUALS)
1724           {
1725             context_p->token.type = LEXER_ASSIGN_EXPONENTIATION;
1726             length = 3;
1727             break;
1728           }
1729 
1730           context_p->token.type = LEXER_EXPONENTIATION;
1731           length = 2;
1732           break;
1733         }
1734 #endif /* ENABLED (JERRY_ES2015) */
1735       }
1736 
1737       context_p->token.type = LEXER_MULTIPLY;
1738       length = 1;
1739       break;
1740     }
1741 
1742     LEXER_TYPE_B_TOKEN (LIT_CHAR_SLASH, LEXER_DIVIDE, LIT_CHAR_EQUALS,
1743                         LEXER_ASSIGN_DIVIDE)
1744     LEXER_TYPE_B_TOKEN (LIT_CHAR_PERCENT, LEXER_MODULO, LIT_CHAR_EQUALS,
1745                         LEXER_ASSIGN_MODULO)
1746 
1747     LEXER_TYPE_C_TOKEN (LIT_CHAR_AMPERSAND, LEXER_BIT_AND, LIT_CHAR_EQUALS,
1748                         LEXER_ASSIGN_BIT_AND, LIT_CHAR_AMPERSAND, LEXER_LOGICAL_AND)
1749     LEXER_TYPE_C_TOKEN (LIT_CHAR_VLINE, LEXER_BIT_OR, LIT_CHAR_EQUALS,
1750                         LEXER_ASSIGN_BIT_OR, LIT_CHAR_VLINE, LEXER_LOGICAL_OR)
1751 
1752     LEXER_TYPE_B_TOKEN (LIT_CHAR_CIRCUMFLEX, LEXER_BIT_XOR, LIT_CHAR_EQUALS,
1753                         LEXER_ASSIGN_BIT_XOR)
1754 
1755     LEXER_TYPE_A_TOKEN (LIT_CHAR_TILDE, LEXER_BIT_NOT);
1756     LEXER_TYPE_A_TOKEN (LIT_CHAR_QUESTION, LEXER_QUESTION_MARK);
1757     LEXER_TYPE_A_TOKEN (LIT_CHAR_COLON, LEXER_COLON);
1758 
1759     case LIT_CHAR_SINGLE_QUOTE:
1760     case LIT_CHAR_DOUBLE_QUOTE:
1761 #if ENABLED (JERRY_ES2015)
1762     case LIT_CHAR_GRAVE_ACCENT:
1763 #endif /* ENABLED (JERRY_ES2015) */
1764     {
1765       lexer_parse_string (context_p, LEXER_STRING_NO_OPTS);
1766       return;
1767     }
1768 
1769     default:
1770     {
1771       parser_raise_error (context_p, PARSER_ERR_INVALID_CHARACTER);
1772     }
1773   }
1774 
1775   context_p->source_p += length;
1776   PARSER_PLUS_EQUAL_LC (context_p->column, length);
1777 } /* lexer_next_token */
1778 
1779 #undef LEXER_TYPE_A_TOKEN
1780 #undef LEXER_TYPE_B_TOKEN
1781 #undef LEXER_TYPE_C_TOKEN
1782 #undef LEXER_TYPE_D_TOKEN
1783 
1784 /**
1785  * Checks whether the next token starts with the specified character.
1786  *
1787  * @return true - if the next is the specified character
1788  *         false - otherwise
1789  */
1790 bool
lexer_check_next_character(parser_context_t * context_p,lit_utf8_byte_t character)1791 lexer_check_next_character (parser_context_t *context_p, /**< context */
1792                             lit_utf8_byte_t character) /**< specified character */
1793 {
1794   if (!(context_p->token.flags & LEXER_NO_SKIP_SPACES))
1795   {
1796     lexer_skip_spaces (context_p);
1797     context_p->token.flags = (uint8_t) (context_p->token.flags | LEXER_NO_SKIP_SPACES);
1798   }
1799 
1800   return (context_p->source_p < context_p->source_end_p
1801           && context_p->source_p[0] == (uint8_t) character);
1802 } /* lexer_check_next_character */
1803 
1804 /**
1805  * Checks whether the next token starts with either specified characters.
1806  *
1807  * @return true - if the next is the specified character
1808  *         false - otherwise
1809  */
1810 bool
lexer_check_next_characters(parser_context_t * context_p,lit_utf8_byte_t character1,lit_utf8_byte_t character2)1811 lexer_check_next_characters (parser_context_t *context_p, /**< context */
1812                              lit_utf8_byte_t character1, /**< first alternative character */
1813                              lit_utf8_byte_t character2) /**< second alternative character */
1814 {
1815   if (!(context_p->token.flags & LEXER_NO_SKIP_SPACES))
1816   {
1817     lexer_skip_spaces (context_p);
1818     context_p->token.flags = (uint8_t) (context_p->token.flags | LEXER_NO_SKIP_SPACES);
1819   }
1820 
1821   return (context_p->source_p < context_p->source_end_p
1822           && (context_p->source_p[0] == (uint8_t) character1
1823               || context_p->source_p[0] == (uint8_t) character2));
1824 } /* lexer_check_next_characters */
1825 
1826 /**
1827  * Consumes the next character. The character cannot be a white space.
1828  *
1829  * @return consumed character
1830  */
1831 inline uint8_t JERRY_ATTR_ALWAYS_INLINE
lexer_consume_next_character(parser_context_t * context_p)1832 lexer_consume_next_character (parser_context_t *context_p) /**< context */
1833 {
1834   JERRY_ASSERT (context_p->source_p < context_p->source_end_p);
1835 
1836   context_p->token.flags &= (uint8_t) ~LEXER_NO_SKIP_SPACES;
1837 
1838   PARSER_PLUS_EQUAL_LC (context_p->column, 1);
1839   return *context_p->source_p++;
1840 } /* lexer_consume_next_character */
1841 
1842 /**
1843  * Checks whether the next character can be the start of a post primary expression
1844  *
1845  * Note:
1846  *     the result is not precise, but this inprecise result
1847  *     has no side effects for negating number literals
1848  *
1849  * @return true if the next character can be the start of a post primary expression
1850  */
1851 bool
lexer_check_post_primary_exp(parser_context_t * context_p)1852 lexer_check_post_primary_exp (parser_context_t *context_p) /**< context */
1853 {
1854   if (!(context_p->token.flags & LEXER_NO_SKIP_SPACES))
1855   {
1856     lexer_skip_spaces (context_p);
1857     context_p->token.flags = (uint8_t) (context_p->token.flags | LEXER_NO_SKIP_SPACES);
1858   }
1859 
1860   if (context_p->source_p >= context_p->source_end_p)
1861   {
1862     return false;
1863   }
1864 
1865   switch (context_p->source_p[0])
1866   {
1867     case LIT_CHAR_DOT:
1868     case LIT_CHAR_LEFT_PAREN:
1869     case LIT_CHAR_LEFT_SQUARE:
1870 #if ENABLED (JERRY_ES2015)
1871     case LIT_CHAR_GRAVE_ACCENT:
1872 #endif /* ENABLED (JERRY_ES2015) */
1873     {
1874       return true;
1875     }
1876     case LIT_CHAR_PLUS:
1877     case LIT_CHAR_MINUS:
1878     {
1879       return (!(context_p->token.flags & LEXER_WAS_NEWLINE)
1880               && context_p->source_p + 1 < context_p->source_end_p
1881               && context_p->source_p[1] == context_p->source_p[0]);
1882     }
1883 #if ENABLED (JERRY_ES2015)
1884     case LIT_CHAR_ASTERISK:
1885     {
1886       return (context_p->source_p + 1 < context_p->source_end_p
1887               && context_p->source_p[1] == (uint8_t) LIT_CHAR_ASTERISK);
1888     }
1889 #endif /* ENABLED (JERRY_ES2015) */
1890   }
1891 
1892   return false;
1893 } /* lexer_check_post_primary_exp */
1894 
1895 #if ENABLED (JERRY_ES2015)
1896 
1897 /**
1898  * Checks whether the next token is a type used for detecting arrow functions.
1899  *
1900  * @return true if the next token is an arrow token
1901  */
1902 bool
lexer_check_arrow(parser_context_t * context_p)1903 lexer_check_arrow (parser_context_t *context_p) /**< context */
1904 {
1905   if (!(context_p->token.flags & LEXER_NO_SKIP_SPACES))
1906   {
1907     lexer_skip_spaces (context_p);
1908     context_p->token.flags = (uint8_t) (context_p->token.flags | LEXER_NO_SKIP_SPACES);
1909   }
1910 
1911   return (!(context_p->token.flags & LEXER_WAS_NEWLINE)
1912           && context_p->source_p + 2 <= context_p->source_end_p
1913           && context_p->source_p[0] == (uint8_t) LIT_CHAR_EQUALS
1914           && context_p->source_p[1] == (uint8_t) LIT_CHAR_GREATER_THAN);
1915 } /* lexer_check_arrow */
1916 
1917 /**
1918  * Checks whether the next token is a comma or equal sign.
1919  *
1920  * @return true if the next token is a comma or equal sign
1921  */
1922 bool
lexer_check_arrow_param(parser_context_t * context_p)1923 lexer_check_arrow_param (parser_context_t *context_p) /**< context */
1924 {
1925   JERRY_ASSERT (context_p->token.flags & LEXER_NO_SKIP_SPACES);
1926 
1927   if (context_p->source_p >= context_p->source_end_p)
1928   {
1929     return false;
1930   }
1931 
1932   if (context_p->source_p[0] == LIT_CHAR_COMMA)
1933   {
1934     return true;
1935   }
1936 
1937   if (context_p->source_p[0] != LIT_CHAR_EQUALS)
1938   {
1939     return false;
1940   }
1941 
1942   return (context_p->source_p + 1 >= context_p->source_end_p
1943           || context_p->source_p[1] != LIT_CHAR_EQUALS);
1944 } /* lexer_check_arrow_param */
1945 
1946 /**
1947  * Checks whether the yield expression has no argument.
1948  *
1949  * @return true if it has no argument
1950  */
1951 bool
lexer_check_yield_no_arg(parser_context_t * context_p)1952 lexer_check_yield_no_arg (parser_context_t *context_p) /**< context */
1953 {
1954   if (context_p->token.flags & LEXER_WAS_NEWLINE)
1955   {
1956     return true;
1957   }
1958 
1959   switch (context_p->token.type)
1960   {
1961     case LEXER_RIGHT_BRACE:
1962     case LEXER_RIGHT_PAREN:
1963     case LEXER_RIGHT_SQUARE:
1964     case LEXER_COMMA:
1965     case LEXER_COLON:
1966     case LEXER_SEMICOLON:
1967     case LEXER_EOS:
1968     {
1969       return true;
1970     }
1971     default:
1972     {
1973       return false;
1974     }
1975   }
1976 } /* lexer_check_yield_no_arg */
1977 
1978 /**
1979  * Checks whether the next token is a multiply and consumes it.
1980  *
1981  * @return true if the next token is a multiply
1982  */
1983 bool
lexer_consume_generator(parser_context_t * context_p)1984 lexer_consume_generator (parser_context_t *context_p) /**< context */
1985 {
1986   if (!(context_p->token.flags & LEXER_NO_SKIP_SPACES))
1987   {
1988     lexer_skip_spaces (context_p);
1989     context_p->token.flags = (uint8_t) (context_p->token.flags | LEXER_NO_SKIP_SPACES);
1990   }
1991 
1992   if (context_p->source_p >= context_p->source_end_p
1993       || context_p->source_p[0] != LIT_CHAR_ASTERISK
1994       || (context_p->source_p + 1 < context_p->source_end_p
1995           && (context_p->source_p[1] == LIT_CHAR_EQUALS || context_p->source_p[1] == LIT_CHAR_ASTERISK)))
1996   {
1997     return false;
1998   }
1999 
2000   lexer_consume_next_character (context_p);
2001   context_p->token.type = LEXER_MULTIPLY;
2002   return true;
2003 } /* lexer_consume_generator */
2004 
2005 /**
2006  * Update await / yield keywords after an arrow function with expression.
2007  */
2008 void
lexer_update_await_yield(parser_context_t * context_p,uint32_t status_flags)2009 lexer_update_await_yield (parser_context_t *context_p, /**< context */
2010                           uint32_t status_flags) /**< parser status flags after restore */
2011 {
2012   if (!(status_flags & PARSER_IS_STRICT))
2013   {
2014     if (status_flags & PARSER_IS_GENERATOR_FUNCTION)
2015     {
2016       if (context_p->token.type == LEXER_LITERAL
2017           && context_p->token.keyword_type == LEXER_KEYW_YIELD)
2018       {
2019         context_p->token.type = LEXER_KEYW_YIELD;
2020       }
2021     }
2022     else
2023     {
2024       if (context_p->token.type == LEXER_KEYW_YIELD)
2025       {
2026         JERRY_ASSERT (context_p->token.keyword_type == LEXER_KEYW_YIELD);
2027         context_p->token.type = LEXER_LITERAL;
2028       }
2029     }
2030   }
2031 
2032   if (!(context_p->global_status_flags & ECMA_PARSE_MODULE))
2033   {
2034     if (status_flags & PARSER_IS_ASYNC_FUNCTION)
2035     {
2036       if (context_p->token.type == LEXER_LITERAL
2037           && context_p->token.keyword_type == LEXER_KEYW_AWAIT)
2038       {
2039         context_p->token.type = LEXER_KEYW_AWAIT;
2040       }
2041     }
2042     else
2043     {
2044       if (context_p->token.type == LEXER_KEYW_AWAIT)
2045       {
2046         JERRY_ASSERT (context_p->token.keyword_type == LEXER_KEYW_AWAIT);
2047         context_p->token.type = LEXER_LITERAL;
2048       }
2049     }
2050   }
2051 } /* lexer_update_await_yield */
2052 
2053 #endif /* ENABLED (JERRY_ES2015) */
2054 
2055 /**
2056  * Convert an ident with escapes to a utf8 string.
2057  */
2058 void
lexer_convert_ident_to_cesu8(uint8_t * destination_p,const uint8_t * source_p,prop_length_t length)2059 lexer_convert_ident_to_cesu8 (uint8_t *destination_p, /**< destination string */
2060                               const uint8_t *source_p, /**< source string */
2061                               prop_length_t length) /**< length of destination string */
2062 {
2063   const uint8_t *destination_end_p = destination_p + length;
2064 
2065   JERRY_ASSERT (length <= PARSER_MAXIMUM_IDENT_LENGTH);
2066 
2067   do
2068   {
2069     if (*source_p == LIT_CHAR_BACKSLASH)
2070     {
2071       source_p += 2;
2072       destination_p += lit_code_point_to_cesu8_bytes (destination_p,
2073                                                       lexer_unchecked_hex_to_character (&source_p));
2074       continue;
2075     }
2076 
2077 #if ENABLED (JERRY_ES2015)
2078     if (*source_p >= LIT_UTF8_4_BYTE_MARKER)
2079     {
2080       lit_four_byte_utf8_char_to_cesu8 (destination_p, source_p);
2081 
2082       destination_p += 6;
2083       source_p += 4;
2084       continue;
2085     }
2086 #endif /* ENABLED (JERRY_ES2015) */
2087 
2088     *destination_p++ = *source_p++;
2089   }
2090   while (destination_p < destination_end_p);
2091 } /* lexer_convert_ident_to_cesu8 */
2092 
2093 /**
2094  * Convert literal to character sequence
2095  */
2096 const uint8_t *
lexer_convert_literal_to_chars(parser_context_t * context_p,const lexer_lit_location_t * literal_p,uint8_t * local_byte_array_p,lexer_string_options_t opts)2097 lexer_convert_literal_to_chars (parser_context_t *context_p, /**< context */
2098                                 const lexer_lit_location_t *literal_p, /**< literal location */
2099                                 uint8_t *local_byte_array_p, /**< local byte array to store chars */
2100                                 lexer_string_options_t opts) /**< options */
2101 {
2102   JERRY_ASSERT (context_p->u.allocated_buffer_p == NULL);
2103 
2104   if (!literal_p->has_escape)
2105   {
2106     return literal_p->char_p;
2107   }
2108 
2109   uint8_t *destination_start_p;
2110   if (literal_p->length > LEXER_MAX_LITERAL_LOCAL_BUFFER_SIZE)
2111   {
2112     context_p->u.allocated_buffer_p = (uint8_t *) parser_malloc_local (context_p, literal_p->length);
2113     context_p->allocated_buffer_size = literal_p->length;
2114     destination_start_p = context_p->u.allocated_buffer_p;
2115   }
2116   else
2117   {
2118     destination_start_p = local_byte_array_p;
2119   }
2120 
2121   if (literal_p->type == LEXER_IDENT_LITERAL)
2122   {
2123     lexer_convert_ident_to_cesu8 (destination_start_p, literal_p->char_p, literal_p->length);
2124     return destination_start_p;
2125   }
2126 
2127   const uint8_t *source_p = literal_p->char_p;
2128   uint8_t *destination_p = destination_start_p;
2129 
2130   uint8_t str_end_character = source_p[-1];
2131 
2132 #if ENABLED (JERRY_ES2015)
2133   if (str_end_character == LIT_CHAR_RIGHT_BRACE)
2134   {
2135     str_end_character = LIT_CHAR_GRAVE_ACCENT;
2136   }
2137 
2138   bool is_raw = (opts & LEXER_STRING_RAW) != 0;
2139 #else /* !ENABLED (JERRY_ES2015) */
2140   JERRY_UNUSED (opts);
2141   bool is_raw = false;
2142 #endif /* ENABLED (JERRY_ES2015) */
2143 
2144   while (true)
2145   {
2146     if (*source_p == str_end_character)
2147     {
2148       break;
2149     }
2150 
2151     if (*source_p == LIT_CHAR_BACKSLASH && !is_raw)
2152     {
2153       uint8_t conv_character;
2154 
2155       source_p++;
2156       JERRY_ASSERT (source_p < context_p->source_end_p);
2157 
2158       /* Newline is ignored. */
2159       if (*source_p == LIT_CHAR_CR)
2160       {
2161         source_p++;
2162         JERRY_ASSERT (source_p < context_p->source_end_p);
2163 
2164         if (*source_p == LIT_CHAR_LF)
2165         {
2166           source_p++;
2167         }
2168         continue;
2169       }
2170       else if (*source_p == LIT_CHAR_LF)
2171       {
2172         source_p++;
2173         continue;
2174       }
2175       else if (*source_p == LEXER_NEWLINE_LS_PS_BYTE_1 && LEXER_NEWLINE_LS_PS_BYTE_23 (source_p))
2176       {
2177         source_p += 3;
2178         continue;
2179       }
2180 
2181       if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_3)
2182       {
2183         lit_code_point_t octal_number = (uint32_t) (*source_p - LIT_CHAR_0);
2184 
2185         source_p++;
2186         JERRY_ASSERT (source_p < context_p->source_end_p);
2187 
2188         if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_7)
2189         {
2190           octal_number = octal_number * 8 + (uint32_t) (*source_p - LIT_CHAR_0);
2191           source_p++;
2192           JERRY_ASSERT (source_p < context_p->source_end_p);
2193 
2194           if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_7)
2195           {
2196             octal_number = octal_number * 8 + (uint32_t) (*source_p - LIT_CHAR_0);
2197             source_p++;
2198             JERRY_ASSERT (source_p < context_p->source_end_p);
2199           }
2200         }
2201 
2202         destination_p += lit_code_point_to_cesu8_bytes (destination_p, octal_number);
2203         continue;
2204       }
2205 
2206       if (*source_p >= LIT_CHAR_4 && *source_p <= LIT_CHAR_7)
2207       {
2208         uint32_t octal_number = (uint32_t) (*source_p - LIT_CHAR_0);
2209 
2210         source_p++;
2211         JERRY_ASSERT (source_p < context_p->source_end_p);
2212 
2213         if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_7)
2214         {
2215           octal_number = octal_number * 8 + (uint32_t) (*source_p - LIT_CHAR_0);
2216           source_p++;
2217           JERRY_ASSERT (source_p < context_p->source_end_p);
2218         }
2219 
2220         *destination_p++ = (uint8_t) octal_number;
2221         continue;
2222       }
2223 
2224       if (*source_p == LIT_CHAR_LOWERCASE_X || *source_p == LIT_CHAR_LOWERCASE_U)
2225       {
2226         source_p++;
2227         destination_p += lit_code_point_to_cesu8_bytes (destination_p,
2228                                                         lexer_unchecked_hex_to_character (&source_p));
2229         continue;
2230       }
2231 
2232       conv_character = *source_p;
2233       switch (*source_p)
2234       {
2235         case LIT_CHAR_LOWERCASE_B:
2236         {
2237           conv_character = 0x08;
2238           break;
2239         }
2240         case LIT_CHAR_LOWERCASE_T:
2241         {
2242           conv_character = 0x09;
2243           break;
2244         }
2245         case LIT_CHAR_LOWERCASE_N:
2246         {
2247           conv_character = 0x0a;
2248           break;
2249         }
2250         case LIT_CHAR_LOWERCASE_V:
2251         {
2252           conv_character = 0x0b;
2253           break;
2254         }
2255         case LIT_CHAR_LOWERCASE_F:
2256         {
2257           conv_character = 0x0c;
2258           break;
2259         }
2260         case LIT_CHAR_LOWERCASE_R:
2261         {
2262           conv_character = 0x0d;
2263           break;
2264         }
2265       }
2266 
2267       if (conv_character != *source_p)
2268       {
2269         *destination_p++ = conv_character;
2270         source_p++;
2271         continue;
2272       }
2273     }
2274 #if ENABLED (JERRY_ES2015)
2275     else if (str_end_character == LIT_CHAR_GRAVE_ACCENT)
2276     {
2277       if (source_p[0] == LIT_CHAR_DOLLAR_SIGN
2278           && source_p[1] == LIT_CHAR_LEFT_BRACE)
2279       {
2280         source_p++;
2281         JERRY_ASSERT (source_p < context_p->source_end_p);
2282         break;
2283       }
2284       if (*source_p == LIT_CHAR_CR)
2285       {
2286         *destination_p++ = LIT_CHAR_LF;
2287         source_p++;
2288         if (*source_p != str_end_character
2289             && *source_p == LIT_CHAR_LF)
2290         {
2291           source_p++;
2292         }
2293         continue;
2294       }
2295       if ((*source_p == LIT_CHAR_BACKSLASH) && is_raw)
2296       {
2297         JERRY_ASSERT (source_p + 1 < context_p->source_end_p);
2298         if ((*(source_p + 1) == LIT_CHAR_GRAVE_ACCENT) || (*(source_p + 1) == LIT_CHAR_BACKSLASH))
2299         {
2300           *destination_p++ = *source_p++;
2301           *destination_p++ = *source_p++;
2302           continue;
2303         }
2304       }
2305     }
2306 #endif /* ENABLED (JERRY_ES2015) */
2307 
2308     if (*source_p >= LIT_UTF8_4_BYTE_MARKER)
2309     {
2310       /* Processing 4 byte unicode sequence (even if it is
2311         * after a backslash). Always converted to two 3 byte
2312         * long sequence. */
2313       lit_four_byte_utf8_char_to_cesu8 (destination_p, source_p);
2314 
2315       destination_p += 6;
2316       source_p += 4;
2317       continue;
2318     }
2319 
2320     *destination_p++ = *source_p++;
2321 
2322     /* There is no need to check the source_end_p
2323       * since the string is terminated by a quotation mark. */
2324     while (IS_UTF8_INTERMEDIATE_OCTET (*source_p))
2325     {
2326       *destination_p++ = *source_p++;
2327     }
2328   }
2329 
2330   JERRY_ASSERT (destination_p == destination_start_p + literal_p->length);
2331 
2332   return destination_start_p;
2333 } /* lexer_convert_literal_to_chars */
2334 
2335 /**
2336  * Construct a literal object from an identifier.
2337  */
2338 void
lexer_construct_literal_object(parser_context_t * context_p,const lexer_lit_location_t * lit_location_p,uint8_t literal_type)2339 lexer_construct_literal_object (parser_context_t *context_p, /**< context */
2340                                 const lexer_lit_location_t *lit_location_p, /**< literal location */
2341                                 uint8_t literal_type) /**< final literal type */
2342 {
2343   uint8_t local_byte_array[LEXER_MAX_LITERAL_LOCAL_BUFFER_SIZE];
2344 
2345   const uint8_t *char_p = lexer_convert_literal_to_chars (context_p,
2346                                                           lit_location_p,
2347                                                           local_byte_array,
2348                                                           LEXER_STRING_NO_OPTS);
2349 
2350   size_t length = lit_location_p->length;
2351   parser_list_iterator_t literal_iterator;
2352   lexer_literal_t *literal_p;
2353   uint32_t literal_index = 0;
2354   bool search_scope_stack = (literal_type == LEXER_IDENT_LITERAL);
2355 
2356   if (JERRY_UNLIKELY (literal_type == LEXER_NEW_IDENT_LITERAL))
2357   {
2358     literal_type = LEXER_IDENT_LITERAL;
2359   }
2360 
2361   JERRY_ASSERT (literal_type == LEXER_IDENT_LITERAL
2362                 || literal_type == LEXER_STRING_LITERAL);
2363 
2364   JERRY_ASSERT (literal_type != LEXER_IDENT_LITERAL || length <= PARSER_MAXIMUM_IDENT_LENGTH);
2365   JERRY_ASSERT (literal_type != LEXER_STRING_LITERAL || length <= PARSER_MAXIMUM_STRING_LENGTH);
2366 
2367   parser_list_iterator_init (&context_p->literal_pool, &literal_iterator);
2368 
2369   while ((literal_p = (lexer_literal_t *) parser_list_iterator_next (&literal_iterator)) != NULL)
2370   {
2371     if (literal_p->type == literal_type
2372         && literal_p->prop.length == length
2373         && memcmp (literal_p->u.char_p, char_p, length) == 0)
2374     {
2375       context_p->lit_object.literal_p = literal_p;
2376       context_p->lit_object.index = (uint16_t) literal_index;
2377 
2378       parser_free_allocated_buffer (context_p);
2379 
2380       if (search_scope_stack)
2381       {
2382         parser_scope_stack_t *scope_stack_start_p = context_p->scope_stack_p;
2383         parser_scope_stack_t *scope_stack_p = scope_stack_start_p + context_p->scope_stack_top;
2384 
2385         while (scope_stack_p > scope_stack_start_p)
2386         {
2387           scope_stack_p--;
2388 
2389           if (scope_stack_p->map_from == literal_index)
2390           {
2391             JERRY_ASSERT (scanner_decode_map_to (scope_stack_p) >= PARSER_REGISTER_START
2392                           || (literal_p->status_flags & LEXER_FLAG_USED));
2393             context_p->lit_object.index = scanner_decode_map_to (scope_stack_p);
2394             return;
2395           }
2396         }
2397 
2398         literal_p->status_flags |= LEXER_FLAG_USED;
2399       }
2400       return;
2401     }
2402 
2403     literal_index++;
2404   }
2405 
2406   JERRY_ASSERT (literal_index == context_p->literal_count);
2407 
2408   if (literal_index >= PARSER_MAXIMUM_NUMBER_OF_LITERALS)
2409   {
2410     parser_raise_error (context_p, PARSER_ERR_LITERAL_LIMIT_REACHED);
2411   }
2412 
2413   literal_p = (lexer_literal_t *) parser_list_append (context_p, &context_p->literal_pool);
2414   literal_p->prop.length = (prop_length_t) length;
2415   literal_p->type = literal_type;
2416 
2417   uint8_t status_flags = LEXER_FLAG_SOURCE_PTR;
2418 
2419   if (length > 0 && char_p == local_byte_array)
2420   {
2421     literal_p->u.char_p = (uint8_t *) jmem_heap_alloc_block (length);
2422     memcpy ((uint8_t *) literal_p->u.char_p, char_p, length);
2423     status_flags = 0;
2424   }
2425   else
2426   {
2427     literal_p->u.char_p = char_p;
2428 
2429     /* Buffer is taken over when a new literal is constructed. */
2430     if (context_p->u.allocated_buffer_p != NULL)
2431     {
2432       JERRY_ASSERT (char_p == context_p->u.allocated_buffer_p);
2433 
2434       context_p->u.allocated_buffer_p = NULL;
2435       status_flags = 0;
2436     }
2437   }
2438 
2439   if (search_scope_stack)
2440   {
2441     status_flags |= LEXER_FLAG_USED;
2442   }
2443 
2444   literal_p->status_flags = status_flags;
2445 
2446   context_p->lit_object.literal_p = literal_p;
2447   context_p->lit_object.index = (uint16_t) literal_index;
2448   context_p->literal_count++;
2449 
2450   JERRY_ASSERT (context_p->u.allocated_buffer_p == NULL);
2451 } /* lexer_construct_literal_object */
2452 
2453 /**
2454  * Construct a number object.
2455  *
2456  * @return true if number is small number
2457  */
2458 bool
lexer_construct_number_object(parser_context_t * context_p,bool is_expr,bool is_negative_number)2459 lexer_construct_number_object (parser_context_t *context_p, /**< context */
2460                                bool is_expr, /**< expression is parsed */
2461                                bool is_negative_number) /**< sign is negative */
2462 {
2463   parser_list_iterator_t literal_iterator;
2464   lexer_literal_t *literal_p;
2465   ecma_number_t num;
2466   uint32_t literal_index = 0;
2467   prop_length_t length = context_p->token.lit_location.length;
2468 
2469   if (context_p->token.extra_value < LEXER_NUMBER_OCTAL)
2470   {
2471     num = ecma_utf8_string_to_number (context_p->token.lit_location.char_p,
2472                                       length);
2473   }
2474   else
2475   {
2476     const uint8_t *src_p = context_p->token.lit_location.char_p;
2477     const uint8_t *src_end_p = src_p + length - 1;
2478     ecma_number_t multiplier = 8.0;
2479 
2480 #if ENABLED (JERRY_ES2015)
2481     if (context_p->token.extra_value == LEXER_NUMBER_BINARY)
2482     {
2483       multiplier = 2.0;
2484     }
2485 #endif /* ENABLED (JERRY_ES2015) */
2486 
2487     num = 0;
2488     do
2489     {
2490       src_p++;
2491       num = num * multiplier + (ecma_number_t) (*src_p - LIT_CHAR_0);
2492     }
2493     while (src_p < src_end_p);
2494   }
2495 
2496   if (is_expr)
2497   {
2498     int32_t int_num = (int32_t) num;
2499 
2500     if (int_num == num
2501         && int_num <= CBC_PUSH_NUMBER_BYTE_RANGE_END
2502         && (int_num != 0 || !is_negative_number))
2503     {
2504       context_p->lit_object.index = (uint16_t) int_num;
2505       return true;
2506     }
2507   }
2508 
2509   if (is_negative_number)
2510   {
2511     num = -num;
2512   }
2513 
2514   ecma_value_t lit_value = ecma_find_or_create_literal_number (num);
2515   parser_list_iterator_init (&context_p->literal_pool, &literal_iterator);
2516 
2517   while ((literal_p = (lexer_literal_t *) parser_list_iterator_next (&literal_iterator)) != NULL)
2518   {
2519     if (literal_p->type == LEXER_NUMBER_LITERAL
2520         && literal_p->u.value == lit_value)
2521     {
2522       context_p->lit_object.literal_p = literal_p;
2523       context_p->lit_object.index = (uint16_t) literal_index;
2524       return false;
2525     }
2526 
2527     literal_index++;
2528   }
2529 
2530   JERRY_ASSERT (literal_index == context_p->literal_count);
2531 
2532   if (literal_index >= PARSER_MAXIMUM_NUMBER_OF_LITERALS)
2533   {
2534     parser_raise_error (context_p, PARSER_ERR_LITERAL_LIMIT_REACHED);
2535   }
2536 
2537   literal_p = (lexer_literal_t *) parser_list_append (context_p, &context_p->literal_pool);
2538   literal_p->u.value = lit_value;
2539   literal_p->prop.length = 0; /* Unused. */
2540   literal_p->type = LEXER_NUMBER_LITERAL;
2541   literal_p->status_flags = 0;
2542 
2543   context_p->lit_object.literal_p = literal_p;
2544   context_p->lit_object.index = (uint16_t) literal_index;
2545 
2546   context_p->literal_count++;
2547   return false;
2548 } /* lexer_construct_number_object */
2549 
2550 /**
2551  * Convert a push number opcode to push literal opcode
2552  */
2553 void
lexer_convert_push_number_to_push_literal(parser_context_t * context_p)2554 lexer_convert_push_number_to_push_literal (parser_context_t *context_p) /**< context */
2555 {
2556   ecma_integer_value_t value;
2557   bool two_literals = !PARSER_IS_BASIC_OPCODE (context_p->last_cbc_opcode);
2558 
2559   if (context_p->last_cbc_opcode == CBC_PUSH_NUMBER_0
2560       || context_p->last_cbc_opcode == PARSER_TO_EXT_OPCODE (CBC_EXT_PUSH_LITERAL_PUSH_NUMBER_0))
2561   {
2562     value = 0;
2563   }
2564   else if (context_p->last_cbc_opcode == CBC_PUSH_NUMBER_POS_BYTE
2565            || context_p->last_cbc_opcode == PARSER_TO_EXT_OPCODE (CBC_EXT_PUSH_LITERAL_PUSH_NUMBER_POS_BYTE))
2566   {
2567     value = ((ecma_integer_value_t) context_p->last_cbc.value) + 1;
2568   }
2569   else
2570   {
2571     JERRY_ASSERT (context_p->last_cbc_opcode == CBC_PUSH_NUMBER_NEG_BYTE
2572                   || context_p->last_cbc_opcode == PARSER_TO_EXT_OPCODE (CBC_EXT_PUSH_LITERAL_PUSH_NUMBER_NEG_BYTE));
2573     value = -((ecma_integer_value_t) context_p->last_cbc.value) - 1;
2574   }
2575 
2576   ecma_value_t lit_value = ecma_make_integer_value (value);
2577 
2578   parser_list_iterator_t literal_iterator;
2579   parser_list_iterator_init (&context_p->literal_pool, &literal_iterator);
2580 
2581   context_p->last_cbc_opcode = two_literals ? CBC_PUSH_TWO_LITERALS : CBC_PUSH_LITERAL;
2582 
2583   uint32_t literal_index = 0;
2584   lexer_literal_t *literal_p;
2585 
2586   while ((literal_p = (lexer_literal_t *) parser_list_iterator_next (&literal_iterator)) != NULL)
2587   {
2588     if (literal_p->type == LEXER_NUMBER_LITERAL
2589         && literal_p->u.value == lit_value)
2590     {
2591       if (two_literals)
2592       {
2593         context_p->last_cbc.value = (uint16_t) literal_index;
2594       }
2595       else
2596       {
2597         context_p->last_cbc.literal_index = (uint16_t) literal_index;
2598       }
2599       return;
2600     }
2601 
2602     literal_index++;
2603   }
2604 
2605   JERRY_ASSERT (literal_index == context_p->literal_count);
2606 
2607   if (literal_index >= PARSER_MAXIMUM_NUMBER_OF_LITERALS)
2608   {
2609     parser_raise_error (context_p, PARSER_ERR_LITERAL_LIMIT_REACHED);
2610   }
2611 
2612   literal_p = (lexer_literal_t *) parser_list_append (context_p, &context_p->literal_pool);
2613   literal_p->u.value = lit_value;
2614   literal_p->prop.length = 0; /* Unused. */
2615   literal_p->type = LEXER_NUMBER_LITERAL;
2616   literal_p->status_flags = 0;
2617 
2618   context_p->literal_count++;
2619 
2620   if (two_literals)
2621   {
2622     context_p->last_cbc.value = (uint16_t) literal_index;
2623   }
2624   else
2625   {
2626     context_p->last_cbc.literal_index = (uint16_t) literal_index;
2627   }
2628 } /* lexer_convert_push_number_to_push_literal */
2629 
2630 /**
2631  * Construct a function literal object.
2632  *
2633  * @return function object literal index
2634  */
2635 uint16_t
lexer_construct_function_object(parser_context_t * context_p,uint32_t extra_status_flags)2636 lexer_construct_function_object (parser_context_t *context_p, /**< context */
2637                                  uint32_t extra_status_flags) /**< extra status flags */
2638 {
2639   ecma_compiled_code_t *compiled_code_p;
2640   lexer_literal_t *literal_p;
2641   uint16_t result_index;
2642 
2643   if (context_p->literal_count >= PARSER_MAXIMUM_NUMBER_OF_LITERALS)
2644   {
2645     parser_raise_error (context_p, PARSER_ERR_LITERAL_LIMIT_REACHED);
2646   }
2647 
2648   parser_flush_cbc (context_p);
2649 
2650   if (context_p->status_flags & PARSER_INSIDE_WITH)
2651   {
2652     extra_status_flags |= PARSER_INSIDE_WITH;
2653   }
2654 
2655   literal_p = (lexer_literal_t *) parser_list_append (context_p, &context_p->literal_pool);
2656   literal_p->type = LEXER_UNUSED_LITERAL;
2657   literal_p->status_flags = 0;
2658 
2659   result_index = context_p->literal_count;
2660   context_p->literal_count++;
2661 
2662 #if ENABLED (JERRY_ES2015)
2663   if (!(extra_status_flags & PARSER_IS_ARROW_FUNCTION))
2664   {
2665     compiled_code_p = parser_parse_function (context_p, extra_status_flags);
2666   }
2667   else
2668   {
2669     compiled_code_p = parser_parse_arrow_function (context_p, extra_status_flags);
2670   }
2671 #else /* !ENABLED (JERRY_ES2015) */
2672   compiled_code_p = parser_parse_function (context_p, extra_status_flags);
2673 #endif /* ENABLED (JERRY_ES2015) */
2674 
2675   literal_p->u.bytecode_p = compiled_code_p;
2676   literal_p->type = LEXER_FUNCTION_LITERAL;
2677 
2678   return result_index;
2679 } /* lexer_construct_function_object */
2680 
2681 /**
2682  * Construct a regular expression object.
2683  */
2684 void
lexer_construct_regexp_object(parser_context_t * context_p,bool parse_only)2685 lexer_construct_regexp_object (parser_context_t *context_p, /**< context */
2686                                bool parse_only) /**< parse only */
2687 {
2688 #if ENABLED (JERRY_BUILTIN_REGEXP)
2689   const uint8_t *source_p = context_p->source_p;
2690   const uint8_t *regex_start_p = context_p->source_p;
2691   const uint8_t *regex_end_p = regex_start_p;
2692   const uint8_t *source_end_p = context_p->source_end_p;
2693   parser_line_counter_t column = context_p->column;
2694   lexer_literal_t *literal_p;
2695   bool in_class = false;
2696   uint16_t current_flags;
2697   lit_utf8_size_t length;
2698 
2699   JERRY_ASSERT (context_p->token.type == LEXER_DIVIDE
2700                 || context_p->token.type == LEXER_ASSIGN_DIVIDE);
2701 
2702   if (context_p->token.type == LEXER_ASSIGN_DIVIDE)
2703   {
2704     regex_start_p--;
2705   }
2706 
2707   while (true)
2708   {
2709     if (source_p >= source_end_p)
2710     {
2711       parser_raise_error (context_p, PARSER_ERR_UNTERMINATED_REGEXP);
2712     }
2713 
2714     if (!in_class && source_p[0] == LIT_CHAR_SLASH)
2715     {
2716       regex_end_p = source_p;
2717       source_p++;
2718       column++;
2719       break;
2720     }
2721 
2722     switch (source_p[0])
2723     {
2724       case LIT_CHAR_CR:
2725       case LIT_CHAR_LF:
2726       case LEXER_NEWLINE_LS_PS_BYTE_1:
2727       {
2728         if (source_p[0] != LEXER_NEWLINE_LS_PS_BYTE_1
2729             || LEXER_NEWLINE_LS_PS_BYTE_23 (source_p))
2730         {
2731           parser_raise_error (context_p, PARSER_ERR_NEWLINE_NOT_ALLOWED);
2732         }
2733         break;
2734       }
2735       case LIT_CHAR_TAB:
2736       {
2737         column = align_column_to_tab (column);
2738          /* Subtract -1 because column is increased below. */
2739         column--;
2740         break;
2741       }
2742       case LIT_CHAR_LEFT_SQUARE:
2743       {
2744         in_class = true;
2745         break;
2746       }
2747       case LIT_CHAR_RIGHT_SQUARE:
2748       {
2749         in_class = false;
2750         break;
2751       }
2752       case LIT_CHAR_BACKSLASH:
2753       {
2754         if (source_p + 1 >= source_end_p)
2755         {
2756           parser_raise_error (context_p, PARSER_ERR_UNTERMINATED_REGEXP);
2757         }
2758 
2759         if (source_p[1] >= 0x20 && source_p[1] <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
2760         {
2761           source_p++;
2762           column++;
2763         }
2764       }
2765     }
2766 
2767     source_p++;
2768     column++;
2769 
2770     while (source_p < source_end_p
2771            && IS_UTF8_INTERMEDIATE_OCTET (source_p[0]))
2772     {
2773       source_p++;
2774     }
2775   }
2776 
2777   current_flags = 0;
2778   while (source_p < source_end_p)
2779   {
2780     uint32_t flag = 0;
2781 
2782     if (source_p[0] == LIT_CHAR_LOWERCASE_G)
2783     {
2784       flag = RE_FLAG_GLOBAL;
2785     }
2786     else if (source_p[0] == LIT_CHAR_LOWERCASE_I)
2787     {
2788       flag = RE_FLAG_IGNORE_CASE;
2789     }
2790     else if (source_p[0] == LIT_CHAR_LOWERCASE_M)
2791     {
2792       flag = RE_FLAG_MULTILINE;
2793     }
2794     else if (source_p[0] == LIT_CHAR_LOWERCASE_U)
2795     {
2796       flag = RE_FLAG_UNICODE;
2797     }
2798     else if (source_p[0] == LIT_CHAR_LOWERCASE_Y)
2799     {
2800       flag = RE_FLAG_STICKY;
2801     }
2802 
2803     if (flag == 0)
2804     {
2805       break;
2806     }
2807 
2808     if (current_flags & flag)
2809     {
2810       parser_raise_error (context_p, PARSER_ERR_DUPLICATED_REGEXP_FLAG);
2811     }
2812 
2813     current_flags = (uint16_t) (current_flags | flag);
2814     source_p++;
2815     column++;
2816   }
2817 
2818   context_p->source_p = source_p;
2819   context_p->column = column;
2820 
2821   if (source_p < source_end_p && lexer_parse_identifier (context_p, LEXER_PARSE_CHECK_PART_AND_RETURN))
2822   {
2823     parser_raise_error (context_p, PARSER_ERR_UNKNOWN_REGEXP_FLAG);
2824   }
2825 
2826   length = (lit_utf8_size_t) (regex_end_p - regex_start_p);
2827   if (length > PARSER_MAXIMUM_STRING_LENGTH)
2828   {
2829     parser_raise_error (context_p, PARSER_ERR_REGEXP_TOO_LONG);
2830   }
2831 
2832   context_p->column = column;
2833   context_p->source_p = source_p;
2834 
2835   if (parse_only)
2836   {
2837     return;
2838   }
2839 
2840   if (context_p->literal_count >= PARSER_MAXIMUM_NUMBER_OF_LITERALS)
2841   {
2842     parser_raise_error (context_p, PARSER_ERR_LITERAL_LIMIT_REACHED);
2843   }
2844 
2845   literal_p = (lexer_literal_t *) parser_list_append (context_p, &context_p->literal_pool);
2846   literal_p->prop.length = (prop_length_t) length;
2847   literal_p->type = LEXER_UNUSED_LITERAL;
2848   literal_p->status_flags = 0;
2849 
2850   context_p->literal_count++;
2851 
2852   /* Compile the RegExp literal and store the RegExp bytecode pointer */
2853   ecma_string_t *pattern_str_p = NULL;
2854 
2855   if (lit_is_valid_cesu8_string (regex_start_p, length))
2856   {
2857     pattern_str_p = ecma_new_ecma_string_from_utf8 (regex_start_p, length);
2858   }
2859   else
2860   {
2861     JERRY_ASSERT (lit_is_valid_utf8_string (regex_start_p, length));
2862     pattern_str_p = ecma_new_ecma_string_from_utf8_converted_to_cesu8 (regex_start_p, length);
2863   }
2864 
2865   re_compiled_code_t *re_bytecode_p = re_compile_bytecode (pattern_str_p, current_flags);
2866   ecma_deref_ecma_string (pattern_str_p);
2867 
2868   if (JERRY_UNLIKELY (re_bytecode_p == NULL))
2869   {
2870     parser_raise_error (context_p, PARSER_ERR_INVALID_REGEXP);
2871   }
2872 
2873   literal_p->type = LEXER_REGEXP_LITERAL;
2874   literal_p->u.bytecode_p = (ecma_compiled_code_t *) re_bytecode_p;
2875 
2876   context_p->token.type = LEXER_LITERAL;
2877   context_p->token.keyword_type = LEXER_EOS;
2878   context_p->token.lit_location.type = LEXER_REGEXP_LITERAL;
2879 
2880   context_p->lit_object.literal_p = literal_p;
2881   context_p->lit_object.index = (uint16_t) (context_p->literal_count - 1);
2882 #else /* !ENABLED (JERRY_BUILTIN_REGEXP) */
2883   JERRY_UNUSED (parse_only);
2884   parser_raise_error (context_p, PARSER_ERR_UNSUPPORTED_REGEXP);
2885 #endif /* ENABLED (JERRY_BUILTIN_REGEXP) */
2886 } /* lexer_construct_regexp_object */
2887 
2888 /**
2889  * Next token must be an identifier.
2890  */
2891 void
lexer_expect_identifier(parser_context_t * context_p,uint8_t literal_type)2892 lexer_expect_identifier (parser_context_t *context_p, /**< context */
2893                          uint8_t literal_type) /**< literal type */
2894 {
2895   JERRY_ASSERT (literal_type == LEXER_STRING_LITERAL
2896                 || literal_type == LEXER_IDENT_LITERAL
2897                 || literal_type == LEXER_NEW_IDENT_LITERAL);
2898 
2899   lexer_skip_spaces (context_p);
2900   context_p->token.line = context_p->line;
2901   context_p->token.column = context_p->column;
2902 
2903   if (context_p->source_p < context_p->source_end_p
2904       && lexer_parse_identifier (context_p, (literal_type != LEXER_STRING_LITERAL ? LEXER_PARSE_CHECK_KEYWORDS
2905                                                                                   : LEXER_PARSE_NO_OPTS)))
2906   {
2907     if (context_p->token.type == LEXER_LITERAL)
2908     {
2909       JERRY_ASSERT (context_p->token.lit_location.type == LEXER_IDENT_LITERAL);
2910 
2911       lexer_construct_literal_object (context_p,
2912                                       &context_p->token.lit_location,
2913                                       literal_type);
2914 
2915       if (literal_type != LEXER_STRING_LITERAL
2916           && (context_p->status_flags & PARSER_IS_STRICT))
2917       {
2918         if (context_p->token.keyword_type == LEXER_KEYW_EVAL)
2919         {
2920           parser_raise_error (context_p, PARSER_ERR_EVAL_NOT_ALLOWED);
2921         }
2922         else if (context_p->token.keyword_type == LEXER_KEYW_ARGUMENTS)
2923         {
2924           parser_raise_error (context_p, PARSER_ERR_ARGUMENTS_NOT_ALLOWED);
2925         }
2926       }
2927       return;
2928     }
2929   }
2930 #if ENABLED (JERRY_ES2015_MODULE_SYSTEM)
2931   else if (context_p->status_flags & PARSER_MODULE_DEFAULT_CLASS_OR_FUNC)
2932   {
2933     /* When parsing default exports for modules, it is not required by functions or classes to have identifiers.
2934      * In this case we use a synthetic name for them. */
2935     context_p->token.type = LEXER_LITERAL;
2936     context_p->token.keyword_type = LEXER_EOS;
2937     context_p->token.lit_location = lexer_default_literal;
2938     lexer_construct_literal_object (context_p, &context_p->token.lit_location, literal_type);
2939     context_p->status_flags &= (uint32_t) ~(PARSER_MODULE_DEFAULT_CLASS_OR_FUNC);
2940     return;
2941   }
2942 #endif /* ENABLED (JERRY_ES2015_MODULE_SYSTEM) */
2943 
2944 #if ENABLED (JERRY_ES2015)
2945   if (context_p->token.type == LEXER_KEYW_YIELD)
2946   {
2947     parser_raise_error (context_p, PARSER_ERR_YIELD_NOT_ALLOWED);
2948   }
2949   if (context_p->token.type == LEXER_KEYW_AWAIT)
2950   {
2951     parser_raise_error (context_p, PARSER_ERR_AWAIT_NOT_ALLOWED);
2952   }
2953 #endif /* ENABLED (JERRY_ES2015) */
2954   parser_raise_error (context_p, PARSER_ERR_IDENTIFIER_EXPECTED);
2955 } /* lexer_expect_identifier */
2956 
2957 /**
2958  * Next token must be an identifier.
2959  */
2960 void
lexer_expect_object_literal_id(parser_context_t * context_p,uint32_t ident_opts)2961 lexer_expect_object_literal_id (parser_context_t *context_p, /**< context */
2962                                 uint32_t ident_opts) /**< lexer_obj_ident_opts_t option bits */
2963 {
2964   lexer_skip_spaces (context_p);
2965 
2966   if (context_p->source_p >= context_p->source_end_p)
2967   {
2968     parser_raise_error (context_p, PARSER_ERR_PROPERTY_IDENTIFIER_EXPECTED);
2969   }
2970 
2971 #if ENABLED (JERRY_ES2015)
2972   int is_class_method = ((ident_opts & LEXER_OBJ_IDENT_CLASS_METHOD)
2973                          && !(ident_opts & LEXER_OBJ_IDENT_ONLY_IDENTIFIERS)
2974                          && (context_p->token.type != LEXER_KEYW_STATIC));
2975 #endif /* ENABLED (JERRY_ES2015) */
2976 
2977   context_p->token.line = context_p->line;
2978   context_p->token.column = context_p->column;
2979   bool create_literal_object = false;
2980 
2981   if (lexer_parse_identifier (context_p, LEXER_PARSE_NO_OPTS))
2982   {
2983     if (!(ident_opts & (LEXER_OBJ_IDENT_ONLY_IDENTIFIERS | LEXER_OBJ_IDENT_OBJECT_PATTERN)))
2984     {
2985       lexer_skip_spaces (context_p);
2986       context_p->token.flags = (uint8_t) (context_p->token.flags | LEXER_NO_SKIP_SPACES);
2987 
2988       if (context_p->source_p < context_p->source_end_p
2989 #if ENABLED (JERRY_ES2015)
2990           && context_p->source_p[0] != LIT_CHAR_COMMA
2991           && context_p->source_p[0] != LIT_CHAR_RIGHT_BRACE
2992           && context_p->source_p[0] != LIT_CHAR_LEFT_PAREN
2993 #endif /* ENABLED (JERRY_ES2015) */
2994           && context_p->source_p[0] != LIT_CHAR_COLON)
2995       {
2996         if (lexer_compare_literal_to_string (context_p, "get", 3))
2997         {
2998           context_p->token.type = LEXER_PROPERTY_GETTER;
2999           return;
3000         }
3001 
3002         if (lexer_compare_literal_to_string (context_p, "set", 3))
3003         {
3004           context_p->token.type = LEXER_PROPERTY_SETTER;
3005           return;
3006         }
3007 
3008 #if ENABLED (JERRY_ES2015)
3009         if (lexer_compare_literal_to_string (context_p, "async", 5))
3010         {
3011           context_p->token.type = LEXER_KEYW_ASYNC;
3012           return;
3013         }
3014 #endif /* ENABLED (JERRY_ES2015) */
3015       }
3016     }
3017 
3018 #if ENABLED (JERRY_ES2015)
3019     if (is_class_method && lexer_compare_literal_to_string (context_p, "static", 6))
3020     {
3021       context_p->token.type = LEXER_KEYW_STATIC;
3022       return;
3023     }
3024 #endif /* ENABLED (JERRY_ES2015) */
3025 
3026     create_literal_object = true;
3027   }
3028   else
3029   {
3030     switch (context_p->source_p[0])
3031     {
3032       case LIT_CHAR_DOUBLE_QUOTE:
3033       case LIT_CHAR_SINGLE_QUOTE:
3034       {
3035         lexer_parse_string (context_p, LEXER_STRING_NO_OPTS);
3036         create_literal_object = true;
3037         break;
3038       }
3039 #if ENABLED (JERRY_ES2015)
3040       case LIT_CHAR_LEFT_SQUARE:
3041       {
3042         lexer_consume_next_character (context_p);
3043 
3044         lexer_next_token (context_p);
3045         parser_parse_expression (context_p, PARSE_EXPR_NO_COMMA);
3046 
3047         if (context_p->token.type != LEXER_RIGHT_SQUARE)
3048         {
3049           parser_raise_error (context_p, PARSER_ERR_RIGHT_SQUARE_EXPECTED);
3050         }
3051         return;
3052       }
3053       case LIT_CHAR_ASTERISK:
3054       {
3055         if (ident_opts & (LEXER_OBJ_IDENT_ONLY_IDENTIFIERS | LEXER_OBJ_IDENT_OBJECT_PATTERN))
3056         {
3057           break;
3058         }
3059 
3060         context_p->token.type = LEXER_MULTIPLY;
3061         lexer_consume_next_character (context_p);
3062         return;
3063       }
3064 #endif /* ENABLED (JERRY_ES2015) */
3065       case LIT_CHAR_RIGHT_BRACE:
3066       {
3067         if (ident_opts & LEXER_OBJ_IDENT_ONLY_IDENTIFIERS)
3068         {
3069           break;
3070         }
3071 
3072         context_p->token.type = LEXER_RIGHT_BRACE;
3073         lexer_consume_next_character (context_p);
3074         return;
3075       }
3076       default:
3077       {
3078         const uint8_t *char_p = context_p->source_p;
3079 
3080         if (char_p[0] == LIT_CHAR_DOT)
3081         {
3082           char_p++;
3083         }
3084 
3085         if (char_p < context_p->source_end_p
3086             && char_p[0] >= LIT_CHAR_0
3087             && char_p[0] <= LIT_CHAR_9)
3088         {
3089           lexer_parse_number (context_p);
3090           lexer_construct_number_object (context_p, false, false);
3091           return;
3092         }
3093         break;
3094       }
3095     }
3096   }
3097 
3098   if (create_literal_object)
3099   {
3100 #if ENABLED (JERRY_ES2015)
3101     if (is_class_method && lexer_compare_literal_to_string (context_p, "constructor", 11))
3102     {
3103       context_p->token.type = LEXER_CLASS_CONSTRUCTOR;
3104       context_p->token.flags &= (uint8_t) ~LEXER_NO_SKIP_SPACES;
3105       return;
3106     }
3107 #endif /* ENABLED (JERRY_ES2015) */
3108 
3109     lexer_construct_literal_object (context_p,
3110                                     &context_p->token.lit_location,
3111                                     LEXER_STRING_LITERAL);
3112     return;
3113   }
3114 
3115   parser_raise_error (context_p, PARSER_ERR_PROPERTY_IDENTIFIER_EXPECTED);
3116 } /* lexer_expect_object_literal_id */
3117 
3118 /**
3119  * Read next token without checking keywords
3120  *
3121  * @return true if the next literal is identifier, false otherwise
3122  */
3123 bool
lexer_scan_identifier(parser_context_t * context_p)3124 lexer_scan_identifier (parser_context_t *context_p) /**< context */
3125 {
3126   lexer_skip_spaces (context_p);
3127   context_p->token.line = context_p->line;
3128   context_p->token.column = context_p->column;
3129 
3130   if (context_p->source_p < context_p->source_end_p
3131       && lexer_parse_identifier (context_p, LEXER_PARSE_NO_OPTS))
3132   {
3133     return true;
3134   }
3135 
3136   lexer_next_token (context_p);
3137   return false;
3138 } /* lexer_scan_identifier */
3139 
3140 /**
3141  * Check whether the identifier is a modifier in a property definition.
3142  */
3143 void
lexer_check_property_modifier(parser_context_t * context_p)3144 lexer_check_property_modifier (parser_context_t *context_p) /**< context */
3145 {
3146   JERRY_ASSERT (!(context_p->token.flags & LEXER_NO_SKIP_SPACES));
3147   JERRY_ASSERT (context_p->token.type = LEXER_LITERAL
3148                 && context_p->token.lit_location.type == LEXER_IDENT_LITERAL);
3149 
3150   lexer_skip_spaces (context_p);
3151   context_p->token.flags = (uint8_t) (context_p->token.flags | LEXER_NO_SKIP_SPACES);
3152 
3153   if (context_p->source_p >= context_p->source_end_p
3154 #if ENABLED (JERRY_ES2015)
3155       || context_p->source_p[0] == LIT_CHAR_COMMA
3156       || context_p->source_p[0] == LIT_CHAR_RIGHT_BRACE
3157       || context_p->source_p[0] == LIT_CHAR_LEFT_PAREN
3158       || context_p->source_p[0] == LIT_CHAR_EQUALS
3159 #endif /* ENABLED (JERRY_ES2015) */
3160       || context_p->source_p[0] == LIT_CHAR_COLON)
3161   {
3162     return;
3163   }
3164 
3165   if (lexer_compare_literal_to_string (context_p, "get", 3))
3166   {
3167     context_p->token.type = LEXER_PROPERTY_GETTER;
3168     return;
3169   }
3170 
3171   if (lexer_compare_literal_to_string (context_p, "set", 3))
3172   {
3173     context_p->token.type = LEXER_PROPERTY_SETTER;
3174     return;
3175   }
3176 
3177 #if ENABLED (JERRY_ES2015)
3178   if (lexer_compare_literal_to_string (context_p, "async", 5))
3179   {
3180     context_p->token.type = LEXER_KEYW_ASYNC;
3181     return;
3182   }
3183 #endif /* ENABLED (JERRY_ES2015) */
3184 } /* lexer_check_property_modifier */
3185 
3186 /**
3187  * Compares two identifiers.
3188  *
3189  * Note:
3190  *   Escape sequences are allowed in the left identifier, but not in the right
3191  *
3192  * @return true if the two identifiers are the same
3193  */
3194 static bool
lexer_compare_identifier_to_chars(const uint8_t * left_p,const uint8_t * right_p,size_t size)3195 lexer_compare_identifier_to_chars (const uint8_t *left_p, /**< left identifier */
3196                                    const uint8_t *right_p, /**< right identifier string */
3197                                    size_t size) /**< byte size of the two identifiers */
3198 {
3199   uint8_t utf8_buf[6];
3200 
3201   do
3202   {
3203     if (*left_p == *right_p)
3204     {
3205       left_p++;
3206       right_p++;
3207       size--;
3208       continue;
3209     }
3210 
3211     size_t escape_size;
3212 
3213     if (*left_p == LIT_CHAR_BACKSLASH)
3214     {
3215       left_p += 2;
3216       lit_code_point_t code_point = lexer_unchecked_hex_to_character (&left_p);
3217 
3218       escape_size = lit_code_point_to_cesu8_bytes (utf8_buf, code_point);
3219     }
3220     else if (*left_p >= LIT_UTF8_4_BYTE_MARKER)
3221     {
3222       lit_four_byte_utf8_char_to_cesu8 (utf8_buf, left_p);
3223       escape_size = 3 * 2;
3224       left_p += 4;
3225     }
3226     else
3227     {
3228       return false;
3229     }
3230 
3231     size -= escape_size;
3232 
3233     uint8_t *utf8_p = utf8_buf;
3234     do
3235     {
3236       if (*right_p++ != *utf8_p++)
3237       {
3238         return false;
3239       }
3240     }
3241     while (--escape_size > 0);
3242   }
3243   while (size > 0);
3244 
3245   return true;
3246 } /* lexer_compare_identifier_to_chars */
3247 
3248 /**
3249  * Compares an identifier to a string.
3250  *
3251  * Note:
3252  *   Escape sequences are allowed in the left identifier, but not in the right
3253  *
3254  * @return true if the identifier equals to string
3255  */
3256 bool
lexer_compare_identifier_to_string(const lexer_lit_location_t * left_p,const uint8_t * right_p,size_t size)3257 lexer_compare_identifier_to_string (const lexer_lit_location_t *left_p, /**< left literal */
3258                                     const uint8_t *right_p, /**< right identifier string */
3259                                     size_t size) /**< byte size of the right identifier */
3260 {
3261   if (left_p->length != size)
3262   {
3263     return false;
3264   }
3265 
3266   if (!left_p->has_escape)
3267   {
3268     return memcmp (left_p->char_p, right_p, size) == 0;
3269   }
3270 
3271   return lexer_compare_identifier_to_chars (left_p->char_p, right_p, size);
3272 } /* lexer_compare_identifier_to_string */
3273 
3274 /**
3275  * Compares two identifiers.
3276  *
3277  * Note:
3278  *   Escape sequences are allowed in both identifiers
3279  *
3280  * @return true if the two identifiers are the same
3281  */
3282 bool
lexer_compare_identifiers(parser_context_t * context_p,const lexer_lit_location_t * left_p,const lexer_lit_location_t * right_p)3283 lexer_compare_identifiers (parser_context_t *context_p, /**< context */
3284                            const lexer_lit_location_t *left_p, /**< left literal */
3285                            const lexer_lit_location_t *right_p) /**< right literal */
3286 {
3287   prop_length_t length = left_p->length;
3288 
3289   if (length != right_p->length)
3290   {
3291     return false;
3292   }
3293 
3294   if (!left_p->has_escape)
3295   {
3296     return lexer_compare_identifier_to_chars (right_p->char_p, left_p->char_p, length);
3297   }
3298 
3299   if (!right_p->has_escape)
3300   {
3301     return lexer_compare_identifier_to_chars (left_p->char_p, right_p->char_p, length);
3302   }
3303 
3304   uint8_t buf_p[64];
3305 
3306   if (length <= 64)
3307   {
3308     lexer_convert_ident_to_cesu8 (buf_p, left_p->char_p, length);
3309     return lexer_compare_identifier_to_chars (right_p->char_p, buf_p, length);
3310   }
3311 
3312   uint8_t *dynamic_buf_p = parser_malloc (context_p, length);
3313 
3314   lexer_convert_ident_to_cesu8 (dynamic_buf_p, left_p->char_p, length);
3315   bool result = lexer_compare_identifier_to_chars (right_p->char_p, dynamic_buf_p, length);
3316   parser_free (dynamic_buf_p, length);
3317 
3318   return result;
3319 } /* lexer_compare_identifiers */
3320 
3321 /**
3322  * Compares the current identifier in the context to the parameter identifier
3323  *
3324  * Note:
3325  *   Escape sequences are allowed.
3326  *
3327  * @return true if the input identifiers are the same
3328  */
3329 bool
lexer_current_is_literal(parser_context_t * context_p,const lexer_lit_location_t * right_ident_p)3330 lexer_current_is_literal (parser_context_t *context_p, /**< context */
3331                           const lexer_lit_location_t *right_ident_p) /**< identifier */
3332 {
3333   JERRY_ASSERT (context_p->token.type == LEXER_LITERAL
3334                 && context_p->token.lit_location.type == LEXER_IDENT_LITERAL);
3335 
3336   lexer_lit_location_t *left_ident_p = &context_p->token.lit_location;
3337 
3338   JERRY_ASSERT (left_ident_p->length > 0 && right_ident_p->length > 0);
3339 
3340   if (left_ident_p->length != right_ident_p->length)
3341   {
3342     return false;
3343   }
3344 
3345   if (!left_ident_p->has_escape && !right_ident_p->has_escape)
3346   {
3347     return memcmp (left_ident_p->char_p, right_ident_p->char_p, left_ident_p->length) == 0;
3348   }
3349 
3350   return lexer_compare_identifiers (context_p, left_ident_p, right_ident_p);
3351 } /* lexer_current_is_literal */
3352 
3353 /**
3354  * Compares the current string token to "use strict".
3355  *
3356  * Note:
3357  *   Escape sequences are not allowed.
3358  *
3359  * @return true if "use strict" is found, false otherwise
3360  */
3361 inline bool JERRY_ATTR_ALWAYS_INLINE
lexer_string_is_use_strict(parser_context_t * context_p)3362 lexer_string_is_use_strict (parser_context_t *context_p) /**< context */
3363 {
3364   JERRY_ASSERT (context_p->token.type == LEXER_LITERAL
3365                 && context_p->token.lit_location.type == LEXER_STRING_LITERAL);
3366 
3367   return (context_p->token.lit_location.length == 10
3368           && !context_p->token.lit_location.has_escape
3369           && memcmp (context_p->token.lit_location.char_p, "use strict", 10) == 0);
3370 } /* lexer_string_is_use_strict */
3371 
3372 /**
3373  * Checks whether the string before the current token is a directive or a string literal.
3374  *
3375  * @return true if the string is a directive, false otherwise
3376  */
3377 inline bool JERRY_ATTR_ALWAYS_INLINE
lexer_string_is_directive(parser_context_t * context_p)3378 lexer_string_is_directive (parser_context_t *context_p) /**< context */
3379 {
3380   return (context_p->token.type == LEXER_SEMICOLON
3381           || context_p->token.type == LEXER_RIGHT_BRACE
3382           || ((context_p->token.flags & LEXER_WAS_NEWLINE)
3383               && !LEXER_IS_BINARY_OP_TOKEN (context_p->token.type)
3384               && context_p->token.type != LEXER_LEFT_PAREN
3385               && context_p->token.type != LEXER_LEFT_SQUARE
3386               && context_p->token.type != LEXER_DOT));
3387 } /* lexer_string_is_directive */
3388 
3389 #if ENABLED (JERRY_ES2015)
3390 
3391 /**
3392  * Compares the current token to an expected identifier.
3393  *
3394  * Note:
3395  *   Escape sequences are not allowed.
3396  *
3397  * @return true if they are the same, false otherwise
3398  */
3399 inline bool JERRY_ATTR_ALWAYS_INLINE
lexer_token_is_identifier(parser_context_t * context_p,const char * identifier_p,size_t identifier_length)3400 lexer_token_is_identifier (parser_context_t *context_p, /**< context */
3401                            const char *identifier_p, /**< identifier */
3402                            size_t identifier_length) /**< identifier length */
3403 {
3404   /* Checking has_escape is unnecessary because memcmp will fail if escape sequences are present. */
3405   return (context_p->token.type == LEXER_LITERAL
3406           && context_p->token.lit_location.type == LEXER_IDENT_LITERAL
3407           && context_p->token.lit_location.length == identifier_length
3408           && memcmp (context_p->token.lit_location.char_p, identifier_p, identifier_length) == 0);
3409 } /* lexer_token_is_identifier */
3410 
3411 /**
3412  * Compares the current identifier token to "let".
3413  *
3414  * Note:
3415  *   Escape sequences are not allowed.
3416  *
3417  * @return true if "let" is found, false otherwise
3418  */
3419 inline bool JERRY_ATTR_ALWAYS_INLINE
lexer_token_is_let(parser_context_t * context_p)3420 lexer_token_is_let (parser_context_t *context_p) /**< context */
3421 {
3422   JERRY_ASSERT (context_p->token.type == LEXER_LITERAL);
3423 
3424   return (context_p->token.keyword_type == LEXER_KEYW_LET
3425           && !context_p->token.lit_location.has_escape);
3426 } /* lexer_token_is_let */
3427 
3428 /**
3429  * Compares the current identifier token to "async".
3430  *
3431  * Note:
3432  *   Escape sequences are not allowed.
3433  *
3434  * @return true if "async" is found, false otherwise
3435  */
3436 inline bool JERRY_ATTR_ALWAYS_INLINE
lexer_token_is_async(parser_context_t * context_p)3437 lexer_token_is_async (parser_context_t *context_p) /**< context */
3438 {
3439   JERRY_ASSERT (context_p->token.type == LEXER_LITERAL
3440                 || context_p->token.type == LEXER_TEMPLATE_LITERAL);
3441 
3442   return (context_p->token.keyword_type == LEXER_KEYW_ASYNC
3443           && !context_p->token.lit_location.has_escape);
3444 } /* lexer_token_is_async */
3445 
3446 #endif /* ENABLED (JERRY_ES2015) */
3447 
3448 /**
3449  * Compares the current identifier or string to an expected string.
3450  *
3451  * Note:
3452  *   Escape sequences are not allowed.
3453  *
3454  * @return true if they are the same, false otherwise
3455  */
3456 inline bool JERRY_ATTR_ALWAYS_INLINE
lexer_compare_literal_to_string(parser_context_t * context_p,const char * string_p,size_t string_length)3457 lexer_compare_literal_to_string (parser_context_t *context_p, /**< context */
3458                                  const char *string_p, /**< string */
3459                                  size_t string_length) /**< string length */
3460 {
3461   JERRY_ASSERT (context_p->token.type == LEXER_LITERAL
3462                 && (context_p->token.lit_location.type == LEXER_IDENT_LITERAL
3463                     || context_p->token.lit_location.type == LEXER_STRING_LITERAL));
3464 
3465   /* Checking has_escape is unnecessary because memcmp will fail if escape sequences are present. */
3466   return (context_p->token.lit_location.length == string_length
3467           && memcmp (context_p->token.lit_location.char_p, string_p, string_length) == 0);
3468 } /* lexer_compare_literal_to_string */
3469 
3470 /**
3471  * Convert binary lvalue token to binary token
3472  * e.g. += -> +
3473  *      ^= -> ^
3474  *
3475  * @return binary token
3476  */
3477 uint8_t
lexer_convert_binary_lvalue_token_to_binary(uint8_t token)3478 lexer_convert_binary_lvalue_token_to_binary (uint8_t token) /**< binary lvalue token */
3479 {
3480   JERRY_ASSERT (LEXER_IS_BINARY_LVALUE_TOKEN (token));
3481   JERRY_ASSERT (token != LEXER_ASSIGN);
3482 
3483 #if ENABLED (JERRY_ES2015)
3484   if (token <= LEXER_ASSIGN_EXPONENTIATION)
3485   {
3486     return (uint8_t) (LEXER_ADD + (token - LEXER_ASSIGN_ADD));
3487   }
3488 #else /* !ENABLED (JERRY_ES2015) */
3489   if (token <= LEXER_ASSIGN_MODULO)
3490   {
3491     return (uint8_t) (LEXER_ADD + (token - LEXER_ASSIGN_ADD));
3492   }
3493 #endif /* ENABLED (JERRY_ES2015) */
3494 
3495   if (token <= LEXER_ASSIGN_UNS_RIGHT_SHIFT)
3496   {
3497     return (uint8_t) (LEXER_LEFT_SHIFT + (token - LEXER_ASSIGN_LEFT_SHIFT));
3498   }
3499 
3500   switch (token)
3501   {
3502     case LEXER_ASSIGN_BIT_AND:
3503     {
3504       return LEXER_BIT_AND;
3505     }
3506     case LEXER_ASSIGN_BIT_OR:
3507     {
3508       return LEXER_BIT_OR;
3509     }
3510     default:
3511     {
3512       JERRY_ASSERT (token == LEXER_ASSIGN_BIT_XOR);
3513       return LEXER_BIT_XOR;
3514     }
3515   }
3516 } /* lexer_convert_binary_lvalue_token_to_binary */
3517 
3518 /**
3519  * @}
3520  * @}
3521  * @}
3522  */
3523 
3524 #endif /* ENABLED (JERRY_PARSER) */
3525