1 /* Copyright JS Foundation and other contributors, http://js.foundation
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecma-exceptions.h"
17 #include "ecma-globals.h"
18 #include "ecma-try-catch-macro.h"
19 #include "jcontext.h"
20 #include "jrt-libc-includes.h"
21 #include "lit-char-helpers.h"
22 #include "re-compiler.h"
23 #include "re-parser.h"
24
25 #if ENABLED (JERRY_BUILTIN_REGEXP)
26
27 /** \addtogroup parser Parser
28 * @{
29 *
30 * \addtogroup regexparser Regular expression
31 * @{
32 *
33 * \addtogroup regexparser_parser Parser
34 * @{
35 */
36
37 /**
38 * Get the start opcode for the current group.
39 *
40 * @return RegExp opcode
41 */
42 static re_opcode_t
re_get_group_start_opcode(bool is_capturing)43 re_get_group_start_opcode (bool is_capturing) /**< is capturing group */
44 {
45 return (is_capturing) ? RE_OP_CAPTURING_GROUP_START : RE_OP_NON_CAPTURING_GROUP_START;
46 } /* re_get_group_start_opcode*/
47
48 /**
49 * Get the end opcode for the current group.
50 *
51 * @return RegExp opcode
52 */
53 static re_opcode_t
re_get_group_end_opcode(re_compiler_ctx_t * re_ctx_p,bool is_capturing)54 re_get_group_end_opcode (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
55 bool is_capturing) /**< is capturing group */
56 {
57 if (is_capturing)
58 {
59 if (re_ctx_p->token.greedy)
60 {
61 return RE_OP_GREEDY_CAPTURING_GROUP_END;
62 }
63
64 return RE_OP_LAZY_CAPTURING_GROUP_END;
65 }
66
67 if (re_ctx_p->token.greedy)
68 {
69 return RE_OP_GREEDY_NON_CAPTURING_GROUP_END;
70 }
71
72 return RE_OP_LAZY_NON_CAPTURING_GROUP_END;
73 } /* re_get_group_end_opcode */
74
75 /**
76 * Enclose the given bytecode to a group.
77 */
78 static void
re_insert_into_group(re_compiler_ctx_t * re_ctx_p,uint32_t group_start_offset,uint32_t idx,uint32_t capture_start,bool is_capturing)79 re_insert_into_group (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
80 uint32_t group_start_offset, /**< offset of group start */
81 uint32_t idx, /**< index of group */
82 uint32_t capture_start, /**< index of first nested capture */
83 bool is_capturing) /**< is capturing group */
84 {
85 uint32_t qmin = re_ctx_p->token.qmin;
86 uint32_t qmax = re_ctx_p->token.qmax;
87
88 if (JERRY_UNLIKELY (!is_capturing && re_bytecode_size (re_ctx_p) == group_start_offset))
89 {
90 return;
91 }
92
93 if (qmin == 0)
94 {
95 re_insert_value (re_ctx_p,
96 group_start_offset,
97 re_bytecode_size (re_ctx_p) - group_start_offset);
98 }
99
100 re_insert_value (re_ctx_p, group_start_offset, qmin);
101 re_insert_value (re_ctx_p, group_start_offset, re_ctx_p->captures_count - capture_start);
102
103 if (!is_capturing)
104 {
105 re_insert_value (re_ctx_p, group_start_offset, capture_start);
106 }
107 else
108 {
109 JERRY_ASSERT (idx == capture_start);
110 }
111
112 re_insert_value (re_ctx_p, group_start_offset, idx);
113 re_insert_opcode (re_ctx_p, group_start_offset, re_get_group_start_opcode (is_capturing));
114
115 re_append_opcode (re_ctx_p, re_get_group_end_opcode (re_ctx_p, is_capturing));
116 re_append_value (re_ctx_p, idx);
117 re_append_value (re_ctx_p, qmin);
118 re_append_value (re_ctx_p, qmax + RE_QMAX_OFFSET);
119 } /* re_insert_into_group */
120
121 /**
122 * Insert simple atom iterator.
123 */
124 static void
re_insert_atom_iterator(re_compiler_ctx_t * re_ctx_p,uint32_t start_offset)125 re_insert_atom_iterator (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
126 uint32_t start_offset) /**< atom start offset */
127 {
128 const uint32_t qmin = re_ctx_p->token.qmin;
129 const uint32_t qmax = re_ctx_p->token.qmax;
130
131 if (qmin == 1 && qmax == 1)
132 {
133 return;
134 }
135
136 re_append_opcode (re_ctx_p, RE_OP_ITERATOR_END);
137 re_insert_value (re_ctx_p, start_offset, re_bytecode_size (re_ctx_p) - start_offset);
138 re_insert_value (re_ctx_p, start_offset, qmax + RE_QMAX_OFFSET);
139 re_insert_value (re_ctx_p, start_offset, qmin);
140 re_insert_opcode (re_ctx_p, start_offset, re_ctx_p->token.greedy ? RE_OP_GREEDY_ITERATOR : RE_OP_LAZY_ITERATOR);
141 } /* re_insert_atom_iterator */
142
143 /**
144 * Insert a lookahead assertion.
145 */
146 static void
re_insert_assertion_lookahead(re_compiler_ctx_t * re_ctx_p,uint32_t start_offset,uint32_t capture_start,bool negative)147 re_insert_assertion_lookahead (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
148 uint32_t start_offset, /**< atom start offset */
149 uint32_t capture_start, /**< index of first nested capture */
150 bool negative) /** lookahead type */
151 {
152 const uint32_t qmin = re_ctx_p->token.qmin;
153
154 re_append_opcode (re_ctx_p, RE_OP_ASSERT_END);
155 re_insert_value (re_ctx_p, start_offset, re_bytecode_size (re_ctx_p) - start_offset);
156
157 /* We need to clear nested capturing group results when a negative assertion or the tail after a positive assertion
158 * does not match, so we store the begin and end index of nested capturing groups. */
159 re_insert_value (re_ctx_p, start_offset, re_ctx_p->captures_count - capture_start);
160 re_insert_value (re_ctx_p, start_offset, capture_start);
161
162 /* Lookaheads always result in zero length matches, which means iterations will always stop on the first match.
163 * This allows us to not have to deal with iterations beyond one. Either qmin == 0 which will implicitly match,
164 * or qmin > 0, in which case the first iteration will decide whether the assertion matches depending on whether
165 * the iteration matched or not. This also allows us to ignore qmax entirely. */
166 re_insert_byte (re_ctx_p, start_offset, (uint8_t) JERRY_MIN (qmin, 1));
167
168 const re_opcode_t opcode = (negative) ? RE_OP_ASSERT_LOOKAHEAD_NEG : RE_OP_ASSERT_LOOKAHEAD_POS;
169 re_insert_opcode (re_ctx_p, start_offset, opcode);
170 } /* re_insert_assertion_lookahead */
171
172 /**
173 * Consume non greedy (question mark) character if present.
174 */
175 static void
re_parse_lazy_char(re_compiler_ctx_t * re_ctx_p)176 re_parse_lazy_char (re_compiler_ctx_t *re_ctx_p) /**< RegExp parser context */
177 {
178 if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p
179 && *re_ctx_p->input_curr_p == LIT_CHAR_QUESTION)
180 {
181 re_ctx_p->input_curr_p++;
182 re_ctx_p->token.greedy = false;
183 return;
184 }
185
186 re_ctx_p->token.greedy = true;
187 } /* re_parse_lazy_char */
188
189 /**
190 * Parse a max 3 digit long octal number from the input string, with a decimal value less than 256.
191 *
192 * @return value of the octal number
193 */
194 static uint32_t
re_parse_octal(re_compiler_ctx_t * re_ctx_p)195 re_parse_octal (re_compiler_ctx_t *re_ctx_p) /**< RegExp parser context */
196 {
197 JERRY_ASSERT (re_ctx_p->input_curr_p < re_ctx_p->input_end_p);
198 JERRY_ASSERT (lit_char_is_octal_digit (*re_ctx_p->input_curr_p));
199
200 uint32_t value = (uint32_t) (*re_ctx_p->input_curr_p++) - LIT_CHAR_0;
201
202 if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p
203 && lit_char_is_octal_digit (*re_ctx_p->input_curr_p))
204 {
205 value = value * 8 + (*re_ctx_p->input_curr_p++) - LIT_CHAR_0;
206 }
207
208 if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p
209 && lit_char_is_octal_digit (*re_ctx_p->input_curr_p))
210 {
211 const uint32_t new_value = value * 8 + (*re_ctx_p->input_curr_p) - LIT_CHAR_0;
212
213 if (new_value <= RE_MAX_OCTAL_VALUE)
214 {
215 value = new_value;
216 re_ctx_p->input_curr_p++;
217 }
218 }
219
220 return value;
221 } /* re_parse_octal */
222
223 /**
224 * Check that the currently parsed quantifier is valid.
225 *
226 * @return ECMA_VALUE_ERROR, if quantifier is invalid
227 * ECMA_VALUE_EMPTY, otherwise
228 */
229 static ecma_value_t
re_check_quantifier(re_compiler_ctx_t * re_ctx_p)230 re_check_quantifier (re_compiler_ctx_t *re_ctx_p)
231 {
232 if (re_ctx_p->token.qmin > re_ctx_p->token.qmax)
233 {
234 /* ECMA-262 v5.1 15.10.2.5 */
235 return ecma_raise_syntax_error (ECMA_ERR_MSG ("quantifier error: min > max."));
236 }
237
238 return ECMA_VALUE_EMPTY;
239 } /* re_check_quantifier */
240
241 /**
242 * Parse RegExp quantifier.
243 *
244 * @return ECMA_VALUE_TRUE - if parsed successfully
245 * ECMA_VALUE_FALSE - otherwise
246 */
247 static ecma_value_t
re_parse_quantifier(re_compiler_ctx_t * re_ctx_p)248 re_parse_quantifier (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */
249 {
250 if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p)
251 {
252 switch (*re_ctx_p->input_curr_p)
253 {
254 case LIT_CHAR_QUESTION:
255 {
256 re_ctx_p->input_curr_p++;
257 re_ctx_p->token.qmin = 0;
258 re_ctx_p->token.qmax = 1;
259
260 re_parse_lazy_char (re_ctx_p);
261 return ECMA_VALUE_TRUE;
262 }
263 case LIT_CHAR_ASTERISK:
264 {
265 re_ctx_p->input_curr_p++;
266 re_ctx_p->token.qmin = 0;
267 re_ctx_p->token.qmax = RE_INFINITY;
268
269 re_parse_lazy_char (re_ctx_p);
270 return ECMA_VALUE_TRUE;
271 }
272 case LIT_CHAR_PLUS:
273 {
274 re_ctx_p->input_curr_p++;
275 re_ctx_p->token.qmin = 1;
276 re_ctx_p->token.qmax = RE_INFINITY;
277
278 re_parse_lazy_char (re_ctx_p);
279 return ECMA_VALUE_TRUE;
280 }
281 case LIT_CHAR_LEFT_BRACE:
282 {
283 const lit_utf8_byte_t *current_p = re_ctx_p->input_curr_p + 1;
284 uint32_t qmin = 0;
285 uint32_t qmax = RE_INFINITY;
286
287 if (current_p >= re_ctx_p->input_end_p)
288 {
289 break;
290 }
291
292 if (!lit_char_is_decimal_digit (*current_p))
293 {
294 break;
295 }
296
297 qmin = lit_parse_decimal (¤t_p, re_ctx_p->input_end_p);
298
299 if (current_p >= re_ctx_p->input_end_p)
300 {
301 break;
302 }
303
304 lit_utf8_byte_t ch = *current_p++;
305 if (ch == LIT_CHAR_RIGHT_BRACE)
306 {
307 qmax = qmin;
308 }
309 else if (ch == LIT_CHAR_COMMA)
310 {
311 if (current_p >= re_ctx_p->input_end_p)
312 {
313 break;
314 }
315
316 if (lit_char_is_decimal_digit (*current_p))
317 {
318 qmax = lit_parse_decimal (¤t_p, re_ctx_p->input_end_p);
319 }
320
321 if (current_p >= re_ctx_p->input_end_p || *current_p++ != LIT_CHAR_RIGHT_BRACE)
322 {
323 break;
324 }
325 }
326 else
327 {
328 break;
329 }
330
331 re_ctx_p->token.qmin = qmin;
332 re_ctx_p->token.qmax = qmax;
333 re_ctx_p->input_curr_p = current_p;
334 re_parse_lazy_char (re_ctx_p);
335 return ECMA_VALUE_TRUE;
336 }
337 default:
338 {
339 break;
340 }
341 }
342 }
343
344 re_ctx_p->token.qmin = 1;
345 re_ctx_p->token.qmax = 1;
346 re_ctx_p->token.greedy = true;
347
348 return ECMA_VALUE_FALSE;
349 } /* re_parse_quantifier */
350
351 /**
352 * Count the number of groups in the current pattern.
353 */
354 static void
re_count_groups(re_compiler_ctx_t * re_ctx_p)355 re_count_groups (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */
356 {
357 bool is_char_class = 0;
358 re_ctx_p->groups_count = 0;
359 const lit_utf8_byte_t *curr_p = re_ctx_p->input_start_p;
360
361 while (curr_p < re_ctx_p->input_end_p)
362 {
363 switch (*curr_p++)
364 {
365 case LIT_CHAR_BACKSLASH:
366 {
367 if (curr_p < re_ctx_p->input_end_p)
368 {
369 lit_utf8_incr (&curr_p);
370 }
371 break;
372 }
373 case LIT_CHAR_LEFT_SQUARE:
374 {
375 is_char_class = true;
376 break;
377 }
378 case LIT_CHAR_RIGHT_SQUARE:
379 {
380 is_char_class = false;
381 break;
382 }
383 case LIT_CHAR_LEFT_PAREN:
384 {
385 if (curr_p < re_ctx_p->input_end_p
386 && *curr_p != LIT_CHAR_QUESTION
387 && !is_char_class)
388 {
389 re_ctx_p->groups_count++;
390 }
391 break;
392 }
393 }
394 }
395 } /* re_count_groups */
396
397 #if ENABLED (JERRY_ES2015)
398 /**
399 * Check if a code point is a Syntax character
400 *
401 * @return true, if syntax character
402 * false, otherwise
403 */
404 static bool
re_is_syntax_char(lit_code_point_t cp)405 re_is_syntax_char (lit_code_point_t cp) /**< code point */
406 {
407 return (cp == LIT_CHAR_CIRCUMFLEX
408 || cp == LIT_CHAR_DOLLAR_SIGN
409 || cp == LIT_CHAR_BACKSLASH
410 || cp == LIT_CHAR_DOT
411 || cp == LIT_CHAR_ASTERISK
412 || cp == LIT_CHAR_PLUS
413 || cp == LIT_CHAR_QUESTION
414 || cp == LIT_CHAR_LEFT_PAREN
415 || cp == LIT_CHAR_RIGHT_PAREN
416 || cp == LIT_CHAR_LEFT_SQUARE
417 || cp == LIT_CHAR_RIGHT_SQUARE
418 || cp == LIT_CHAR_LEFT_BRACE
419 || cp == LIT_CHAR_RIGHT_BRACE
420 || cp == LIT_CHAR_VLINE);
421 } /* re_is_syntax_char */
422 #endif /* ENABLED (JERRY_ES2015) */
423
424 /**
425 * Parse a Character Escape or a Character Class Escape.
426 *
427 * @return ECMA_VALUE_EMPTY, if parsed successfully
428 * ECMA_VALUE_ERROR, otherwise
429 */
430 static ecma_value_t
re_parse_char_escape(re_compiler_ctx_t * re_ctx_p)431 re_parse_char_escape (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */
432 {
433 JERRY_ASSERT (re_ctx_p->input_curr_p < re_ctx_p->input_end_p);
434 re_ctx_p->token.type = RE_TOK_CHAR;
435
436 if (lit_char_is_decimal_digit (*re_ctx_p->input_curr_p))
437 {
438 /* NULL code point escape, only valid if there are no following digits. */
439 if (*re_ctx_p->input_curr_p == LIT_CHAR_0
440 && (re_ctx_p->input_curr_p + 1 >= re_ctx_p->input_end_p
441 || !lit_char_is_decimal_digit (re_ctx_p->input_curr_p[1])))
442 {
443 re_ctx_p->input_curr_p++;
444 re_ctx_p->token.value = LIT_UNICODE_CODE_POINT_NULL;
445 return ECMA_VALUE_EMPTY;
446 }
447
448 #if ENABLED (JERRY_ES2015)
449 if (re_ctx_p->flags & RE_FLAG_UNICODE)
450 {
451 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid escape sequence"));
452 }
453 #endif /* ENABLED (JERRY_ES2015) */
454
455 /* Legacy octal escape sequence */
456 if (lit_char_is_octal_digit (*re_ctx_p->input_curr_p))
457 {
458 re_ctx_p->token.value = re_parse_octal (re_ctx_p);
459 return ECMA_VALUE_EMPTY;
460 }
461
462 /* Identity escape */
463 re_ctx_p->token.value = *re_ctx_p->input_curr_p++;
464 return ECMA_VALUE_EMPTY;
465 }
466
467 lit_code_point_t ch = lit_cesu8_read_next (&re_ctx_p->input_curr_p);
468 switch (ch)
469 {
470 /* Character Class escapes */
471 case LIT_CHAR_LOWERCASE_D:
472 {
473 re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE;
474 re_ctx_p->token.value = RE_ESCAPE_DIGIT;
475 break;
476 }
477 case LIT_CHAR_UPPERCASE_D:
478 {
479 re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE;
480 re_ctx_p->token.value = RE_ESCAPE_NOT_DIGIT;
481 break;
482 }
483 case LIT_CHAR_LOWERCASE_S:
484 {
485 re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE;
486 re_ctx_p->token.value = RE_ESCAPE_WHITESPACE;
487 break;
488 }
489 case LIT_CHAR_UPPERCASE_S:
490 {
491 re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE;
492 re_ctx_p->token.value = RE_ESCAPE_NOT_WHITESPACE;
493 break;
494 }
495 case LIT_CHAR_LOWERCASE_W:
496 {
497 re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE;
498 re_ctx_p->token.value = RE_ESCAPE_WORD_CHAR;
499 break;
500 }
501 case LIT_CHAR_UPPERCASE_W:
502 {
503 re_ctx_p->token.type = RE_TOK_CLASS_ESCAPE;
504 re_ctx_p->token.value = RE_ESCAPE_NOT_WORD_CHAR;
505 break;
506 }
507 /* Control escapes */
508 case LIT_CHAR_LOWERCASE_F:
509 {
510 re_ctx_p->token.value = LIT_CHAR_FF;
511 break;
512 }
513 case LIT_CHAR_LOWERCASE_N:
514 {
515 re_ctx_p->token.value = LIT_CHAR_LF;
516 break;
517 }
518 case LIT_CHAR_LOWERCASE_R:
519 {
520 re_ctx_p->token.value = LIT_CHAR_CR;
521 break;
522 }
523 case LIT_CHAR_LOWERCASE_T:
524 {
525 re_ctx_p->token.value = LIT_CHAR_TAB;
526 break;
527 }
528 case LIT_CHAR_LOWERCASE_V:
529 {
530 re_ctx_p->token.value = LIT_CHAR_VTAB;
531 break;
532 }
533 /* Control letter */
534 case LIT_CHAR_LOWERCASE_C:
535 {
536 if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p)
537 {
538 ch = *re_ctx_p->input_curr_p;
539
540 if ((ch >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END)
541 || (ch >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END))
542 {
543 re_ctx_p->token.value = (ch % 32);
544 re_ctx_p->input_curr_p++;
545
546 break;
547 }
548 }
549
550 #if ENABLED (JERRY_ES2015)
551 if (re_ctx_p->flags & RE_FLAG_UNICODE)
552 {
553 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid control escape sequence"));
554 }
555 #endif /* ENABLED (JERRY_ES2015) */
556
557 re_ctx_p->token.value = LIT_CHAR_BACKSLASH;
558 re_ctx_p->input_curr_p--;
559
560 break;
561 }
562 /* Hex escape */
563 case LIT_CHAR_LOWERCASE_X:
564 {
565 uint32_t hex_value = lit_char_hex_lookup (re_ctx_p->input_curr_p, re_ctx_p->input_end_p, 2);
566 if (hex_value != UINT32_MAX)
567 {
568 re_ctx_p->token.value = hex_value;
569 re_ctx_p->input_curr_p += 2;
570 break;
571 }
572
573 #if ENABLED (JERRY_ES2015)
574 if (re_ctx_p->flags & RE_FLAG_UNICODE)
575 {
576 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid hex escape sequence"));
577 }
578 #endif /* ENABLED (JERRY_ES2015) */
579
580 re_ctx_p->token.value = LIT_CHAR_LOWERCASE_X;
581 break;
582 }
583 /* Unicode escape */
584 case LIT_CHAR_LOWERCASE_U:
585 {
586 uint32_t hex_value = lit_char_hex_lookup (re_ctx_p->input_curr_p, re_ctx_p->input_end_p, 4);
587 if (hex_value != UINT32_MAX)
588 {
589 re_ctx_p->token.value = hex_value;
590 re_ctx_p->input_curr_p += 4;
591
592 #if ENABLED (JERRY_ES2015)
593 if (re_ctx_p->flags & RE_FLAG_UNICODE
594 && lit_is_code_point_utf16_high_surrogate (re_ctx_p->token.value)
595 && re_ctx_p->input_curr_p + 6 <= re_ctx_p->input_end_p
596 && re_ctx_p->input_curr_p[0] == '\\'
597 && re_ctx_p->input_curr_p[1] == 'u')
598 {
599 hex_value = lit_char_hex_lookup (re_ctx_p->input_curr_p + 2, re_ctx_p->input_end_p, 4);
600 if (lit_is_code_point_utf16_low_surrogate (hex_value))
601 {
602 re_ctx_p->token.value = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) re_ctx_p->token.value,
603 (ecma_char_t) hex_value);
604 re_ctx_p->input_curr_p += 6;
605 }
606 }
607 #endif /* ENABLED (JERRY_ES2015) */
608
609 break;
610 }
611
612 #if ENABLED (JERRY_ES2015)
613 if (re_ctx_p->flags & RE_FLAG_UNICODE)
614 {
615 if (re_ctx_p->input_curr_p + 1 < re_ctx_p->input_end_p
616 && re_ctx_p->input_curr_p[0] == LIT_CHAR_LEFT_BRACE
617 && lit_char_is_hex_digit (re_ctx_p->input_curr_p[1]))
618 {
619 lit_code_point_t cp = lit_char_hex_to_int (re_ctx_p->input_curr_p[1]);
620 re_ctx_p->input_curr_p += 2;
621
622 while (re_ctx_p->input_curr_p < re_ctx_p->input_end_p && lit_char_is_hex_digit (*re_ctx_p->input_curr_p))
623 {
624 cp = cp * 16 + lit_char_hex_to_int (*re_ctx_p->input_curr_p++);
625
626 if (JERRY_UNLIKELY (cp > LIT_UNICODE_CODE_POINT_MAX))
627 {
628 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid unicode escape sequence"));
629 }
630 }
631
632 if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p && *re_ctx_p->input_curr_p == LIT_CHAR_RIGHT_BRACE)
633 {
634 re_ctx_p->input_curr_p++;
635 re_ctx_p->token.value = cp;
636 break;
637 }
638 }
639
640 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid unicode escape sequence"));
641 }
642 #endif /* ENABLED (JERRY_ES2015) */
643
644 re_ctx_p->token.value = LIT_CHAR_LOWERCASE_U;
645 break;
646 }
647 /* Identity escape */
648 default:
649 {
650 #if ENABLED (JERRY_ES2015)
651 /* Must be '/', or one of SyntaxCharacter */
652 if (re_ctx_p->flags & RE_FLAG_UNICODE
653 && ch != LIT_CHAR_SLASH
654 && !re_is_syntax_char (ch))
655 {
656 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid escape"));
657 }
658 #endif /* ENABLED (JERRY_ES2015) */
659 re_ctx_p->token.value = ch;
660 }
661 }
662
663 return ECMA_VALUE_EMPTY;
664 } /* re_parse_char_escape */
665
666 /**
667 * Read the input pattern and parse the next token for the RegExp compiler
668 *
669 * @return empty ecma value - if parsed successfully
670 * error ecma value - otherwise
671 *
672 * Returned value must be freed with ecma_free_value
673 */
674 static ecma_value_t
re_parse_next_token(re_compiler_ctx_t * re_ctx_p)675 re_parse_next_token (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */
676 {
677 if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p)
678 {
679 re_ctx_p->token.type = RE_TOK_EOF;
680 return ECMA_VALUE_EMPTY;
681 }
682
683 ecma_char_t ch = lit_cesu8_read_next (&re_ctx_p->input_curr_p);
684
685 switch (ch)
686 {
687 case LIT_CHAR_CIRCUMFLEX:
688 {
689 re_ctx_p->token.type = RE_TOK_ASSERT_START;
690 return ECMA_VALUE_EMPTY;
691 }
692 case LIT_CHAR_DOLLAR_SIGN:
693 {
694 re_ctx_p->token.type = RE_TOK_ASSERT_END;
695 return ECMA_VALUE_EMPTY;
696 }
697 case LIT_CHAR_VLINE:
698 {
699 re_ctx_p->token.type = RE_TOK_ALTERNATIVE;
700 return ECMA_VALUE_EMPTY;
701 }
702 case LIT_CHAR_DOT:
703 {
704 re_ctx_p->token.type = RE_TOK_PERIOD;
705 /* Check quantifier */
706 break;
707 }
708 case LIT_CHAR_BACKSLASH:
709 {
710 if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p)
711 {
712 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid escape"));
713 }
714
715 /* DecimalEscape, Backreferences cannot start with a zero digit. */
716 if (*re_ctx_p->input_curr_p > LIT_CHAR_0 && *re_ctx_p->input_curr_p <= LIT_CHAR_9)
717 {
718 const lit_utf8_byte_t *digits_p = re_ctx_p->input_curr_p;
719 const uint32_t value = lit_parse_decimal (&digits_p, re_ctx_p->input_end_p);
720
721 if (re_ctx_p->groups_count < 0)
722 {
723 re_count_groups (re_ctx_p);
724 }
725
726 if (value <= (uint32_t) re_ctx_p->groups_count)
727 {
728 /* Valid backreference */
729 re_ctx_p->input_curr_p = digits_p;
730 re_ctx_p->token.type = RE_TOK_BACKREFERENCE;
731 re_ctx_p->token.value = value;
732
733 /* Check quantifier */
734 break;
735 }
736 }
737
738 if (*re_ctx_p->input_curr_p == LIT_CHAR_LOWERCASE_B)
739 {
740 re_ctx_p->input_curr_p++;
741 re_ctx_p->token.type = RE_TOK_ASSERT_WORD_BOUNDARY;
742 return ECMA_VALUE_EMPTY;
743 }
744 else if (*re_ctx_p->input_curr_p == LIT_CHAR_UPPERCASE_B)
745 {
746 re_ctx_p->input_curr_p++;
747 re_ctx_p->token.type = RE_TOK_ASSERT_NOT_WORD_BOUNDARY;
748 return ECMA_VALUE_EMPTY;
749 }
750
751 const ecma_value_t parse_result = re_parse_char_escape (re_ctx_p);
752
753 if (ECMA_IS_VALUE_ERROR (parse_result))
754 {
755 return parse_result;
756 }
757
758 /* Check quantifier */
759 break;
760 }
761 case LIT_CHAR_LEFT_PAREN:
762 {
763 if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p)
764 {
765 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unterminated group"));
766 }
767
768 if (*re_ctx_p->input_curr_p == LIT_CHAR_QUESTION)
769 {
770 re_ctx_p->input_curr_p++;
771 if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p)
772 {
773 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid group"));
774 }
775
776 ch = *re_ctx_p->input_curr_p++;
777
778 if (ch == LIT_CHAR_EQUALS)
779 {
780 re_ctx_p->token.type = RE_TOK_ASSERT_LOOKAHEAD;
781 re_ctx_p->token.value = false;
782 }
783 else if (ch == LIT_CHAR_EXCLAMATION)
784 {
785 re_ctx_p->token.type = RE_TOK_ASSERT_LOOKAHEAD;
786 re_ctx_p->token.value = true;
787 }
788 else if (ch == LIT_CHAR_COLON)
789 {
790 re_ctx_p->token.type = RE_TOK_START_NON_CAPTURE_GROUP;
791 }
792 else
793 {
794 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid group"));
795 }
796 }
797 else
798 {
799 re_ctx_p->token.type = RE_TOK_START_CAPTURE_GROUP;
800 }
801
802 return ECMA_VALUE_EMPTY;
803 }
804 case LIT_CHAR_RIGHT_PAREN:
805 {
806 re_ctx_p->token.type = RE_TOK_END_GROUP;
807
808 return ECMA_VALUE_EMPTY;
809 }
810 case LIT_CHAR_LEFT_SQUARE:
811 {
812 re_ctx_p->token.type = RE_TOK_CHAR_CLASS;
813
814 if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p)
815 {
816 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unterminated character class."));
817 }
818
819 return ECMA_VALUE_EMPTY;
820 }
821 case LIT_CHAR_QUESTION:
822 case LIT_CHAR_ASTERISK:
823 case LIT_CHAR_PLUS:
824 {
825 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid quantifier."));
826 }
827 case LIT_CHAR_LEFT_BRACE:
828 {
829 re_ctx_p->input_curr_p--;
830 if (ecma_is_value_true (re_parse_quantifier (re_ctx_p)))
831 {
832 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Nothing to repeat."));
833 }
834
835 #if ENABLED (JERRY_ES2015)
836 if (re_ctx_p->flags & RE_FLAG_UNICODE)
837 {
838 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Lone quantifier bracket."));
839 }
840 #endif /* ENABLED (JERRY_ES2015) */
841
842 re_ctx_p->input_curr_p++;
843 re_ctx_p->token.type = RE_TOK_CHAR;
844 re_ctx_p->token.value = ch;
845
846 /* Check quantifier */
847 break;
848 }
849 #if ENABLED (JERRY_ES2015)
850 case LIT_CHAR_RIGHT_SQUARE:
851 case LIT_CHAR_RIGHT_BRACE:
852 {
853 if (re_ctx_p->flags & RE_FLAG_UNICODE)
854 {
855 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Lone quantifier bracket."));
856 }
857
858 /* FALLTHRU */
859 }
860 #endif /* ENABLED (JERRY_ES2015) */
861 default:
862 {
863 re_ctx_p->token.type = RE_TOK_CHAR;
864 re_ctx_p->token.value = ch;
865
866 #if ENABLED (JERRY_ES2015)
867 if (re_ctx_p->flags & RE_FLAG_UNICODE
868 && lit_is_code_point_utf16_high_surrogate (ch)
869 && re_ctx_p->input_curr_p < re_ctx_p->input_end_p)
870 {
871 const ecma_char_t next = lit_cesu8_peek_next (re_ctx_p->input_curr_p);
872 if (lit_is_code_point_utf16_low_surrogate (next))
873 {
874 re_ctx_p->token.value = lit_convert_surrogate_pair_to_code_point (ch, next);
875 re_ctx_p->input_curr_p += LIT_UTF8_MAX_BYTES_IN_CODE_UNIT;
876 }
877 }
878 #endif /* ENABLED (JERRY_ES2015) */
879
880 /* Check quantifier */
881 break;
882 }
883 }
884
885 re_parse_quantifier (re_ctx_p);
886 return re_check_quantifier (re_ctx_p);
887 } /* re_parse_next_token */
888
889 /**
890 * Append a character class range to the bytecode.
891 */
892 static void
re_class_add_range(re_compiler_ctx_t * re_ctx_p,lit_code_point_t start,lit_code_point_t end)893 re_class_add_range (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
894 lit_code_point_t start, /**< range begin */
895 lit_code_point_t end) /**< range end */
896 {
897 if (re_ctx_p->flags & RE_FLAG_IGNORE_CASE)
898 {
899 start = ecma_regexp_canonicalize_char (start, re_ctx_p->flags & RE_FLAG_UNICODE);
900 end = ecma_regexp_canonicalize_char (end, re_ctx_p->flags & RE_FLAG_UNICODE);
901 }
902
903 re_append_char (re_ctx_p, start);
904 re_append_char (re_ctx_p, end);
905 } /* re_class_add_range */
906
907 /**
908 * Add a single character to the character class
909 */
910 static void
re_class_add_char(re_compiler_ctx_t * re_ctx_p,uint32_t class_offset,lit_code_point_t cp)911 re_class_add_char (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
912 uint32_t class_offset, /**< character class bytecode offset*/
913 lit_code_point_t cp) /**< code point */
914 {
915 if (re_ctx_p->flags & RE_FLAG_IGNORE_CASE)
916 {
917 cp = ecma_regexp_canonicalize_char (cp, re_ctx_p->flags & RE_FLAG_UNICODE);
918 }
919
920 re_insert_char (re_ctx_p, class_offset, cp);
921 } /* re_class_add_char */
922
/**
 * Marker value for "no code point", it is used by the character class parser
 * for class escapes and for a range start which is not set yet.
 */
#define RE_INVALID_CP (0xFFFFFFFFu)
927
928 /**
929 * Read the input pattern and parse the range of character class
930 *
931 * @return empty ecma value - if parsed successfully
932 * error ecma value - otherwise
933 *
934 * Returned value must be freed with ecma_free_value
935 */
936 static ecma_value_t
re_parse_char_class(re_compiler_ctx_t * re_ctx_p)937 re_parse_char_class (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context */
938 {
939 static const uint8_t escape_flags[] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20};
940 const uint32_t class_offset = re_bytecode_size (re_ctx_p);
941
942 uint8_t found_escape_flags = 0;
943 uint8_t out_class_flags = 0;
944
945 uint32_t range_count = 0;
946 uint32_t char_count = 0;
947 bool is_range = false;
948
949 JERRY_ASSERT (re_ctx_p->input_curr_p < re_ctx_p->input_end_p);
950 if (*re_ctx_p->input_curr_p == LIT_CHAR_CIRCUMFLEX)
951 {
952 re_ctx_p->input_curr_p++;
953 out_class_flags |= RE_CLASS_INVERT;
954 }
955
956 lit_code_point_t start = RE_INVALID_CP;
957
958 while (true)
959 {
960 if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p)
961 {
962 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unterminated character class."));
963 }
964
965 if (*re_ctx_p->input_curr_p == LIT_CHAR_RIGHT_SQUARE)
966 {
967 if (is_range)
968 {
969 if (start != RE_INVALID_CP)
970 {
971 re_class_add_char (re_ctx_p, class_offset, start);
972 char_count++;
973 }
974
975 re_class_add_char (re_ctx_p, class_offset, LIT_CHAR_MINUS);
976 char_count++;
977 }
978
979 re_ctx_p->input_curr_p++;
980 break;
981 }
982
983 JERRY_ASSERT (re_ctx_p->input_curr_p < re_ctx_p->input_end_p);
984 lit_code_point_t current;
985
986 if (*re_ctx_p->input_curr_p == LIT_CHAR_BACKSLASH)
987 {
988 re_ctx_p->input_curr_p++;
989 if (re_ctx_p->input_curr_p >= re_ctx_p->input_end_p)
990 {
991 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid escape"));
992 }
993
994 if (*re_ctx_p->input_curr_p == LIT_CHAR_LOWERCASE_B)
995 {
996 re_ctx_p->input_curr_p++;
997 current = LIT_CHAR_BS;
998 }
999 #if ENABLED (JERRY_ES2015)
1000 else if (*re_ctx_p->input_curr_p == LIT_CHAR_MINUS)
1001 {
1002 re_ctx_p->input_curr_p++;
1003 current = LIT_CHAR_MINUS;
1004 }
1005 #endif /* ENABLED (JERRY_ES2015) */
1006 else if ((re_ctx_p->flags & RE_FLAG_UNICODE) == 0
1007 && *re_ctx_p->input_curr_p == LIT_CHAR_LOWERCASE_C
1008 && re_ctx_p->input_curr_p + 1 < re_ctx_p->input_end_p
1009 && (lit_char_is_decimal_digit (*(re_ctx_p->input_curr_p + 1))
1010 || *(re_ctx_p->input_curr_p + 1) == LIT_CHAR_UNDERSCORE))
1011 {
1012 current = ((uint8_t) *(re_ctx_p->input_curr_p + 1) % 32);
1013 re_ctx_p->input_curr_p += 2;
1014 }
1015 else
1016 {
1017 if (ECMA_IS_VALUE_ERROR (re_parse_char_escape (re_ctx_p)))
1018 {
1019 return ECMA_VALUE_ERROR;
1020 }
1021
1022 if (re_ctx_p->token.type == RE_TOK_CLASS_ESCAPE)
1023 {
1024 const uint8_t escape = (uint8_t) re_ctx_p->token.value;
1025 found_escape_flags |= escape_flags[escape];
1026 current = RE_INVALID_CP;
1027 }
1028 else
1029 {
1030 JERRY_ASSERT (re_ctx_p->token.type == RE_TOK_CHAR);
1031 current = re_ctx_p->token.value;
1032 }
1033 }
1034 }
1035 #if ENABLED (JERRY_ES2015)
1036 else if (re_ctx_p->flags & RE_FLAG_UNICODE)
1037 {
1038 current = ecma_regexp_unicode_advance (&re_ctx_p->input_curr_p, re_ctx_p->input_end_p);
1039 }
1040 #endif /* ENABLED (JERRY_ES2015) */
1041 else
1042 {
1043 current = lit_cesu8_read_next (&re_ctx_p->input_curr_p);
1044 }
1045
1046 if (is_range)
1047 {
1048 is_range = false;
1049
1050 if (start != RE_INVALID_CP && current != RE_INVALID_CP)
1051 {
1052 if (start > current)
1053 {
1054 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Range out of order in character class"));
1055 }
1056
1057 re_class_add_range (re_ctx_p, start, current);
1058 range_count++;
1059 continue;
1060 }
1061
1062 #if ENABLED (JERRY_ES2015)
1063 if (re_ctx_p->flags & RE_FLAG_UNICODE)
1064 {
1065 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid character class"));
1066 }
1067 #endif /* ENABLED (JERRY_ES2015) */
1068
1069 if (start != RE_INVALID_CP)
1070 {
1071 re_class_add_char (re_ctx_p, class_offset, start);
1072 char_count++;
1073 }
1074 else if (current != RE_INVALID_CP)
1075 {
1076 re_class_add_char (re_ctx_p, class_offset, current);
1077 char_count++;
1078 }
1079
1080 re_class_add_char (re_ctx_p, class_offset, LIT_CHAR_MINUS);
1081 char_count++;
1082 continue;
1083 }
1084
1085 if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p
1086 && *re_ctx_p->input_curr_p == LIT_CHAR_MINUS)
1087 {
1088 re_ctx_p->input_curr_p++;
1089 start = current;
1090 is_range = true;
1091 continue;
1092 }
1093
1094 if (current != RE_INVALID_CP)
1095 {
1096 re_class_add_char (re_ctx_p, class_offset, current);
1097 char_count++;
1098 }
1099 }
1100
1101 uint8_t escape_count = 0;
1102 for (ecma_class_escape_t escape = RE_ESCAPE__START; escape < RE_ESCAPE__COUNT; ++escape)
1103 {
1104 if (found_escape_flags & escape_flags[escape])
1105 {
1106 re_insert_byte (re_ctx_p, class_offset, (uint8_t) escape);
1107 escape_count++;
1108 }
1109 }
1110
1111 if (range_count > 0)
1112 {
1113 re_insert_value (re_ctx_p, class_offset, range_count);
1114 out_class_flags |= RE_CLASS_HAS_RANGES;
1115 }
1116
1117 if (char_count > 0)
1118 {
1119 re_insert_value (re_ctx_p, class_offset, char_count);
1120 out_class_flags |= RE_CLASS_HAS_CHARS;
1121 }
1122
1123 JERRY_ASSERT (escape_count <= RE_CLASS_ESCAPE_COUNT_MASK);
1124 out_class_flags |= escape_count;
1125
1126 re_insert_byte (re_ctx_p, class_offset, out_class_flags);
1127 re_insert_opcode (re_ctx_p, class_offset, RE_OP_CHAR_CLASS);
1128
1129 re_parse_quantifier (re_ctx_p);
1130 return re_check_quantifier (re_ctx_p);
1131 } /* re_parse_char_class */
1132
1133 /**
1134 * Parse alternatives
1135 *
1136 * @return empty ecma value - if alternative was successfully parsed
1137 * error ecma value - otherwise
1138 *
1139 * Returned value must be freed with ecma_free_value
1140 */
1141 ecma_value_t
re_parse_alternative(re_compiler_ctx_t * re_ctx_p,bool expect_eof)1142 re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
1143 bool expect_eof) /**< expect end of file */
1144 {
1145 ECMA_CHECK_STACK_USAGE ();
1146 uint32_t alternative_offset = re_bytecode_size (re_ctx_p);
1147 bool first_alternative = true;
1148
1149 while (true)
1150 {
1151 ecma_value_t next_token_result = re_parse_next_token (re_ctx_p);
1152 if (ECMA_IS_VALUE_ERROR (next_token_result))
1153 {
1154 return next_token_result;
1155 }
1156
1157 JERRY_ASSERT (ecma_is_value_empty (next_token_result));
1158
1159 uint32_t atom_offset = re_bytecode_size (re_ctx_p);
1160
1161 switch (re_ctx_p->token.type)
1162 {
1163 case RE_TOK_START_CAPTURE_GROUP:
1164 {
1165 const uint32_t idx = re_ctx_p->captures_count++;
1166 const uint32_t capture_start = idx;
1167
1168 ecma_value_t result = re_parse_alternative (re_ctx_p, false);
1169 if (ECMA_IS_VALUE_ERROR (result))
1170 {
1171 return result;
1172 }
1173
1174 re_parse_quantifier (re_ctx_p);
1175
1176 if (ECMA_IS_VALUE_ERROR (re_check_quantifier (re_ctx_p)))
1177 {
1178 return ECMA_VALUE_ERROR;
1179 }
1180
1181 re_insert_into_group (re_ctx_p, atom_offset, idx, capture_start, true);
1182 break;
1183 }
1184 case RE_TOK_START_NON_CAPTURE_GROUP:
1185 {
1186 const uint32_t idx = re_ctx_p->non_captures_count++;
1187 const uint32_t capture_start = re_ctx_p->captures_count;
1188
1189 ecma_value_t result = re_parse_alternative (re_ctx_p, false);
1190 if (ECMA_IS_VALUE_ERROR (result))
1191 {
1192 return result;
1193 }
1194
1195 re_parse_quantifier (re_ctx_p);
1196
1197 if (ECMA_IS_VALUE_ERROR (re_check_quantifier (re_ctx_p)))
1198 {
1199 return ECMA_VALUE_ERROR;
1200 }
1201
1202 re_insert_into_group (re_ctx_p, atom_offset, idx, capture_start, false);
1203 break;
1204 }
1205 case RE_TOK_PERIOD:
1206 {
1207 #if ENABLED (JERRY_ES2015)
1208 re_append_opcode (re_ctx_p, (re_ctx_p->flags & RE_FLAG_UNICODE) ? RE_OP_UNICODE_PERIOD : RE_OP_PERIOD);
1209 #else /* !ENABLED (JERRY_ES2015) */
1210 re_append_opcode (re_ctx_p, RE_OP_PERIOD);
1211 #endif /* !ENABLED (JERRY_ES2015) */
1212
1213 re_insert_atom_iterator (re_ctx_p, atom_offset);
1214 break;
1215 }
1216 case RE_TOK_ALTERNATIVE:
1217 {
1218 re_insert_value (re_ctx_p, alternative_offset, re_bytecode_size (re_ctx_p) - alternative_offset);
1219 re_insert_opcode (re_ctx_p, alternative_offset, first_alternative ? RE_OP_ALTERNATIVE_START
1220 : RE_OP_ALTERNATIVE_NEXT);
1221
1222 alternative_offset = re_bytecode_size (re_ctx_p);
1223 first_alternative = false;
1224 break;
1225 }
1226 case RE_TOK_ASSERT_START:
1227 {
1228 re_append_opcode (re_ctx_p, RE_OP_ASSERT_LINE_START);
1229 break;
1230 }
1231 case RE_TOK_ASSERT_END:
1232 {
1233 re_append_opcode (re_ctx_p, RE_OP_ASSERT_LINE_END);
1234 break;
1235 }
1236 case RE_TOK_ASSERT_WORD_BOUNDARY:
1237 {
1238 re_append_opcode (re_ctx_p, RE_OP_ASSERT_WORD_BOUNDARY);
1239 break;
1240 }
1241 case RE_TOK_ASSERT_NOT_WORD_BOUNDARY:
1242 {
1243 re_append_opcode (re_ctx_p, RE_OP_ASSERT_NOT_WORD_BOUNDARY);
1244 break;
1245 }
1246 case RE_TOK_ASSERT_LOOKAHEAD:
1247 {
1248 const uint32_t start_capture_count = re_ctx_p->captures_count;
1249 const bool is_negative = !!re_ctx_p->token.value;
1250
1251 ecma_value_t result = re_parse_alternative (re_ctx_p, false);
1252
1253 if (ECMA_IS_VALUE_ERROR (result))
1254 {
1255 return result;
1256 }
1257
1258 #if ENABLED (JERRY_ES2015)
1259 if (re_ctx_p->flags & RE_FLAG_UNICODE)
1260 {
1261 re_ctx_p->token.qmin = 1;
1262 re_ctx_p->token.qmax = 1;
1263 re_ctx_p->token.greedy = true;
1264 }
1265 else
1266 #endif /* ENABLED (JERRY_ES2015) */
1267 {
1268 re_parse_quantifier (re_ctx_p);
1269
1270 if (ECMA_IS_VALUE_ERROR (re_check_quantifier (re_ctx_p)))
1271 {
1272 return ECMA_VALUE_ERROR;
1273 }
1274 }
1275
1276 re_insert_assertion_lookahead (re_ctx_p, atom_offset, start_capture_count, is_negative);
1277 break;
1278 }
1279 case RE_TOK_BACKREFERENCE:
1280 {
1281 const uint32_t backref_idx = re_ctx_p->token.value;
1282 re_append_opcode (re_ctx_p, RE_OP_BACKREFERENCE);
1283 re_append_value (re_ctx_p, backref_idx);
1284
1285 if (re_ctx_p->token.qmin != 1 || re_ctx_p->token.qmax != 1)
1286 {
1287 const uint32_t group_idx = re_ctx_p->non_captures_count++;
1288 re_insert_into_group (re_ctx_p, atom_offset, group_idx, re_ctx_p->captures_count, false);
1289 }
1290
1291 break;
1292 }
1293 case RE_TOK_CLASS_ESCAPE:
1294 {
1295 const ecma_class_escape_t escape = (ecma_class_escape_t) re_ctx_p->token.value;
1296 re_append_opcode (re_ctx_p, RE_OP_CLASS_ESCAPE);
1297 re_append_byte (re_ctx_p, (uint8_t) escape);
1298
1299 re_insert_atom_iterator (re_ctx_p, atom_offset);
1300 break;
1301 }
1302 case RE_TOK_CHAR_CLASS:
1303 {
1304 ecma_value_t result = re_parse_char_class (re_ctx_p);
1305
1306 if (ECMA_IS_VALUE_ERROR (result))
1307 {
1308 return result;
1309 }
1310
1311 re_insert_atom_iterator (re_ctx_p, atom_offset);
1312 break;
1313 }
1314 case RE_TOK_END_GROUP:
1315 {
1316 if (expect_eof)
1317 {
1318 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unmatched ')'"));
1319 }
1320
1321 if (!first_alternative)
1322 {
1323 re_insert_value (re_ctx_p, alternative_offset, re_bytecode_size (re_ctx_p) - alternative_offset);
1324 re_insert_opcode (re_ctx_p, alternative_offset, RE_OP_ALTERNATIVE_NEXT);
1325 }
1326
1327 return ECMA_VALUE_EMPTY;
1328 }
1329 case RE_TOK_EOF:
1330 {
1331 if (!expect_eof)
1332 {
1333 return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unexpected end of pattern."));
1334 }
1335
1336 if (!first_alternative)
1337 {
1338 re_insert_value (re_ctx_p, alternative_offset, re_bytecode_size (re_ctx_p) - alternative_offset);
1339 re_insert_opcode (re_ctx_p, alternative_offset, RE_OP_ALTERNATIVE_NEXT);
1340 }
1341
1342 re_append_opcode (re_ctx_p, RE_OP_EOF);
1343 return ECMA_VALUE_EMPTY;
1344 }
1345 default:
1346 {
1347 JERRY_ASSERT (re_ctx_p->token.type == RE_TOK_CHAR);
1348
1349 lit_code_point_t ch = re_ctx_p->token.value;
1350
1351 if (ch <= LIT_UTF8_1_BYTE_CODE_POINT_MAX && (re_ctx_p->flags & RE_FLAG_IGNORE_CASE) == 0)
1352 {
1353 re_append_opcode (re_ctx_p, RE_OP_BYTE);
1354 re_append_byte (re_ctx_p, (uint8_t) ch);
1355
1356 re_insert_atom_iterator (re_ctx_p, atom_offset);
1357 break;
1358 }
1359
1360 if (re_ctx_p->flags & RE_FLAG_IGNORE_CASE)
1361 {
1362 ch = ecma_regexp_canonicalize_char (ch, re_ctx_p->flags & RE_FLAG_UNICODE);
1363 }
1364
1365 re_append_opcode (re_ctx_p, RE_OP_CHAR);
1366 re_append_char (re_ctx_p, ch);
1367
1368 re_insert_atom_iterator (re_ctx_p, atom_offset);
1369 break;
1370 }
1371 }
1372 }
1373
1374 return ECMA_VALUE_EMPTY;
1375 } /* re_parse_alternative */
1376
1377 /**
1378 * @}
1379 * @}
1380 * @}
1381 */
1382
1383 #endif /* ENABLED (JERRY_BUILTIN_REGEXP) */
1384