1 /* Copyright JS Foundation and other contributors, http://js.foundation
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecma-alloc.h"
17 #include "ecma-array-object.h"
18 #include "ecma-builtins.h"
19 #include "ecma-builtin-helpers.h"
20 #include "ecma-exceptions.h"
21 #include "ecma-gc.h"
22 #include "ecma-globals.h"
23 #include "ecma-objects.h"
24 #include "ecma-function-object.h"
25 #include "ecma-regexp-object.h"
26 #include "ecma-try-catch-macro.h"
27 #include "jcontext.h"
28 #include "jrt-libc-includes.h"
29 #include "lit-char-helpers.h"
30 #include "re-compiler.h"
31
32 #if ENABLED (JERRY_BUILTIN_REGEXP)
33
34 #define ECMA_BUILTINS_INTERNAL
35 #include "ecma-builtins-internal.h"
36
37 /** \addtogroup ecma ECMA
38 * @{
39 *
40 * \addtogroup ecmaregexpobject ECMA RegExp object related routines
41 * @{
42 */
43
44 /**
45 * Index of the global capturing group
46 */
47 #define RE_GLOBAL_CAPTURE 0
48
49 /**
50 * Parse RegExp flags (global, ignoreCase, multiline)
51 *
52 * See also: ECMA-262 v5, 15.10.4.1
53 *
54 * @return empty ecma value - if parsed successfully
55 * error ecma value - otherwise
56 *
57 * Returned value must be freed with ecma_free_value
58 */
59 ecma_value_t
ecma_regexp_parse_flags(ecma_string_t * flags_str_p,uint16_t * flags_p)60 ecma_regexp_parse_flags (ecma_string_t *flags_str_p, /**< Input string with flags */
61 uint16_t *flags_p) /**< [out] parsed flag bits */
62 {
63 ecma_value_t ret_value = ECMA_VALUE_EMPTY;
64 uint16_t result_flags = RE_FLAG_EMPTY;
65
66 ECMA_STRING_TO_UTF8_STRING (flags_str_p, flags_start_p, flags_start_size);
67
68 const lit_utf8_byte_t *flags_str_curr_p = flags_start_p;
69 const lit_utf8_byte_t *flags_str_end_p = flags_start_p + flags_start_size;
70
71 while (flags_str_curr_p < flags_str_end_p)
72 {
73 ecma_regexp_flags_t flag;
74 switch (*flags_str_curr_p++)
75 {
76 case 'g':
77 {
78 flag = RE_FLAG_GLOBAL;
79 break;
80 }
81 case 'i':
82 {
83 flag = RE_FLAG_IGNORE_CASE;
84 break;
85 }
86 case 'm':
87 {
88 flag = RE_FLAG_MULTILINE;
89 break;
90 }
91 case 'y':
92 {
93 flag = RE_FLAG_STICKY;
94 break;
95 }
96 case 'u':
97 {
98 flag = RE_FLAG_UNICODE;
99 break;
100 }
101 default:
102 {
103 flag = RE_FLAG_EMPTY;
104 break;
105 }
106 }
107
108 if (flag == RE_FLAG_EMPTY || (result_flags & flag) != 0)
109 {
110 ret_value = ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp flags."));
111 break;
112 }
113
114 result_flags = (uint16_t) (result_flags | flag);
115 }
116
117 ECMA_FINALIZE_UTF8_STRING (flags_start_p, flags_start_size);
118
119 *flags_p = result_flags;
120 return ret_value;
121 } /* ecma_regexp_parse_flags */
122
123 #if !ENABLED (JERRY_ES2015)
124 /*
125 * Create the properties of a RegExp instance.
126 */
127 static void
ecma_regexp_create_props(ecma_object_t * re_object_p,ecma_string_t * source_p,uint16_t flags)128 ecma_regexp_create_props (ecma_object_t *re_object_p, /**< RegExp object */
129 ecma_string_t *source_p, /**< source string */
130 uint16_t flags) /**< flags */
131 {
132 ecma_property_value_t *prop_value_p;
133
134 prop_value_p = ecma_create_named_data_property (re_object_p,
135 ecma_get_magic_string (LIT_MAGIC_STRING_SOURCE),
136 ECMA_PROPERTY_FIXED,
137 NULL);
138
139 ecma_ref_ecma_string (source_p);
140 prop_value_p->value = ecma_make_string_value (source_p);
141
142 prop_value_p = ecma_create_named_data_property (re_object_p,
143 ecma_get_magic_string (LIT_MAGIC_STRING_GLOBAL),
144 ECMA_PROPERTY_FIXED,
145 NULL);
146
147 prop_value_p->value = ecma_make_boolean_value (flags & RE_FLAG_GLOBAL);
148
149 prop_value_p = ecma_create_named_data_property (re_object_p,
150 ecma_get_magic_string (LIT_MAGIC_STRING_IGNORECASE_UL),
151 ECMA_PROPERTY_FIXED,
152 NULL);
153
154 prop_value_p->value = ecma_make_boolean_value (flags & RE_FLAG_IGNORE_CASE);
155
156 prop_value_p = ecma_create_named_data_property (re_object_p,
157 ecma_get_magic_string (LIT_MAGIC_STRING_MULTILINE),
158 ECMA_PROPERTY_FIXED,
159 NULL);
160
161 prop_value_p->value = ecma_make_boolean_value (flags & RE_FLAG_MULTILINE);
162 } /* ecma_regexp_create_props */
163
164 /*
165 * Update the properties of a RegExp instance.
166 */
167 static void
ecma_regexp_update_props(ecma_object_t * re_object_p,ecma_string_t * source_p,uint16_t flags)168 ecma_regexp_update_props (ecma_object_t *re_object_p, /**< RegExp object */
169 ecma_string_t *source_p, /**< source string */
170 uint16_t flags) /**< flags */
171 {
172 ecma_property_t *prop_p;
173
174 prop_p = ecma_find_named_property (re_object_p, ecma_get_magic_string (LIT_MAGIC_STRING_SOURCE));
175 JERRY_ASSERT (prop_p != NULL);
176 ecma_property_value_t *prop_value_p = ECMA_PROPERTY_VALUE_PTR (prop_p);
177 ecma_free_value (prop_value_p->value);
178 ecma_ref_ecma_string (source_p);
179 prop_value_p->value = ecma_make_string_value (source_p);
180
181 prop_p = ecma_find_named_property (re_object_p, ecma_get_magic_string (LIT_MAGIC_STRING_GLOBAL));
182 JERRY_ASSERT (prop_p != NULL);
183 prop_value_p = ECMA_PROPERTY_VALUE_PTR (prop_p);
184 prop_value_p->value = ecma_make_boolean_value (flags & RE_FLAG_GLOBAL);
185
186 prop_p = ecma_find_named_property (re_object_p, ecma_get_magic_string (LIT_MAGIC_STRING_IGNORECASE_UL));
187 JERRY_ASSERT (prop_p != NULL);
188 prop_value_p = ECMA_PROPERTY_VALUE_PTR (prop_p);
189 prop_value_p->value = ecma_make_boolean_value (flags & RE_FLAG_IGNORE_CASE);
190
191 prop_p = ecma_find_named_property (re_object_p, ecma_get_magic_string (LIT_MAGIC_STRING_MULTILINE));
192 JERRY_ASSERT (prop_p != NULL);
193 prop_value_p = ECMA_PROPERTY_VALUE_PTR (prop_p);
194 prop_value_p->value = ecma_make_boolean_value (flags & RE_FLAG_MULTILINE);
195 } /* ecma_regexp_update_props */
196 #endif /* !ENABLED (JERRY_ES2015) */
197
198 /**
199 * RegExpAlloc method
200 *
201 * See also: ECMA-262 v5, 15.10.4.1
202 * ECMA-262 v6, 21.2.3.2.1
203 *
204 * Note:
205 * Returned value must be freed with ecma_free_value.
206 *
207 * @return ecma_object_t
208 */
209 ecma_object_t *
ecma_op_regexp_alloc(ecma_object_t * ctr_obj_p)210 ecma_op_regexp_alloc (ecma_object_t *ctr_obj_p) /**< constructor object pointer */
211 {
212 #if ENABLED (JERRY_ES2015)
213 if (ctr_obj_p == NULL)
214 {
215 ctr_obj_p = ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP);
216 }
217
218 ecma_object_t *proto_obj_p = ecma_op_get_prototype_from_constructor (ctr_obj_p,
219 ECMA_BUILTIN_ID_REGEXP_PROTOTYPE);
220
221 if (JERRY_UNLIKELY (proto_obj_p == NULL))
222 {
223 return proto_obj_p;
224 }
225
226 #else /* !ENABLED (JERRY_ES2015) */
227 JERRY_UNUSED (ctr_obj_p);
228 ecma_object_t *proto_obj_p = ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP_PROTOTYPE);
229 #endif /* ENABLED (JERRY_ES2015) */
230
231 ecma_object_t *new_object_p = ecma_create_object (proto_obj_p,
232 sizeof (ecma_extended_object_t),
233 ECMA_OBJECT_TYPE_CLASS);
234
235 #if ENABLED (JERRY_ES2015)
236 ecma_deref_object (proto_obj_p);
237 #endif /* ENABLED (JERRY_ES2015) */
238
239 ecma_extended_object_t *regexp_obj_p = (ecma_extended_object_t *) new_object_p;
240
241 /* Class id will be initialized after the bytecode is compiled. */
242 regexp_obj_p->u.class_prop.class_id = LIT_MAGIC_STRING_UNDEFINED;
243
244 ecma_value_t status = ecma_builtin_helper_def_prop (new_object_p,
245 ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL),
246 ecma_make_uint32_value (0),
247 ECMA_PROPERTY_FLAG_WRITABLE | ECMA_PROP_IS_THROW);
248
249 JERRY_ASSERT (ecma_is_value_true (status));
250
251 return new_object_p;
252 } /* ecma_op_regexp_alloc */
253
254 /**
255 * Helper method for initializing an aready existing RegExp object.
256 */
257 static void
ecma_op_regexp_initialize(ecma_object_t * regexp_obj_p,const re_compiled_code_t * bc_p,ecma_string_t * pattern_str_p,uint16_t flags)258 ecma_op_regexp_initialize (ecma_object_t *regexp_obj_p, /**< RegExp object */
259 const re_compiled_code_t *bc_p, /**< bytecode */
260 ecma_string_t *pattern_str_p, /**< pattern */
261 uint16_t flags) /**< flags */
262 {
263 ecma_extended_object_t *ext_obj_p = (ecma_extended_object_t *) regexp_obj_p;
264
265 #if !ENABLED (JERRY_ES2015)
266 if (ext_obj_p->u.class_prop.class_id == LIT_MAGIC_STRING_UNDEFINED)
267 {
268 /* This instance has not been initialized before. */
269 ecma_regexp_create_props (regexp_obj_p, pattern_str_p, flags);
270 }
271 else
272 {
273 ecma_regexp_update_props (regexp_obj_p, pattern_str_p, flags);
274 }
275 #endif /* !ENABLED (JERRY_ES2015) */
276
277 #if ENABLED (JERRY_ES2015)
278 JERRY_UNUSED (pattern_str_p);
279 JERRY_UNUSED (flags);
280 #endif /* ENABLED (JERRY_ES2015) */
281
282 ext_obj_p->u.class_prop.class_id = LIT_MAGIC_STRING_REGEXP_UL;
283 ECMA_SET_INTERNAL_VALUE_POINTER (ext_obj_p->u.class_prop.u.value, bc_p);
284 } /* ecma_op_regexp_initialize */
285
286 /**
287 * Method for creating a RegExp object from pattern.
288 *
289 * Note:
290 * Allocation have to happen before invoking this function using ecma_op_regexp_alloc.
291 *
292 * @return ecma_value_t
293 */
294 ecma_value_t
ecma_op_create_regexp_from_pattern(ecma_object_t * regexp_obj_p,ecma_value_t pattern_value,ecma_value_t flags_value)295 ecma_op_create_regexp_from_pattern (ecma_object_t *regexp_obj_p, /**< RegExp object */
296 ecma_value_t pattern_value, /**< pattern */
297 ecma_value_t flags_value) /**< flags */
298 {
299 ecma_string_t *pattern_str_p = ecma_regexp_read_pattern_str_helper (pattern_value);
300 uint16_t flags = 0;
301
302 if (JERRY_UNLIKELY (pattern_str_p == NULL))
303 {
304 return ECMA_VALUE_ERROR;
305 }
306
307 if (!ecma_is_value_undefined (flags_value))
308 {
309 ecma_string_t *flags_str_p = ecma_op_to_string (flags_value);
310
311 if (JERRY_UNLIKELY (flags_str_p == NULL))
312 {
313 ecma_deref_ecma_string (pattern_str_p);
314 return ECMA_VALUE_ERROR;
315 }
316
317 ecma_value_t parse_flags_value = ecma_regexp_parse_flags (flags_str_p, &flags);
318 ecma_deref_ecma_string (flags_str_p);
319
320 if (ECMA_IS_VALUE_ERROR (parse_flags_value))
321 {
322 ecma_deref_ecma_string (pattern_str_p);
323 return parse_flags_value;
324 }
325
326 JERRY_ASSERT (ecma_is_value_empty (parse_flags_value));
327 }
328
329 re_compiled_code_t *bc_p = re_compile_bytecode (pattern_str_p, flags);
330
331 if (JERRY_UNLIKELY (bc_p == NULL))
332 {
333 ecma_deref_ecma_string (pattern_str_p);
334 return ECMA_VALUE_ERROR;
335 }
336
337 ecma_op_regexp_initialize (regexp_obj_p, bc_p, pattern_str_p, flags);
338 ecma_deref_ecma_string (pattern_str_p);
339
340 return ecma_make_object_value (regexp_obj_p);
341 } /* ecma_op_create_regexp_from_pattern */
342
343 /**
344 * Method for creating a RegExp object from bytecode.
345 *
346 * Note:
347 * Allocation have to happen before invoking this function using ecma_op_regexp_alloc.
348 *
349 * @return ecma_value_t
350 */
351 ecma_value_t
ecma_op_create_regexp_from_bytecode(ecma_object_t * regexp_obj_p,re_compiled_code_t * bc_p)352 ecma_op_create_regexp_from_bytecode (ecma_object_t *regexp_obj_p, /**< RegExp object */
353 re_compiled_code_t *bc_p) /**< bytecode */
354 {
355 ecma_bytecode_ref ((ecma_compiled_code_t *) bc_p);
356 ecma_string_t *pattern_str_p = ecma_get_string_from_value (bc_p->source);
357 uint16_t flags = bc_p->header.status_flags;
358
359 ecma_op_regexp_initialize (regexp_obj_p, bc_p, pattern_str_p, flags);
360
361 return ecma_make_object_value (regexp_obj_p);
362 } /* ecma_op_create_regexp_from_bytecode */
363
364 /**
365 * Method for creating a RegExp object from pattern with already parsed flags.
366 *
367 * Note:
368 * Allocation have to happen before invoking this function using ecma_op_regexp_alloc.
369 *
370 * @return ecma_value_t
371 */
372 ecma_value_t
ecma_op_create_regexp_with_flags(ecma_object_t * regexp_obj_p,ecma_value_t pattern_value,uint16_t flags)373 ecma_op_create_regexp_with_flags (ecma_object_t *regexp_obj_p, /**< RegExp object */
374 ecma_value_t pattern_value, /**< pattern */
375 uint16_t flags) /**< flags */
376 {
377 ecma_string_t *pattern_str_p = ecma_regexp_read_pattern_str_helper (pattern_value);
378
379 if (JERRY_UNLIKELY (pattern_str_p == NULL))
380 {
381 return ECMA_VALUE_ERROR;
382 }
383
384 re_compiled_code_t *bc_p = re_compile_bytecode (pattern_str_p, flags);
385 ecma_deref_ecma_string (pattern_str_p);
386
387 if (JERRY_UNLIKELY (bc_p == NULL))
388 {
389 return ECMA_VALUE_ERROR;
390 }
391
392 ecma_op_regexp_initialize (regexp_obj_p, bc_p, pattern_str_p, flags);
393
394 return ecma_make_object_value (regexp_obj_p);
395 } /* ecma_op_create_regexp_with_flags */
396
397 /**
398 * Canonicalize a character
399 *
400 * @return ecma_char_t canonicalized character
401 */
402 lit_code_point_t
ecma_regexp_canonicalize_char(lit_code_point_t ch,bool unicode)403 ecma_regexp_canonicalize_char (lit_code_point_t ch, /**< character */
404 bool unicode) /**< unicode */
405 {
406 if (JERRY_LIKELY (ch <= LIT_UTF8_1_BYTE_CODE_POINT_MAX))
407 {
408 if (ch >= LIT_CHAR_LOWERCASE_A && ch <= LIT_CHAR_LOWERCASE_Z)
409 {
410 return (ecma_char_t) (ch - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
411 }
412
413 return ch;
414 }
415
416 #if ENABLED (JERRY_ES2015)
417 /* TODO: Implement case folding for code points in the upper planes. */
418 if (JERRY_UNLIKELY (ch > LIT_UTF16_CODE_UNIT_MAX))
419 {
420 return ch;
421 }
422 #endif /* ENABLED (JERRY_ES2015) */
423
424 ecma_char_t u[LIT_MAXIMUM_OTHER_CASE_LENGTH];
425 const ecma_length_t size = lit_char_to_upper_case ((ecma_char_t) ch, u, LIT_MAXIMUM_OTHER_CASE_LENGTH);
426
427 if (size != 1)
428 {
429 return ch;
430 }
431
432 const ecma_char_t cu = u[0];
433 if (cu <= LIT_UTF8_1_BYTE_CODE_POINT_MAX && !unicode)
434 {
435 /* 6. */
436 return ch;
437 }
438
439 return cu;
440 } /* ecma_regexp_canonicalize_char */
441
442 /**
443 * RegExp Canonicalize abstract operation
444 *
445 * See also: ECMA-262 v5, 15.10.2.8
446 *
447 * @return ecma_char_t canonicalized character
448 */
449 static inline lit_code_point_t JERRY_ATTR_ALWAYS_INLINE
ecma_regexp_canonicalize(lit_code_point_t ch,uint16_t flags)450 ecma_regexp_canonicalize (lit_code_point_t ch, /**< character */
451 uint16_t flags) /**< flags */
452 {
453 if (flags & RE_FLAG_IGNORE_CASE)
454 {
455 return ecma_regexp_canonicalize_char (ch, flags & RE_FLAG_UNICODE);
456 }
457
458 return ch;
459 } /* ecma_regexp_canonicalize */
460
461 /**
462 * Check if a code point is matched by a class escape.
463 *
464 * @return true, if code point matches escape
465 * false, otherwise
466 */
467 static bool
ecma_regexp_check_class_escape(lit_code_point_t cp,ecma_class_escape_t escape)468 ecma_regexp_check_class_escape (lit_code_point_t cp, /**< char */
469 ecma_class_escape_t escape) /**< escape */
470 {
471 switch (escape)
472 {
473 case RE_ESCAPE_DIGIT:
474 {
475 return (cp >= LIT_CHAR_0 && cp <= LIT_CHAR_9);
476 }
477 case RE_ESCAPE_NOT_DIGIT:
478 {
479 return (cp < LIT_CHAR_0 || cp > LIT_CHAR_9);
480 }
481 case RE_ESCAPE_WORD_CHAR:
482 {
483 return lit_char_is_word_char (cp);
484 }
485 case RE_ESCAPE_NOT_WORD_CHAR:
486 {
487 return !lit_char_is_word_char (cp);
488 }
489 case RE_ESCAPE_WHITESPACE:
490 {
491 return lit_char_is_white_space ((ecma_char_t) cp);
492 }
493 case RE_ESCAPE_NOT_WHITESPACE:
494 {
495 return !lit_char_is_white_space ((ecma_char_t) cp);
496 }
497 default:
498 {
499 JERRY_UNREACHABLE ();
500 }
501 }
502 } /* ecma_regexp_check_class_escape */
503
504 /**
505 * Helper function to get current code point or code unit depending on execution mode,
506 * and advance the string pointer.
507 *
508 * @return lit_code_point_t current code point
509 */
510 static lit_code_point_t
ecma_regexp_advance(ecma_regexp_ctx_t * re_ctx_p,const lit_utf8_byte_t ** str_p)511 ecma_regexp_advance (ecma_regexp_ctx_t *re_ctx_p, /**< regexp context */
512 const lit_utf8_byte_t **str_p) /**< reference to string pointer */
513 {
514 JERRY_ASSERT (str_p != NULL);
515 lit_code_point_t cp = lit_cesu8_read_next (str_p);
516
517 #if ENABLED (JERRY_ES2015)
518 if (JERRY_UNLIKELY (re_ctx_p->flags & RE_FLAG_UNICODE)
519 && lit_is_code_point_utf16_high_surrogate ((ecma_char_t) cp)
520 && *str_p < re_ctx_p->input_end_p)
521 {
522 const ecma_char_t next_ch = lit_cesu8_peek_next (*str_p);
523 if (lit_is_code_point_utf16_low_surrogate (next_ch))
524 {
525 cp = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) cp, next_ch);
526 *str_p += LIT_UTF8_MAX_BYTES_IN_CODE_UNIT;
527 }
528 }
529 #endif /* ENABLED (JERRY_ES2015) */
530
531 return ecma_regexp_canonicalize (cp, re_ctx_p->flags);
532 } /* ecma_regexp_advance */
533
534 #if ENABLED (JERRY_ES2015)
535 /**
536 * Helper function to get current full unicode code point and advance the string pointer.
537 *
538 * @return lit_code_point_t current code point
539 */
540 lit_code_point_t
ecma_regexp_unicode_advance(const lit_utf8_byte_t ** str_p,const lit_utf8_byte_t * end_p)541 ecma_regexp_unicode_advance (const lit_utf8_byte_t **str_p, /**< reference to string pointer */
542 const lit_utf8_byte_t *end_p) /**< string end pointer */
543 {
544 JERRY_ASSERT (str_p != NULL);
545 const lit_utf8_byte_t *current_p = *str_p;
546
547 lit_code_point_t ch = lit_cesu8_read_next (¤t_p);
548 if (lit_is_code_point_utf16_high_surrogate ((ecma_char_t) ch)
549 && current_p < end_p)
550 {
551 const ecma_char_t next_ch = lit_cesu8_peek_next (current_p);
552 if (lit_is_code_point_utf16_low_surrogate (next_ch))
553 {
554 ch = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) ch, next_ch);
555 current_p += LIT_UTF8_MAX_BYTES_IN_CODE_UNIT;
556 }
557 }
558
559 *str_p = current_p;
560 return ch;
561 } /* ecma_regexp_unicode_advance */
562 #endif /* ENABLED (JERRY_ES2015) */
563
564 /**
565 * Helper function to revert the string pointer to the previous code point.
566 *
567 * @return pointer to previous code point
568 */
569 static JERRY_ATTR_NOINLINE const lit_utf8_byte_t *
ecma_regexp_step_back(ecma_regexp_ctx_t * re_ctx_p,const lit_utf8_byte_t * str_p)570 ecma_regexp_step_back (ecma_regexp_ctx_t *re_ctx_p, /**< regexp context */
571 const lit_utf8_byte_t *str_p) /**< reference to string pointer */
572 {
573 JERRY_ASSERT (str_p != NULL);
574 #if ENABLED (JERRY_ES2015)
575 lit_code_point_t ch = lit_cesu8_read_prev (&str_p);
576 if (JERRY_UNLIKELY (re_ctx_p->flags & RE_FLAG_UNICODE)
577 && lit_is_code_point_utf16_low_surrogate (ch)
578 && lit_is_code_point_utf16_high_surrogate (lit_cesu8_peek_prev (str_p)))
579 {
580 str_p -= LIT_UTF8_MAX_BYTES_IN_CODE_UNIT;
581 }
582 #else /* !ENABLED (JERRY_ES2015) */
583 JERRY_UNUSED (re_ctx_p);
584 lit_utf8_decr (&str_p);
585 #endif /* !ENABLED (JERRY_ES2015) */
586 return str_p;
587 } /* ecma_regexp_step_back */
588
589 /**
590 * Recursive function for executing RegExp bytecode.
591 *
592 * See also:
593 * ECMA-262 v5, 15.10.2.1
594 *
595 * @return pointer to the end of the currently matched substring
596 * NULL, if pattern did not match
597 */
598 static const lit_utf8_byte_t *
ecma_regexp_run(ecma_regexp_ctx_t * re_ctx_p,const uint8_t * bc_p,const lit_utf8_byte_t * str_curr_p)599 ecma_regexp_run (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */
600 const uint8_t *bc_p, /**< pointer to the current RegExp bytecode */
601 const lit_utf8_byte_t *str_curr_p) /**< input string pointer */
602 {
603 #if (JERRY_STACK_LIMIT != 0)
604 if (JERRY_UNLIKELY (ecma_get_current_stack_usage () > CONFIG_MEM_STACK_LIMIT))
605 {
606 return ECMA_RE_OUT_OF_STACK;
607 }
608 #endif /* JERRY_STACK_LIMIT != 0 */
609
610 const lit_utf8_byte_t *str_start_p = str_curr_p;
611 const uint8_t *next_alternative_p = NULL;
612
613 while (true)
614 {
615 const re_opcode_t op = re_get_opcode (&bc_p);
616
617 switch (op)
618 {
619 case RE_OP_EOF:
620 {
621 re_ctx_p->captures_p[RE_GLOBAL_CAPTURE].end_p = str_curr_p;
622 /* FALLTHRU */
623 }
624 case RE_OP_ASSERT_END:
625 case RE_OP_ITERATOR_END:
626 {
627 return str_curr_p;
628 }
629 case RE_OP_ALTERNATIVE_START:
630 {
631 const uint32_t offset = re_get_value (&bc_p);
632 next_alternative_p = bc_p + offset;
633 continue;
634 }
635 case RE_OP_ALTERNATIVE_NEXT:
636 {
637 while (true)
638 {
639 const uint32_t offset = re_get_value (&bc_p);
640 bc_p += offset;
641
642 if (*bc_p != RE_OP_ALTERNATIVE_NEXT)
643 {
644 break;
645 }
646
647 bc_p++;
648 }
649
650 continue;
651 }
652 case RE_OP_NO_ALTERNATIVE:
653 {
654 return NULL;
655 }
656 case RE_OP_CAPTURING_GROUP_START:
657 {
658 const uint32_t group_idx = re_get_value (&bc_p);
659 ecma_regexp_capture_t *const group_p = re_ctx_p->captures_p + group_idx;
660 group_p->subcapture_count = re_get_value (&bc_p);
661
662 const lit_utf8_byte_t *const saved_begin_p = group_p->begin_p;
663 const lit_utf8_byte_t *const saved_end_p = group_p->end_p;
664 const uint32_t saved_iterator = group_p->iterator;
665
666 const uint32_t qmin = re_get_value (&bc_p);
667 group_p->end_p = NULL;
668
669 /* If zero iterations are allowed, then execute the end opcode which will handle further iterations,
670 * otherwise run the 1st iteration immediately by executing group bytecode. */
671 if (qmin == 0)
672 {
673 group_p->iterator = 0;
674 group_p->begin_p = NULL;
675 const uint32_t end_offset = re_get_value (&bc_p);
676 group_p->bc_p = bc_p;
677
678 bc_p += end_offset;
679 }
680 else
681 {
682 group_p->iterator = 1;
683 group_p->begin_p = str_curr_p;
684 group_p->bc_p = bc_p;
685 }
686
687 const lit_utf8_byte_t *matched_p = ecma_regexp_run (re_ctx_p, bc_p, str_curr_p);
688 group_p->iterator = saved_iterator;
689
690 if (matched_p == NULL)
691 {
692 group_p->begin_p = saved_begin_p;
693 group_p->end_p = saved_end_p;
694 goto fail;
695 }
696
697 return matched_p;
698 }
699 case RE_OP_NON_CAPTURING_GROUP_START:
700 {
701 const uint32_t group_idx = re_get_value (&bc_p);
702 ecma_regexp_non_capture_t *const group_p = re_ctx_p->non_captures_p + group_idx;
703
704 group_p->subcapture_start = re_get_value (&bc_p);
705 group_p->subcapture_count = re_get_value (&bc_p);
706
707 const uint32_t saved_iterator = group_p->iterator;
708 const uint32_t qmin = re_get_value (&bc_p);
709
710 /* If zero iterations are allowed, then execute the end opcode which will handle further iterations,
711 * otherwise run the 1st iteration immediately by executing group bytecode. */
712 if (qmin == 0)
713 {
714 group_p->iterator = 0;
715 group_p->begin_p = NULL;
716 const uint32_t end_offset = re_get_value (&bc_p);
717 group_p->bc_p = bc_p;
718
719 bc_p += end_offset;
720 }
721 else
722 {
723 group_p->iterator = 1;
724 group_p->begin_p = str_curr_p;
725 group_p->bc_p = bc_p;
726 }
727
728 const lit_utf8_byte_t *matched_p = ecma_regexp_run (re_ctx_p, bc_p, str_curr_p);
729 group_p->iterator = saved_iterator;
730
731 if (matched_p == NULL)
732 {
733 goto fail;
734 }
735
736 return matched_p;
737 }
738 case RE_OP_GREEDY_CAPTURING_GROUP_END:
739 {
740 const uint32_t group_idx = re_get_value (&bc_p);
741 ecma_regexp_capture_t *const group_p = re_ctx_p->captures_p + group_idx;
742 const uint32_t qmin = re_get_value (&bc_p);
743
744 if (group_p->iterator < qmin)
745 {
746 /* No need to save begin_p since we don't have to backtrack beyond the minimum iteration count, but we have
747 * to clear nested capturing groups. */
748 group_p->begin_p = str_curr_p;
749 for (uint32_t i = 1; i < group_p->subcapture_count; ++i)
750 {
751 group_p[i].begin_p = NULL;
752 }
753
754 group_p->iterator++;
755 const lit_utf8_byte_t *const matched_p = ecma_regexp_run (re_ctx_p, group_p->bc_p, str_curr_p);
756
757 if (matched_p != NULL)
758 {
759 return matched_p;
760 }
761
762 group_p->iterator--;
763 goto fail;
764 }
765
766 /* Empty matches are not allowed after reaching the minimum number of iterations. */
767 if (JERRY_UNLIKELY (group_p->begin_p >= str_curr_p) && (group_p->iterator > qmin))
768 {
769 goto fail;
770 }
771
772 const uint32_t qmax = re_get_value (&bc_p) - RE_QMAX_OFFSET;
773 if (JERRY_UNLIKELY (group_p->iterator >= qmax))
774 {
775 /* Reached maximum number of iterations, try to match tail bytecode. */
776 group_p->end_p = str_curr_p;
777 const lit_utf8_byte_t *const matched_p = ecma_regexp_run (re_ctx_p, bc_p, str_curr_p);
778
779 if (matched_p != NULL)
780 {
781 return matched_p;
782 }
783
784 goto fail;
785 }
786
787 {
788 /* Save and clear all nested capturing groups, and try to iterate. */
789 JERRY_VLA (const lit_utf8_byte_t *, saved_captures_p, group_p->subcapture_count);
790 for (uint32_t i = 0; i < group_p->subcapture_count; ++i)
791 {
792 saved_captures_p[i] = group_p[i].begin_p;
793 group_p[i].begin_p = NULL;
794 }
795
796 group_p->iterator++;
797 group_p->begin_p = str_curr_p;
798
799 const lit_utf8_byte_t *const matched_p = ecma_regexp_run (re_ctx_p, group_p->bc_p, str_curr_p);
800
801 if (matched_p != NULL)
802 {
803 return matched_p;
804 }
805
806 /* Failed to iterate again, backtrack to current match, and try to run tail bytecode. */
807 for (uint32_t i = 0; i < group_p->subcapture_count; ++i)
808 {
809 group_p[i].begin_p = saved_captures_p[i];
810 }
811
812 group_p->iterator--;
813 group_p->end_p = str_curr_p;
814 }
815
816 const lit_utf8_byte_t *const tail_match_p = ecma_regexp_run (re_ctx_p, bc_p, str_curr_p);
817
818 if (tail_match_p != NULL)
819 {
820 return tail_match_p;
821 }
822
823 goto fail;
824 }
825 case RE_OP_GREEDY_NON_CAPTURING_GROUP_END:
826 {
827 const uint32_t group_idx = re_get_value (&bc_p);
828 ecma_regexp_non_capture_t *const group_p = re_ctx_p->non_captures_p + group_idx;
829 const uint32_t qmin = re_get_value (&bc_p);
830
831 if (group_p->iterator < qmin)
832 {
833 /* No need to save begin_p but we have to clear nested capturing groups. */
834 group_p->begin_p = str_curr_p;
835
836 ecma_regexp_capture_t *const capture_p = re_ctx_p->captures_p + group_p->subcapture_start;
837 for (uint32_t i = 0; i < group_p->subcapture_count; ++i)
838 {
839 capture_p[i].begin_p = NULL;
840 }
841
842 group_p->iterator++;
843 const lit_utf8_byte_t *const matched_p = ecma_regexp_run (re_ctx_p, group_p->bc_p, str_curr_p);
844
845 if (matched_p != NULL)
846 {
847 return matched_p;
848 }
849
850 group_p->iterator--;
851 goto fail;
852 }
853
854 /* Empty matches are not allowed after reaching the minimum number of iterations. */
855 if (JERRY_UNLIKELY (group_p->begin_p >= str_curr_p) && (group_p->iterator > qmin))
856 {
857 goto fail;
858 }
859
860 const uint32_t qmax = re_get_value (&bc_p) - RE_QMAX_OFFSET;
861 if (JERRY_UNLIKELY (group_p->iterator >= qmax))
862 {
863 /* Reached maximum number of iterations, try to match tail bytecode. */
864 const lit_utf8_byte_t *const matched_p = ecma_regexp_run (re_ctx_p, bc_p, str_curr_p);
865
866 if (matched_p != NULL)
867 {
868 return matched_p;
869 }
870
871 goto fail;
872 }
873
874 {
875 /* Save and clear all nested capturing groups, and try to iterate. */
876 JERRY_VLA (const lit_utf8_byte_t *, saved_captures_p, group_p->subcapture_count);
877 for (uint32_t i = 0; i < group_p->subcapture_count; ++i)
878 {
879 ecma_regexp_capture_t *const capture_p = re_ctx_p->captures_p + group_p->subcapture_start + i;
880 saved_captures_p[i] = capture_p->begin_p;
881 capture_p->begin_p = NULL;
882 }
883
884 group_p->iterator++;
885 const lit_utf8_byte_t *const saved_begin_p = group_p->begin_p;
886 group_p->begin_p = str_curr_p;
887
888 const lit_utf8_byte_t *const matched_p = ecma_regexp_run (re_ctx_p, group_p->bc_p, str_curr_p);
889
890 if (matched_p != NULL)
891 {
892 return matched_p;
893 }
894
895 /* Failed to iterate again, backtrack to current match, and try to run tail bytecode. */
896 for (uint32_t i = 0; i < group_p->subcapture_count; ++i)
897 {
898 ecma_regexp_capture_t *const capture_p = re_ctx_p->captures_p + group_p->subcapture_start + i;
899 capture_p->begin_p = saved_captures_p[i];
900 }
901
902 group_p->iterator--;
903 group_p->begin_p = saved_begin_p;
904 }
905
906 const lit_utf8_byte_t *const tail_match_p = ecma_regexp_run (re_ctx_p, bc_p, str_curr_p);
907
908 if (tail_match_p != NULL)
909 {
910 return tail_match_p;
911 }
912
913 goto fail;
914 }
915 case RE_OP_LAZY_CAPTURING_GROUP_END:
916 {
917 const uint32_t group_idx = re_get_value (&bc_p);
918 ecma_regexp_capture_t *const group_p = re_ctx_p->captures_p + group_idx;
919 const uint32_t qmin = re_get_value (&bc_p);
920
921 if (group_p->iterator < qmin)
922 {
923 /* No need to save begin_p but we have to clear nested capturing groups. */
924 group_p->begin_p = str_curr_p;
925 for (uint32_t i = 1; i < group_p->subcapture_count; ++i)
926 {
927 group_p[i].begin_p = NULL;
928 }
929
930 group_p->iterator++;
931 const lit_utf8_byte_t *const matched_p = ecma_regexp_run (re_ctx_p, group_p->bc_p, str_curr_p);
932
933 if (matched_p != NULL)
934 {
935 return matched_p;
936 }
937
938 group_p->iterator--;
939 goto fail;
940 }
941
942 /* Empty matches are not allowed after reaching the minimum number of iterations. */
943 if (JERRY_UNLIKELY (group_p->begin_p >= str_curr_p) && (group_p->iterator > qmin))
944 {
945 goto fail;
946 }
947
948 const uint32_t qmax = re_get_value (&bc_p) - RE_QMAX_OFFSET;
949 group_p->end_p = str_curr_p;
950
951 /* Try to match tail bytecode. */
952 const lit_utf8_byte_t *const tail_match_p = ecma_regexp_run (re_ctx_p, bc_p, str_curr_p);
953
954 if (tail_match_p != NULL)
955 {
956 return tail_match_p;
957 }
958
959 if (JERRY_UNLIKELY (group_p->iterator >= qmax))
960 {
961 /* Reached maximum number of iterations and tail bytecode did not match. */
962 goto fail;
963 }
964
965 {
966 /* Save and clear all nested capturing groups, and try to iterate. */
967 JERRY_VLA (const lit_utf8_byte_t *, saved_captures_p, group_p->subcapture_count);
968 for (uint32_t i = 0; i < group_p->subcapture_count; ++i)
969 {
970 saved_captures_p[i] = group_p[i].begin_p;
971 group_p[i].begin_p = NULL;
972 }
973
974 group_p->iterator++;
975 group_p->begin_p = str_curr_p;
976
977 const lit_utf8_byte_t *const matched_p = ecma_regexp_run (re_ctx_p, group_p->bc_p, str_curr_p);
978
979 if (matched_p != NULL)
980 {
981 return matched_p;
982 }
983
984 /* Backtrack to current match. */
985 for (uint32_t i = 0; i < group_p->subcapture_count; ++i)
986 {
987 group_p[i].begin_p = saved_captures_p[i];
988 }
989
990 group_p->iterator--;
991 }
992
993 goto fail;
994 }
995 case RE_OP_LAZY_NON_CAPTURING_GROUP_END:
996 {
997 const uint32_t group_idx = re_get_value (&bc_p);
998 ecma_regexp_non_capture_t *const group_p = re_ctx_p->non_captures_p + group_idx;
999 const uint32_t qmin = re_get_value (&bc_p);
1000
1001 if (group_p->iterator < qmin)
1002 {
1003 /* Clear nested captures. */
1004 ecma_regexp_capture_t *const capture_p = re_ctx_p->captures_p + group_p->subcapture_start;
1005 for (uint32_t i = 0; i < group_p->subcapture_count; ++i)
1006 {
1007 capture_p[i].begin_p = NULL;
1008 }
1009
1010 group_p->iterator++;
1011 const lit_utf8_byte_t *const matched_p = ecma_regexp_run (re_ctx_p, group_p->bc_p, str_curr_p);
1012
1013 if (matched_p != NULL)
1014 {
1015 return matched_p;
1016 }
1017
1018 group_p->iterator--;
1019 goto fail;
1020 }
1021
1022 /* Empty matches are not allowed after reaching the minimum number of iterations. */
1023 if (JERRY_UNLIKELY (group_p->begin_p >= str_curr_p) && (group_p->iterator > qmin))
1024 {
1025 goto fail;
1026 }
1027
1028 const uint32_t qmax = re_get_value (&bc_p) - RE_QMAX_OFFSET;
1029
1030 /* Try to match tail bytecode. */
1031 const lit_utf8_byte_t *const tail_match_p = ecma_regexp_run (re_ctx_p, bc_p, str_curr_p);
1032
1033 if (tail_match_p != NULL)
1034 {
1035 return tail_match_p;
1036 }
1037
1038 if (JERRY_UNLIKELY (group_p->iterator >= qmax))
1039 {
1040 /* Reached maximum number of iterations and tail bytecode did not match. */
1041 goto fail;
1042 }
1043
1044 {
1045 /* Save and clear all nested capturing groups, and try to iterate. */
1046 JERRY_VLA (const lit_utf8_byte_t *, saved_captures_p, group_p->subcapture_count);
1047 for (uint32_t i = 0; i < group_p->subcapture_count; ++i)
1048 {
1049 ecma_regexp_capture_t *const capture_p = re_ctx_p->captures_p + group_p->subcapture_start + i;
1050 saved_captures_p[i] = capture_p->begin_p;
1051 capture_p->begin_p = NULL;
1052 }
1053
1054 group_p->iterator++;
1055 const lit_utf8_byte_t *const saved_begin_p = group_p->begin_p;
1056 group_p->begin_p = str_curr_p;
1057
1058 const lit_utf8_byte_t *const matched_p = ecma_regexp_run (re_ctx_p, group_p->bc_p, str_curr_p);
1059
1060 if (matched_p != NULL)
1061 {
1062 return matched_p;
1063 }
1064
1065 /* Backtrack to current match. */
1066 for (uint32_t i = 0; i < group_p->subcapture_count; ++i)
1067 {
1068 ecma_regexp_capture_t *const capture_p = re_ctx_p->captures_p + group_p->subcapture_start + i;
1069 capture_p->begin_p = saved_captures_p[i];
1070 }
1071
1072 group_p->iterator--;
1073 group_p->begin_p = saved_begin_p;
1074 }
1075
1076 goto fail;
1077 }
1078 case RE_OP_GREEDY_ITERATOR:
1079 {
1080 const uint32_t qmin = re_get_value (&bc_p);
1081 const uint32_t qmax = re_get_value (&bc_p) - RE_QMAX_OFFSET;
1082 const uint32_t end_offset = re_get_value (&bc_p);
1083
1084 uint32_t iterator = 0;
1085 while (iterator < qmin)
1086 {
1087 str_curr_p = ecma_regexp_run (re_ctx_p, bc_p, str_curr_p);
1088
1089 if (str_curr_p == NULL)
1090 {
1091 goto fail;
1092 }
1093
1094 if (ECMA_RE_STACK_LIMIT_REACHED (str_curr_p))
1095 {
1096 return str_curr_p;
1097 }
1098
1099 iterator++;
1100 }
1101
1102 while (iterator < qmax)
1103 {
1104 const lit_utf8_byte_t *const matched_p = ecma_regexp_run (re_ctx_p, bc_p, str_curr_p);
1105
1106 if (matched_p == NULL)
1107 {
1108 break;
1109 }
1110
1111 if (ECMA_RE_STACK_LIMIT_REACHED (str_curr_p))
1112 {
1113 return str_curr_p;
1114 }
1115
1116 str_curr_p = matched_p;
1117 iterator++;
1118 }
1119
1120 const uint8_t *const tail_bc_p = bc_p + end_offset;
1121 while (true)
1122 {
1123 const lit_utf8_byte_t *const tail_match_p = ecma_regexp_run (re_ctx_p, tail_bc_p, str_curr_p);
1124
1125 if (tail_match_p != NULL)
1126 {
1127 return tail_match_p;
1128 }
1129
1130 if (JERRY_UNLIKELY (iterator <= qmin))
1131 {
1132 goto fail;
1133 }
1134
1135 iterator--;
1136 JERRY_ASSERT (str_curr_p > re_ctx_p->input_start_p);
1137 str_curr_p = ecma_regexp_step_back (re_ctx_p, str_curr_p);
1138 }
1139
1140 JERRY_UNREACHABLE ();
1141 }
1142 case RE_OP_LAZY_ITERATOR:
1143 {
1144 const uint32_t qmin = re_get_value (&bc_p);
1145 const uint32_t qmax = re_get_value (&bc_p) - RE_QMAX_OFFSET;
1146 const uint32_t end_offset = re_get_value (&bc_p);
1147
1148 uint32_t iterator = 0;
1149 while (iterator < qmin)
1150 {
1151 str_curr_p = ecma_regexp_run (re_ctx_p, bc_p, str_curr_p);
1152
1153 if (str_curr_p == NULL)
1154 {
1155 goto fail;
1156 }
1157
1158 if (ECMA_RE_STACK_LIMIT_REACHED (str_curr_p))
1159 {
1160 return str_curr_p;
1161 }
1162
1163 iterator++;
1164 }
1165
1166 const uint8_t *const tail_bc_p = bc_p + end_offset;
1167 while (true)
1168 {
1169 const lit_utf8_byte_t *const tail_match_p = ecma_regexp_run (re_ctx_p, tail_bc_p, str_curr_p);
1170
1171 if (tail_match_p != NULL)
1172 {
1173 return tail_match_p;
1174 }
1175
1176 if (JERRY_UNLIKELY (iterator >= qmax))
1177 {
1178 goto fail;
1179 }
1180
1181 const lit_utf8_byte_t *const matched_p = ecma_regexp_run (re_ctx_p, bc_p, str_curr_p);
1182
1183 if (matched_p == NULL)
1184 {
1185 goto fail;
1186 }
1187
1188 if (ECMA_RE_STACK_LIMIT_REACHED (matched_p))
1189 {
1190 return matched_p;
1191 }
1192
1193 iterator++;
1194 str_curr_p = matched_p;
1195 }
1196
1197 JERRY_UNREACHABLE ();
1198 }
1199 case RE_OP_BACKREFERENCE:
1200 {
1201 const uint32_t backref_idx = re_get_value (&bc_p);
1202 JERRY_ASSERT (backref_idx >= 1 && backref_idx < re_ctx_p->captures_count);
1203 const ecma_regexp_capture_t *capture_p = re_ctx_p->captures_p + backref_idx;
1204
1205 if (!ECMA_RE_IS_CAPTURE_DEFINED (capture_p) || capture_p->end_p <= capture_p->begin_p)
1206 {
1207 /* Undefined or zero length captures always match. */
1208 continue;
1209 }
1210
1211 const lit_utf8_size_t capture_size = (lit_utf8_size_t) (capture_p->end_p - capture_p->begin_p);
1212
1213 if (str_curr_p + capture_size > re_ctx_p->input_end_p
1214 || memcmp (str_curr_p, capture_p->begin_p, capture_size))
1215 {
1216 goto fail;
1217 }
1218
1219 str_curr_p += capture_size;
1220 continue;
1221 }
1222 case RE_OP_ASSERT_LINE_START:
1223 {
1224 if (str_curr_p <= re_ctx_p->input_start_p)
1225 {
1226 continue;
1227 }
1228
1229 if (!(re_ctx_p->flags & RE_FLAG_MULTILINE) || !lit_char_is_line_terminator (lit_cesu8_peek_prev (str_curr_p)))
1230 {
1231 goto fail;
1232 }
1233
1234 continue;
1235 }
1236 case RE_OP_ASSERT_LINE_END:
1237 {
1238 if (str_curr_p >= re_ctx_p->input_end_p)
1239 {
1240 continue;
1241 }
1242
1243 if (!(re_ctx_p->flags & RE_FLAG_MULTILINE) || !lit_char_is_line_terminator (lit_cesu8_peek_next (str_curr_p)))
1244 {
1245 goto fail;
1246 }
1247
1248 continue;
1249 }
1250 case RE_OP_ASSERT_WORD_BOUNDARY:
1251 {
1252 const bool is_wordchar_left = ((str_curr_p > re_ctx_p->input_start_p)
1253 && lit_char_is_word_char (str_curr_p[-1]));
1254
1255 const bool is_wordchar_right = ((str_curr_p < re_ctx_p->input_end_p)
1256 && lit_char_is_word_char (str_curr_p[0]));
1257 if (is_wordchar_right == is_wordchar_left)
1258 {
1259 goto fail;
1260 }
1261
1262 continue;
1263 }
1264 case RE_OP_ASSERT_NOT_WORD_BOUNDARY:
1265 {
1266 const bool is_wordchar_left = ((str_curr_p > re_ctx_p->input_start_p)
1267 && lit_char_is_word_char (str_curr_p[-1]));
1268
1269 const bool is_wordchar_right = ((str_curr_p < re_ctx_p->input_end_p)
1270 && lit_char_is_word_char (str_curr_p[0]));
1271 if (is_wordchar_right != is_wordchar_left)
1272 {
1273 goto fail;
1274 }
1275
1276 continue;
1277 }
1278 case RE_OP_ASSERT_LOOKAHEAD_POS:
1279 {
1280 const uint8_t qmin = re_get_byte (&bc_p);
1281 const uint32_t capture_start = re_get_value (&bc_p);
1282 const uint32_t capture_count = re_get_value (&bc_p);
1283 const uint32_t end_offset = re_get_value (&bc_p);
1284
1285 /* If qmin is zero, the assertion implicitly matches. */
1286 if (qmin == 0)
1287 {
1288 bc_p += end_offset;
1289 continue;
1290 }
1291
1292 /* Capture end pointers might get clobbered and need to be restored after a tail match fail. */
1293 JERRY_VLA (const lit_utf8_byte_t *, saved_captures_p, capture_count);
1294 for (uint32_t i = 0; i < capture_count; ++i)
1295 {
1296 ecma_regexp_capture_t *const capture_p = re_ctx_p->captures_p + capture_start + i;
1297 saved_captures_p[i] = capture_p->end_p;
1298 }
1299
1300 /* The first iteration will decide whether the assertion matches depending on whether
1301 * the iteration matched or not. */
1302 const lit_utf8_byte_t *const matched_p = ecma_regexp_run (re_ctx_p, bc_p, str_curr_p);
1303
1304 if (ECMA_RE_STACK_LIMIT_REACHED (matched_p))
1305 {
1306 return matched_p;
1307 }
1308
1309 if (matched_p == NULL)
1310 {
1311 goto fail;
1312 }
1313
1314 const lit_utf8_byte_t *tail_match_p = ecma_regexp_run (re_ctx_p, bc_p + end_offset, str_curr_p);
1315
1316 if (tail_match_p == NULL)
1317 {
1318 for (uint32_t i = 0; i < capture_count; ++i)
1319 {
1320 ecma_regexp_capture_t *const capture_p = re_ctx_p->captures_p + capture_start + i;
1321 capture_p->begin_p = NULL;
1322 capture_p->end_p = saved_captures_p[i];
1323 }
1324
1325 goto fail;
1326 }
1327
1328 return tail_match_p;
1329 }
1330 case RE_OP_ASSERT_LOOKAHEAD_NEG:
1331 {
1332 const uint8_t qmin = re_get_byte (&bc_p);
1333 uint32_t capture_idx = re_get_value (&bc_p);
1334 const uint32_t capture_count = re_get_value (&bc_p);
1335 const uint32_t end_offset = re_get_value (&bc_p);
1336
1337 /* If qmin is zero, the assertion implicitly matches. */
1338 if (qmin > 0)
1339 {
1340 /* The first iteration will decide whether the assertion matches depending on whether
1341 * the iteration matched or not. */
1342 const lit_utf8_byte_t *const matched_p = ecma_regexp_run (re_ctx_p, bc_p, str_curr_p);
1343
1344 if (ECMA_RE_STACK_LIMIT_REACHED (matched_p))
1345 {
1346 return matched_p;
1347 }
1348
1349 if (matched_p != NULL)
1350 {
1351 /* Nested capturing groups inside a negative lookahead can never capture, so we clear their results. */
1352 const uint32_t capture_end = capture_idx + capture_count;
1353 while (capture_idx < capture_end)
1354 {
1355 re_ctx_p->captures_p[capture_idx++].begin_p = NULL;
1356 }
1357
1358 goto fail;
1359 }
1360 }
1361
1362 bc_p += end_offset;
1363 continue;
1364 }
1365 case RE_OP_CLASS_ESCAPE:
1366 {
1367 if (str_curr_p >= re_ctx_p->input_end_p)
1368 {
1369 goto fail;
1370 }
1371
1372 const lit_code_point_t cp = ecma_regexp_advance (re_ctx_p, &str_curr_p);
1373
1374 const ecma_class_escape_t escape = (ecma_class_escape_t) re_get_byte (&bc_p);
1375 if (!ecma_regexp_check_class_escape (cp, escape))
1376 {
1377 goto fail;
1378 }
1379
1380 continue;
1381 }
1382 case RE_OP_CHAR_CLASS:
1383 {
1384 if (str_curr_p >= re_ctx_p->input_end_p)
1385 {
1386 goto fail;
1387 }
1388
1389 uint8_t flags = re_get_byte (&bc_p);
1390 uint32_t char_count = (flags & RE_CLASS_HAS_CHARS) ? re_get_value (&bc_p) : 0;
1391 uint32_t range_count = (flags & RE_CLASS_HAS_RANGES) ? re_get_value (&bc_p) : 0;
1392
1393 const lit_code_point_t cp = ecma_regexp_advance (re_ctx_p, &str_curr_p);
1394
1395 uint8_t escape_count = flags & RE_CLASS_ESCAPE_COUNT_MASK;
1396 while (escape_count > 0)
1397 {
1398 escape_count--;
1399 const ecma_class_escape_t escape = re_get_byte (&bc_p);
1400 if (ecma_regexp_check_class_escape (cp, escape))
1401 {
1402 goto class_found;
1403 }
1404 }
1405
1406 while (char_count > 0)
1407 {
1408 char_count--;
1409 const lit_code_point_t curr = re_get_char (&bc_p, re_ctx_p->flags & RE_FLAG_UNICODE);
1410 if (cp == curr)
1411 {
1412 goto class_found;
1413 }
1414 }
1415
1416 while (range_count > 0)
1417 {
1418 range_count--;
1419 const lit_code_point_t begin = re_get_char (&bc_p, re_ctx_p->flags & RE_FLAG_UNICODE);
1420
1421 if (cp < begin)
1422 {
1423 bc_p += re_ctx_p->char_size;
1424 continue;
1425 }
1426
1427 const lit_code_point_t end = re_get_char (&bc_p, re_ctx_p->flags & RE_FLAG_UNICODE);
1428 if (cp <= end)
1429 {
1430 goto class_found;
1431 }
1432 }
1433
1434 /* Not found */
1435 if (flags & RE_CLASS_INVERT)
1436 {
1437 continue;
1438 }
1439
1440 goto fail;
1441
1442 class_found:
1443 if (flags & RE_CLASS_INVERT)
1444 {
1445 goto fail;
1446 }
1447
1448 const uint32_t chars_size = char_count * re_ctx_p->char_size;
1449 const uint32_t ranges_size = range_count * re_ctx_p->char_size * 2;
1450 bc_p = bc_p + escape_count + chars_size + ranges_size;
1451 continue;
1452 }
1453 #if ENABLED (JERRY_ES2015)
1454 case RE_OP_UNICODE_PERIOD:
1455 {
1456 if (str_curr_p >= re_ctx_p->input_end_p)
1457 {
1458 goto fail;
1459 }
1460
1461 const lit_code_point_t cp = ecma_regexp_unicode_advance (&str_curr_p, re_ctx_p->input_end_p);
1462
1463 if (JERRY_UNLIKELY (cp <= LIT_UTF16_CODE_UNIT_MAX && lit_char_is_line_terminator ((ecma_char_t) cp)))
1464 {
1465 goto fail;
1466 }
1467
1468 continue;
1469 }
1470 #endif /* ENABLED (JERRY_ES2015) */
1471 case RE_OP_PERIOD:
1472 {
1473 if (str_curr_p >= re_ctx_p->input_end_p)
1474 {
1475 goto fail;
1476 }
1477
1478 const ecma_char_t ch = lit_cesu8_read_next (&str_curr_p);
1479
1480 if (lit_char_is_line_terminator (ch))
1481 {
1482 goto fail;
1483 }
1484
1485 continue;
1486 }
1487 case RE_OP_CHAR:
1488 {
1489 if (str_curr_p >= re_ctx_p->input_end_p)
1490 {
1491 goto fail;
1492 }
1493
1494 const lit_code_point_t ch1 = re_get_char (&bc_p, re_ctx_p->flags & RE_FLAG_UNICODE);
1495 const lit_code_point_t ch2 = ecma_regexp_advance (re_ctx_p, &str_curr_p);
1496
1497 if (ch1 != ch2)
1498 {
1499 goto fail;
1500 }
1501
1502 continue;
1503 }
1504 default:
1505 {
1506 JERRY_ASSERT (op == RE_OP_BYTE);
1507
1508 if (str_curr_p >= re_ctx_p->input_end_p
1509 || *bc_p++ != *str_curr_p++)
1510 {
1511 goto fail;
1512 }
1513
1514 continue;
1515 }
1516 }
1517
1518 JERRY_UNREACHABLE ();
1519 fail:
1520 bc_p = next_alternative_p;
1521
1522 if (bc_p == NULL || *bc_p++ != RE_OP_ALTERNATIVE_NEXT)
1523 {
1524 /* None of the alternatives matched. */
1525 return NULL;
1526 }
1527
1528 /* Get the end of the new alternative and continue execution. */
1529 str_curr_p = str_start_p;
1530 const uint32_t offset = re_get_value (&bc_p);
1531 next_alternative_p = bc_p + offset;
1532 }
1533 } /* ecma_regexp_run */
1534
1535 /**
1536 * Match a RegExp at a specific position in the input string.
1537 *
1538 * @return pointer to the end of the matched sub-string
1539 * NULL, if pattern did not match
1540 */
1541 static const lit_utf8_byte_t *
ecma_regexp_match(ecma_regexp_ctx_t * re_ctx_p,const uint8_t * bc_p,const lit_utf8_byte_t * str_curr_p)1542 ecma_regexp_match (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */
1543 const uint8_t *bc_p, /**< pointer to the current RegExp bytecode */
1544 const lit_utf8_byte_t *str_curr_p) /**< input string pointer */
1545 {
1546 re_ctx_p->captures_p[RE_GLOBAL_CAPTURE].begin_p = str_curr_p;
1547
1548 for (uint32_t i = 1; i < re_ctx_p->captures_count; ++i)
1549 {
1550 re_ctx_p->captures_p[i].begin_p = NULL;
1551 }
1552
1553 return ecma_regexp_run (re_ctx_p, bc_p, str_curr_p);
1554 } /* ecma_regexp_match */
1555
1556 /*
1557 * Helper function to get the result of a capture
1558 *
1559 * @return string value, if capture is defined
1560 * undefined, otherwise
1561 */
1562 ecma_value_t
ecma_regexp_get_capture_value(const ecma_regexp_capture_t * const capture_p)1563 ecma_regexp_get_capture_value (const ecma_regexp_capture_t *const capture_p) /**< capture */
1564 {
1565 if (ECMA_RE_IS_CAPTURE_DEFINED (capture_p))
1566 {
1567 JERRY_ASSERT (capture_p->end_p >= capture_p->begin_p);
1568 const lit_utf8_size_t capture_size = (lit_utf8_size_t) (capture_p->end_p - capture_p->begin_p);
1569 ecma_string_t *const capture_str_p = ecma_new_ecma_string_from_utf8 (capture_p->begin_p, capture_size);
1570 return ecma_make_string_value (capture_str_p);
1571 }
1572
1573 return ECMA_VALUE_UNDEFINED;
1574 } /* ecma_regexp_get_capture_value */
1575
1576 /**
1577 * Helper function to create a result array from the captures in a regexp context
1578 *
1579 * @return ecma value containing the created array object
1580 */
1581 static ecma_value_t
ecma_regexp_create_result_object(ecma_regexp_ctx_t * re_ctx_p,ecma_string_t * input_string_p,uint32_t index)1582 ecma_regexp_create_result_object (ecma_regexp_ctx_t *re_ctx_p, /**< regexp context */
1583 ecma_string_t *input_string_p, /**< input ecma string */
1584 uint32_t index) /**< match index */
1585 {
1586 ecma_value_t result_array = ecma_op_create_array_object (0, 0, false);
1587 ecma_object_t *result_p = ecma_get_object_from_value (result_array);
1588
1589 for (uint32_t i = 0; i < re_ctx_p->captures_count; i++)
1590 {
1591 ecma_value_t capture_value = ecma_regexp_get_capture_value (re_ctx_p->captures_p + i);
1592 ecma_builtin_helper_def_prop_by_index (result_p,
1593 i,
1594 capture_value,
1595 ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
1596 ecma_free_value (capture_value);
1597 }
1598
1599 ecma_builtin_helper_def_prop (result_p,
1600 ecma_get_magic_string (LIT_MAGIC_STRING_INDEX),
1601 ecma_make_uint32_value (index),
1602 ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
1603
1604 ecma_builtin_helper_def_prop (result_p,
1605 ecma_get_magic_string (LIT_MAGIC_STRING_INPUT),
1606 ecma_make_string_value (input_string_p),
1607 ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
1608
1609 return result_array;
1610 } /* ecma_regexp_create_result_object */
1611
1612 /**
1613 * Helper function to initialize a regexp match context
1614 */
1615 static void
ecma_regexp_initialize_context(ecma_regexp_ctx_t * ctx_p,const re_compiled_code_t * bc_p,const lit_utf8_byte_t * input_start_p,const lit_utf8_byte_t * input_end_p)1616 ecma_regexp_initialize_context (ecma_regexp_ctx_t *ctx_p, /**< regexp context */
1617 const re_compiled_code_t *bc_p, /**< regexp bytecode */
1618 const lit_utf8_byte_t *input_start_p, /**< pointer to input string */
1619 const lit_utf8_byte_t *input_end_p) /**< pointer to end of input string */
1620 {
1621 JERRY_ASSERT (ctx_p != NULL);
1622 JERRY_ASSERT (bc_p != NULL);
1623 JERRY_ASSERT (input_start_p != NULL);
1624 JERRY_ASSERT (input_end_p >= input_start_p);
1625
1626 ctx_p->flags = bc_p->header.status_flags;
1627 ctx_p->char_size = (ctx_p->flags & RE_FLAG_UNICODE) ? sizeof (lit_code_point_t) : sizeof (ecma_char_t);
1628
1629 ctx_p->input_start_p = input_start_p;
1630 ctx_p->input_end_p = input_end_p;
1631
1632 ctx_p->captures_count = bc_p->captures_count;
1633 ctx_p->non_captures_count = bc_p->non_captures_count;
1634
1635 ctx_p->captures_p = jmem_heap_alloc_block (ctx_p->captures_count * sizeof (ecma_regexp_capture_t));
1636
1637 if (ctx_p->non_captures_count > 0)
1638 {
1639 ctx_p->non_captures_p = jmem_heap_alloc_block (ctx_p->non_captures_count * sizeof (ecma_regexp_non_capture_t));
1640 }
1641 } /* ecma_regexp_initialize_context */
1642
1643 /**
1644 * Helper function to clean up a regexp context
1645 */
1646 static void
ecma_regexp_cleanup_context(ecma_regexp_ctx_t * ctx_p)1647 ecma_regexp_cleanup_context (ecma_regexp_ctx_t *ctx_p) /**< regexp context */
1648 {
1649 JERRY_ASSERT (ctx_p != NULL);
1650 jmem_heap_free_block (ctx_p->captures_p, ctx_p->captures_count * sizeof (ecma_regexp_capture_t));
1651
1652 if (ctx_p->non_captures_count > 0)
1653 {
1654 jmem_heap_free_block (ctx_p->non_captures_p, ctx_p->non_captures_count * sizeof (ecma_regexp_non_capture_t));
1655 }
1656 } /* ecma_regexp_cleanup_context */
1657
1658 /**
1659 * RegExp helper function to start the recursive matching algorithm
1660 * and create the result Array object
1661 *
1662 * See also:
1663 * ECMA-262 v5, 15.10.6.2
1664 *
1665 * @return array object - if matched
1666 * null - otherwise
1667 *
1668 * May raise error.
1669 * Returned value must be freed with ecma_free_value
1670 */
1671 ecma_value_t
ecma_regexp_exec_helper(ecma_object_t * regexp_object_p,ecma_string_t * input_string_p)1672 ecma_regexp_exec_helper (ecma_object_t *regexp_object_p, /**< RegExp object */
1673 ecma_string_t *input_string_p) /**< input string */
1674 {
1675 ecma_value_t ret_value = ECMA_VALUE_EMPTY;
1676
1677 JERRY_ASSERT (ecma_object_is_regexp_object (ecma_make_object_value (regexp_object_p)));
1678
1679 ecma_extended_object_t *ext_object_p = (ecma_extended_object_t *) regexp_object_p;
1680 re_compiled_code_t *bc_p = ECMA_GET_INTERNAL_VALUE_ANY_POINTER (re_compiled_code_t,
1681 ext_object_p->u.class_prop.u.value);
1682
1683 lit_utf8_size_t input_size;
1684 lit_utf8_size_t input_length;
1685 uint8_t input_flags = ECMA_STRING_FLAG_IS_ASCII;
1686 const lit_utf8_byte_t *input_buffer_p = ecma_string_get_chars (input_string_p,
1687 &input_size,
1688 &input_length,
1689 NULL,
1690 &input_flags);
1691
1692 const lit_utf8_byte_t *input_curr_p = input_buffer_p;
1693 uint32_t index = 0;
1694 if (bc_p->header.status_flags & (RE_FLAG_GLOBAL | RE_FLAG_STICKY))
1695 {
1696 ecma_string_t *lastindex_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
1697 ecma_value_t lastindex_value = ecma_op_object_get_own_data_prop (regexp_object_p, lastindex_str_p);
1698
1699 ecma_number_t lastindex_num;
1700 ret_value = ecma_op_to_integer (lastindex_value, &lastindex_num);
1701
1702 ecma_free_value (lastindex_value);
1703
1704 if (ECMA_IS_VALUE_ERROR (ret_value))
1705 {
1706 goto cleanup_string;
1707 }
1708
1709 /* TODO: Replace with ToLength */
1710 if (lastindex_num < 0.0f)
1711 {
1712 #if ENABLED (JERRY_ES2015)
1713 lastindex_num = 0.0f;
1714 #else /* !ENABLED (JERRY_ES2015) */
1715 lastindex_num = input_length + 1;
1716 #endif /* ENABLED (JERRY_ES2015) */
1717 }
1718 index = ecma_number_to_uint32 (lastindex_num);
1719
1720 if (index > input_length)
1721 {
1722 ret_value = ecma_op_object_put (regexp_object_p,
1723 lastindex_str_p,
1724 ecma_make_integer_value (0),
1725 true);
1726
1727 if (!ECMA_IS_VALUE_ERROR (ret_value))
1728 {
1729 JERRY_ASSERT (ecma_is_value_boolean (ret_value));
1730 /* lastIndex is out of bounds, the match should fail. */
1731 ret_value = ECMA_VALUE_NULL;
1732 }
1733
1734 goto cleanup_string;
1735 }
1736
1737 if (index > 0)
1738 {
1739 if (input_flags & ECMA_STRING_FLAG_IS_ASCII)
1740 {
1741 input_curr_p += index;
1742 }
1743 else
1744 {
1745 for (uint32_t i = 0; i < index; i++)
1746 {
1747 lit_utf8_incr (&input_curr_p);
1748 }
1749 }
1750 }
1751 }
1752
1753 const lit_utf8_byte_t *input_end_p = input_buffer_p + input_size;
1754 ecma_regexp_ctx_t re_ctx;
1755 ecma_regexp_initialize_context (&re_ctx,
1756 bc_p,
1757 input_buffer_p,
1758 input_end_p);
1759
1760 /* 2. Try to match */
1761 uint8_t *bc_start_p = (uint8_t *) (bc_p + 1);
1762 const lit_utf8_byte_t *matched_p = NULL;
1763
1764 JERRY_ASSERT (index <= input_length);
1765 while (true)
1766 {
1767 matched_p = ecma_regexp_match (&re_ctx, bc_start_p, input_curr_p);
1768
1769 if (matched_p != NULL)
1770 {
1771 break;
1772 }
1773
1774 #if ENABLED (JERRY_ES2015)
1775 if (re_ctx.flags & RE_FLAG_STICKY)
1776 {
1777 ecma_value_t put_result = ecma_op_object_put (regexp_object_p,
1778 ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL),
1779 ecma_make_uint32_value (0),
1780 true);
1781 if (ECMA_IS_VALUE_ERROR (put_result))
1782 {
1783 ret_value = put_result;
1784 goto cleanup_context;
1785 }
1786
1787 JERRY_ASSERT (ecma_is_value_boolean (put_result));
1788 ret_value = ECMA_VALUE_NULL;
1789 goto cleanup_context;
1790 }
1791 #endif /* ENABLED (JERRY_ES2015) */
1792
1793 if (input_curr_p >= input_end_p)
1794 {
1795 if (re_ctx.flags & RE_FLAG_GLOBAL)
1796 {
1797 ecma_value_t put_result = ecma_op_object_put (regexp_object_p,
1798 ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL),
1799 ecma_make_uint32_value (0),
1800 true);
1801 if (ECMA_IS_VALUE_ERROR (put_result))
1802 {
1803 ret_value = put_result;
1804 goto cleanup_context;
1805 }
1806
1807 JERRY_ASSERT (ecma_is_value_boolean (put_result));
1808 }
1809
1810 /* Failed to match, return 'null'. */
1811 ret_value = ECMA_VALUE_NULL;
1812 goto cleanup_context;
1813 }
1814
1815 JERRY_ASSERT (input_curr_p < input_end_p);
1816
1817 #if ENABLED (JERRY_ES2015)
1818 if (re_ctx.flags & RE_FLAG_UNICODE)
1819 {
1820 index++;
1821 const lit_code_point_t cp = ecma_regexp_unicode_advance (&input_curr_p,
1822 input_end_p);
1823
1824 if (cp > LIT_UTF16_CODE_UNIT_MAX)
1825 {
1826 index++;
1827 }
1828
1829 continue;
1830 }
1831 #endif /* ENABLED (JERRY_ES2015) */
1832
1833 index++;
1834 lit_utf8_incr (&input_curr_p);
1835 }
1836
1837 JERRY_ASSERT (matched_p != NULL);
1838
1839 if (ECMA_RE_STACK_LIMIT_REACHED (matched_p))
1840 {
1841 ret_value = ecma_raise_range_error (ECMA_ERR_MSG ("Stack limit exceeded."));
1842 goto cleanup_context;
1843 }
1844
1845 if (re_ctx.flags & (RE_FLAG_GLOBAL | RE_FLAG_STICKY))
1846 {
1847 JERRY_ASSERT (index <= input_length);
1848
1849 lit_utf8_size_t match_length;
1850 const lit_utf8_byte_t *match_begin_p = re_ctx.captures_p[0].begin_p;
1851 const lit_utf8_byte_t *match_end_p = re_ctx.captures_p[0].end_p;
1852
1853 if (input_flags & ECMA_STRING_FLAG_IS_ASCII)
1854 {
1855 match_length = (lit_utf8_size_t) (match_end_p - match_begin_p);
1856 }
1857 else
1858 {
1859 match_length = lit_utf8_string_length (match_begin_p,
1860 (lit_utf8_size_t) (match_end_p - match_begin_p));
1861 }
1862
1863 ecma_value_t put_result = ecma_op_object_put (regexp_object_p,
1864 ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL),
1865 ecma_make_uint32_value (index + match_length),
1866 true);
1867 if (ECMA_IS_VALUE_ERROR (put_result))
1868 {
1869 ret_value = put_result;
1870 goto cleanup_context;
1871 }
1872
1873 JERRY_ASSERT (ecma_is_value_boolean (put_result));
1874 }
1875
1876 ret_value = ecma_regexp_create_result_object (&re_ctx, input_string_p, index);
1877
1878 cleanup_context:
1879 ecma_regexp_cleanup_context (&re_ctx);
1880
1881 cleanup_string:
1882 if (input_flags & ECMA_STRING_FLAG_MUST_BE_FREED)
1883 {
1884 jmem_heap_free_block ((void *) input_buffer_p, input_size);
1885 }
1886
1887 return ret_value;
1888 } /* ecma_regexp_exec_helper */
1889
1890 /**
1891 * Helper function for converting a RegExp pattern parameter to string.
1892 *
1893 * See also:
1894 * RegExp.compile
1895 * RegExp dispatch call
1896 *
1897 * @return empty value if success, error value otherwise
1898 * Returned value must be freed with ecma_free_value.
1899 */
1900 ecma_string_t *
ecma_regexp_read_pattern_str_helper(ecma_value_t pattern_arg)1901 ecma_regexp_read_pattern_str_helper (ecma_value_t pattern_arg) /**< the RegExp pattern */
1902 {
1903 if (!ecma_is_value_undefined (pattern_arg))
1904 {
1905 ecma_string_t *pattern_string_p = ecma_op_to_string (pattern_arg);
1906 if (JERRY_UNLIKELY (pattern_string_p == NULL) || !ecma_string_is_empty (pattern_string_p))
1907 {
1908 return pattern_string_p;
1909 }
1910 }
1911
1912 return ecma_get_magic_string (LIT_MAGIC_STRING_EMPTY_NON_CAPTURE_GROUP);
1913 } /* ecma_regexp_read_pattern_str_helper */
1914
1915 /**
1916 * Helper function for RegExp based string searches
1917 *
1918 * See also:
1919 * ECMA-262 v6, 21.2.5.9
1920 *
1921 * @return index of the match
1922 */
1923 ecma_value_t
ecma_regexp_search_helper(ecma_value_t regexp_arg,ecma_value_t string_arg)1924 ecma_regexp_search_helper (ecma_value_t regexp_arg, /**< regexp argument */
1925 ecma_value_t string_arg) /**< string argument */
1926 {
1927 /* 2. */
1928 if (!ecma_is_value_object (regexp_arg))
1929 {
1930 return ecma_raise_type_error (ECMA_ERR_MSG ("'this' is not an object."));
1931 }
1932
1933 ecma_value_t result = ECMA_VALUE_ERROR;
1934
1935 /* 3-4. */
1936 ecma_string_t *const string_p = ecma_op_to_string (string_arg);
1937 if (string_p == NULL)
1938 {
1939 return result;
1940 }
1941
1942 ecma_object_t *const regexp_object_p = ecma_get_object_from_value (regexp_arg);
1943
1944 /* 5-6. */
1945 ecma_string_t *const last_index_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
1946 const ecma_value_t prev_last_index = ecma_op_object_get (regexp_object_p, last_index_str_p);
1947 if (ECMA_IS_VALUE_ERROR (prev_last_index))
1948 {
1949 goto cleanup_string;
1950 }
1951
1952 /* 7-8. */
1953 const ecma_value_t status = ecma_op_object_put (regexp_object_p, last_index_str_p, ecma_make_uint32_value (0), true);
1954 if (ECMA_IS_VALUE_ERROR (status))
1955 {
1956 ecma_free_value (prev_last_index);
1957 goto cleanup_string;
1958 }
1959
1960 JERRY_ASSERT (ecma_is_value_boolean (status));
1961
1962 /* 9-10. */
1963 const ecma_value_t match = ecma_op_regexp_exec (regexp_arg, string_p);
1964 if (ECMA_IS_VALUE_ERROR (match))
1965 {
1966 ecma_free_value (prev_last_index);
1967 goto cleanup_string;
1968 }
1969
1970 /* 11-12. */
1971 result = ecma_op_object_put (regexp_object_p, last_index_str_p, prev_last_index, true);
1972 ecma_free_value (prev_last_index);
1973
1974 if (ECMA_IS_VALUE_ERROR (result))
1975 {
1976 ecma_free_value (match);
1977 goto cleanup_string;
1978 }
1979
1980 /* 13-14. */
1981 if (ecma_is_value_null (match))
1982 {
1983 result = ecma_make_int32_value (-1);
1984 }
1985 else
1986 {
1987 ecma_object_t *const match_p = ecma_get_object_from_value (match);
1988 result = ecma_op_object_get_by_magic_id (match_p, LIT_MAGIC_STRING_INDEX);
1989 ecma_deref_object (match_p);
1990 }
1991
1992 cleanup_string:
1993 ecma_deref_ecma_string (string_p);
1994 return result;
1995 } /* ecma_regexp_search_helper */
1996
1997 /**
1998 * Helper function for RegExp based string split operation
1999 *
2000 * See also:
2001 * ECMA-262 v6, 21.2.5.11
2002 *
2003 * @return array of split and captured strings
2004 */
2005 ecma_value_t
ecma_regexp_split_helper(ecma_value_t this_arg,ecma_value_t string_arg,ecma_value_t limit_arg)2006 ecma_regexp_split_helper (ecma_value_t this_arg, /**< this value */
2007 ecma_value_t string_arg, /**< string value */
2008 ecma_value_t limit_arg) /**< limit value */
2009 {
2010 #if ENABLED (JERRY_ES2015)
2011 /* 2. */
2012 if (!ecma_is_value_object (this_arg))
2013 {
2014 return ecma_raise_type_error (ECMA_ERR_MSG ("'this' is not an object."));
2015 }
2016
2017 ecma_value_t result = ECMA_VALUE_ERROR;
2018
2019 /* 3-4. */
2020 ecma_string_t *const string_p = ecma_op_to_string (string_arg);
2021 if (string_p == NULL)
2022 {
2023 return result;
2024 }
2025
2026 /* 5-6. */
2027 ecma_object_t *const regexp_obj_p = ecma_get_object_from_value (this_arg);
2028 ecma_value_t constructor = ecma_op_species_constructor (regexp_obj_p, ECMA_BUILTIN_ID_REGEXP);
2029 if (ECMA_IS_VALUE_ERROR (constructor))
2030 {
2031 goto cleanup_string;
2032 }
2033
2034 ecma_object_t *const constructor_obj_p = ecma_get_object_from_value (constructor);
2035
2036 /* 7-8. */
2037 ecma_value_t flags = ecma_op_object_get_by_magic_id (regexp_obj_p, LIT_MAGIC_STRING_FLAGS);
2038 if (ECMA_IS_VALUE_ERROR (flags))
2039 {
2040 ecma_deref_object (constructor_obj_p);
2041 goto cleanup_string;
2042 }
2043
2044 ecma_string_t *flags_str_p = ecma_op_to_string (flags);
2045 ecma_free_value (flags);
2046
2047 if (JERRY_UNLIKELY (flags_str_p == NULL))
2048 {
2049 ecma_deref_object (constructor_obj_p);
2050 goto cleanup_string;
2051 }
2052
2053 lit_utf8_size_t flags_size;
2054 uint8_t flags_str_flags = ECMA_STRING_FLAG_IS_ASCII;
2055 const lit_utf8_byte_t *flags_buffer_p = ecma_string_get_chars (flags_str_p,
2056 &flags_size,
2057 NULL,
2058 NULL,
2059 &flags_str_flags);
2060
2061 bool unicode = false;
2062 bool sticky = false;
2063
2064 /* 9-11. */
2065 const lit_utf8_byte_t *const flags_end_p = flags_buffer_p + flags_size;
2066 for (const lit_utf8_byte_t *current_p = flags_buffer_p; current_p < flags_end_p; ++current_p)
2067 {
2068 switch (*current_p)
2069 {
2070 case LIT_CHAR_LOWERCASE_U:
2071 {
2072 unicode = true;
2073 break;
2074 }
2075 case LIT_CHAR_LOWERCASE_Y:
2076 {
2077 sticky = true;
2078 break;
2079 }
2080 }
2081 }
2082
2083 if (flags_str_flags & ECMA_STRING_FLAG_MUST_BE_FREED)
2084 {
2085 jmem_heap_free_block ((void *) flags_buffer_p, flags_size);
2086 }
2087
2088 /* 12. */
2089 if (!sticky)
2090 {
2091 ecma_stringbuilder_t builder = ecma_stringbuilder_create_from (flags_str_p);
2092 ecma_stringbuilder_append_byte (&builder, LIT_CHAR_LOWERCASE_Y);
2093
2094 ecma_deref_ecma_string (flags_str_p);
2095 flags_str_p = ecma_stringbuilder_finalize (&builder);
2096 }
2097
2098 /* 13-14. */
2099 ecma_value_t arguments[] = { this_arg, ecma_make_string_value (flags_str_p) };
2100 ecma_value_t splitter = ecma_op_function_construct (constructor_obj_p, constructor_obj_p, arguments, 2);
2101
2102 ecma_deref_ecma_string (flags_str_p);
2103 ecma_deref_object (constructor_obj_p);
2104
2105 if (ECMA_IS_VALUE_ERROR (splitter))
2106 {
2107 goto cleanup_string;
2108 }
2109
2110 ecma_object_t *const splitter_obj_p = ecma_get_object_from_value (splitter);
2111
2112 /* 17. */
2113 uint32_t limit = UINT32_MAX;
2114 if (!ecma_is_value_undefined (limit_arg))
2115 {
2116 if (ECMA_IS_VALUE_ERROR (ecma_op_to_length (limit_arg, &limit)))
2117 {
2118 goto cleanup_splitter;
2119 }
2120 }
2121
2122 /* 15. */
2123 ecma_value_t array = ecma_op_create_array_object (NULL, 0, false);
2124
2125 /* 21. */
2126 if (limit == 0)
2127 {
2128 result = array;
2129 goto cleanup_splitter;
2130 }
2131
2132 const lit_utf8_size_t string_length = ecma_string_get_length (string_p);
2133
2134 ecma_object_t *const array_p = ecma_get_object_from_value (array);
2135 ecma_length_t array_length = 0;
2136
2137 /* 22. */
2138 if (string_length == 0)
2139 {
2140 const ecma_value_t match = ecma_op_regexp_exec (splitter, string_p);
2141
2142 if (ECMA_IS_VALUE_ERROR (match))
2143 {
2144 goto cleanup_array;
2145 }
2146
2147 if (ecma_is_value_null (match))
2148 {
2149 result = ecma_builtin_helper_def_prop_by_index (array_p,
2150 array_length,
2151 ecma_make_string_value (string_p),
2152 ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
2153 JERRY_ASSERT (ecma_is_value_true (result));
2154 }
2155
2156 ecma_free_value (match);
2157 result = array;
2158 goto cleanup_splitter;
2159 }
2160
2161 /* 23. */
2162 uint32_t current_index = 0;
2163 uint32_t previous_index = 0;
2164
2165 ecma_string_t *const lastindex_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
2166
2167 /* 24. */
2168 while (current_index < string_length)
2169 {
2170 /* 24.a-b. */
2171 result = ecma_op_object_put (splitter_obj_p,
2172 lastindex_str_p,
2173 ecma_make_uint32_value (current_index),
2174 true);
2175
2176 if (ECMA_IS_VALUE_ERROR (result))
2177 {
2178 goto cleanup_array;
2179 }
2180
2181 JERRY_ASSERT (ecma_is_value_true (result));
2182
2183 /* 24.c-d. */
2184 result = ecma_op_regexp_exec (splitter, string_p);
2185 if (ECMA_IS_VALUE_ERROR (result))
2186 {
2187 goto cleanup_array;
2188 }
2189
2190 /* 24.e. */
2191 if (ecma_is_value_null (result))
2192 {
2193 current_index = ecma_op_advance_string_index (string_p, current_index, unicode);
2194 continue;
2195 }
2196
2197 ecma_object_t *const match_array_p = ecma_get_object_from_value (result);
2198
2199 /* 24.f.i. */
2200 result = ecma_op_object_get (splitter_obj_p, lastindex_str_p);
2201 if (ECMA_IS_VALUE_ERROR (result))
2202 {
2203 ecma_deref_object (match_array_p);
2204 goto cleanup_array;
2205 }
2206
2207 uint32_t end_index;
2208 const ecma_value_t length_value = ecma_op_to_length (result, &end_index);
2209 ecma_free_value (result);
2210
2211 if (ECMA_IS_VALUE_ERROR (length_value))
2212 {
2213 result = ECMA_VALUE_ERROR;
2214 ecma_deref_object (match_array_p);
2215 goto cleanup_array;
2216 }
2217
2218 /* 24.f.iii. */
2219 if (previous_index == end_index)
2220 {
2221 ecma_deref_object (match_array_p);
2222 current_index = ecma_op_advance_string_index (string_p, current_index, unicode);
2223 continue;
2224 }
2225
2226 /* 24.f.iv.1-4. */
2227 ecma_string_t *const split_str_p = ecma_string_substr (string_p, previous_index, current_index);
2228
2229 result = ecma_builtin_helper_def_prop_by_index (array_p,
2230 array_length++,
2231 ecma_make_string_value (split_str_p),
2232 ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
2233 JERRY_ASSERT (ecma_is_value_true (result));
2234 ecma_deref_ecma_string (split_str_p);
2235
2236 /* 24.f.iv.5. */
2237 if (array_length == limit)
2238 {
2239 ecma_deref_object (match_array_p);
2240 result = array;
2241 goto cleanup_splitter;
2242 }
2243
2244 /* 24.f.iv.6. */
2245 previous_index = end_index;
2246
2247 /* 24.f.iv.7-8. */
2248 uint32_t match_length;
2249 result = ecma_op_object_get_length (match_array_p, &match_length);
2250 if (ECMA_IS_VALUE_ERROR (result))
2251 {
2252 ecma_deref_object (match_array_p);
2253 goto cleanup_array;
2254 }
2255
2256 /* 24.f.iv.9. */
2257 match_length = (match_length > 0) ? match_length - 1 : match_length;
2258
2259 uint32_t match_index = 1;
2260 while (match_index <= match_length)
2261 {
2262 /* 24.f.iv.11.a-b. */
2263 result = ecma_op_object_get_by_uint32_index (match_array_p, match_index++);
2264 if (ECMA_IS_VALUE_ERROR (result))
2265 {
2266 ecma_deref_object (match_array_p);
2267 goto cleanup_array;
2268 }
2269
2270 const ecma_value_t capture = result;
2271
2272 /* 24.f.iv.11.c. */
2273 result = ecma_builtin_helper_def_prop_by_index (array_p,
2274 array_length++,
2275 capture,
2276 ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
2277 JERRY_ASSERT (ecma_is_value_true (result));
2278
2279 ecma_free_value (capture);
2280
2281 if (array_length == limit)
2282 {
2283 ecma_deref_object (match_array_p);
2284 result = array;
2285 goto cleanup_splitter;
2286 }
2287 }
2288
2289 /* 24.f.iv.12. */
2290 current_index = end_index;
2291
2292 ecma_deref_object (match_array_p);
2293 }
2294
2295 ecma_string_t *const end_str_p = ecma_string_substr (string_p, previous_index, string_length);
2296 result = ecma_builtin_helper_def_prop_by_index (array_p,
2297 array_length++,
2298 ecma_make_string_value (end_str_p),
2299 ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
2300 JERRY_ASSERT (ecma_is_value_true (result));
2301 ecma_deref_ecma_string (end_str_p);
2302
2303 result = array;
2304 goto cleanup_splitter;
2305
2306 cleanup_array:
2307 ecma_deref_object (array_p);
2308 cleanup_splitter:
2309 ecma_deref_object (splitter_obj_p);
2310 cleanup_string:
2311 ecma_deref_ecma_string (string_p);
2312
2313 return result;
2314 #else /* ENABLED (JERRY_ES2015) */
2315 ecma_value_t result = ECMA_VALUE_ERROR;
2316
2317 /* 2. */
2318 ecma_string_t *string_p = ecma_op_to_string (string_arg);
2319 if (JERRY_UNLIKELY (string_p == NULL))
2320 {
2321 return result;
2322 }
2323
2324 /* 5. */
2325 uint32_t limit = UINT32_MAX;
2326 if (!ecma_is_value_undefined (limit_arg))
2327 {
2328 if (ECMA_IS_VALUE_ERROR (ecma_op_to_length (limit_arg, &limit)))
2329 {
2330 goto cleanup_string;
2331 }
2332 }
2333
2334 /* 15. */
2335 ecma_value_t array = ecma_op_create_array_object (NULL, 0, false);
2336
2337 /* 21. */
2338 if (limit == 0)
2339 {
2340 result = array;
2341 goto cleanup_string;
2342 }
2343
2344 ecma_object_t *const array_p = ecma_get_object_from_value (array);
2345 ecma_length_t array_length = 0;
2346
2347 ecma_object_t *const regexp_p = ecma_get_object_from_value (this_arg);
2348 ecma_extended_object_t *const ext_object_p = (ecma_extended_object_t *) regexp_p;
2349 re_compiled_code_t *const bc_p = ECMA_GET_INTERNAL_VALUE_ANY_POINTER (re_compiled_code_t,
2350 ext_object_p->u.class_prop.u.value);
2351
2352 lit_utf8_size_t string_size;
2353 lit_utf8_size_t string_length;
2354 uint8_t string_flags = ECMA_STRING_FLAG_IS_ASCII;
2355 const lit_utf8_byte_t *string_buffer_p = ecma_string_get_chars (string_p,
2356 &string_size,
2357 &string_length,
2358 NULL,
2359 &string_flags);
2360
2361 const lit_utf8_byte_t *current_str_p = string_buffer_p;
2362 const lit_utf8_byte_t *previous_str_p = string_buffer_p;
2363 const lit_utf8_byte_t *const string_end_p = string_buffer_p + string_size;
2364
2365 ecma_regexp_ctx_t re_ctx;
2366 ecma_regexp_initialize_context (&re_ctx,
2367 bc_p,
2368 string_buffer_p,
2369 string_buffer_p + string_size);
2370
2371 uint8_t *const bc_start_p = (uint8_t *) (bc_p + 1);
2372
2373 if (string_length == 0)
2374 {
2375 const lit_utf8_byte_t *const matched_p = ecma_regexp_match (&re_ctx, bc_start_p, current_str_p);
2376
2377 if (ECMA_RE_STACK_LIMIT_REACHED (matched_p))
2378 {
2379 result = ecma_raise_range_error (ECMA_ERR_MSG ("Stack limit exceeded."));
2380 goto cleanup_array;
2381 }
2382
2383 if (matched_p == NULL)
2384 {
2385 result = ecma_builtin_helper_def_prop_by_index (array_p,
2386 array_length,
2387 ecma_make_string_value (string_p),
2388 ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
2389 JERRY_ASSERT (ecma_is_value_true (result));
2390 }
2391
2392 result = array;
2393 goto cleanup_context;
2394 }
2395
2396 /* 13. */
2397 while (current_str_p < string_end_p)
2398 {
2399 /* 13.a. */
2400 const lit_utf8_byte_t *const matched_p = ecma_regexp_match (&re_ctx, bc_start_p, current_str_p);
2401
2402 if (ECMA_RE_STACK_LIMIT_REACHED (matched_p))
2403 {
2404 result = ecma_raise_range_error (ECMA_ERR_MSG ("Stack limit exceeded."));
2405 goto cleanup_array;
2406 }
2407
2408 if (matched_p == NULL || matched_p == previous_str_p)
2409 {
2410 lit_utf8_incr (¤t_str_p);
2411 continue;
2412 }
2413
2414 /* 13.c.iii.1. */
2415 ecma_string_t *const str_p = ecma_new_ecma_string_from_utf8 (previous_str_p,
2416 (lit_utf8_size_t) (current_str_p - previous_str_p));
2417
2418 result = ecma_builtin_helper_def_prop_by_index (array_p,
2419 array_length++,
2420 ecma_make_string_value (str_p),
2421 ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
2422 JERRY_ASSERT (ecma_is_value_true (result));
2423 ecma_deref_ecma_string (str_p);
2424
2425 if (array_length == limit)
2426 {
2427 result = array;
2428 goto cleanup_context;
2429 }
2430
2431 /* 13.c.iii.5. */
2432 previous_str_p = matched_p;
2433
2434 uint32_t index = 1;
2435 while (index < re_ctx.captures_count)
2436 {
2437 const ecma_value_t capture = ecma_regexp_get_capture_value (re_ctx.captures_p + index);
2438 result = ecma_builtin_helper_def_prop_by_index (array_p,
2439 array_length++,
2440 capture,
2441 ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
2442 JERRY_ASSERT (ecma_is_value_true (result));
2443 ecma_free_value (capture);
2444
2445 if (array_length == limit)
2446 {
2447 result = array;
2448 goto cleanup_context;
2449 }
2450
2451 index++;
2452 }
2453
2454 /* 13.c.iii.8. */
2455 current_str_p = matched_p;
2456 }
2457
2458 ecma_string_t *const str_p = ecma_new_ecma_string_from_utf8 (previous_str_p,
2459 (lit_utf8_size_t) (string_end_p - previous_str_p));
2460
2461 result = ecma_builtin_helper_def_prop_by_index (array_p,
2462 array_length++,
2463 ecma_make_string_value (str_p),
2464 ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
2465 JERRY_ASSERT (ecma_is_value_true (result));
2466 ecma_deref_ecma_string (str_p);
2467
2468 result = array;
2469 goto cleanup_context;
2470
2471 cleanup_array:
2472 ecma_deref_object (array_p);
2473 cleanup_context:
2474 ecma_regexp_cleanup_context (&re_ctx);
2475 if (string_flags & ECMA_STRING_FLAG_MUST_BE_FREED)
2476 {
2477 jmem_heap_free_block ((void *) string_buffer_p, string_size);
2478 }
2479 cleanup_string:
2480 ecma_deref_ecma_string (string_p);
2481
2482 return result;
2483 #endif /* ENABLED (JERRY_ES2015) */
2484 } /* ecma_regexp_split_helper */
2485
2486 /**
2487 * Fast path for RegExp based replace operation
2488 *
2489 * This method assumes the following:
2490 * - The RegExp object is a built-in RegExp
2491 * - The 'exec' method of the RegExp object is the built-in 'exec' method
2492 * - The 'lastIndex' property is writable
2493 *
2494 * The standard would normally require us to first execute the regexp and collect the results,
2495 * and after that iterate over the collected results and replace them.
2496 * The assumptions above guarantee that during the matching phase there will be no exceptions thrown,
2497 * which means we can do the match/replace in a single loop, without collecting the results.
2498 *
2499 * @return string value if successful
2500 * thrown value otherwise
2501 */
2502 static ecma_value_t
ecma_regexp_replace_helper_fast(ecma_replace_context_t * ctx_p,const re_compiled_code_t * bc_p,ecma_string_t * string_p,ecma_value_t replace_arg)2503 ecma_regexp_replace_helper_fast (ecma_replace_context_t *ctx_p, /**<replace context */
2504 const re_compiled_code_t *bc_p, /**< regexp bytecode */
2505 ecma_string_t *string_p, /**< source string */
2506 ecma_value_t replace_arg) /**< replace argument */
2507 {
2508 JERRY_ASSERT (bc_p != NULL);
2509 ecma_value_t result = ECMA_VALUE_EMPTY;
2510
2511 uint8_t string_flags = ECMA_STRING_FLAG_IS_ASCII;
2512 lit_utf8_size_t string_length;
2513 ctx_p->string_p = ecma_string_get_chars (string_p,
2514 &(ctx_p->string_size),
2515 &string_length,
2516 NULL,
2517 &string_flags);
2518
2519 const lit_utf8_byte_t *const string_end_p = ctx_p->string_p + ctx_p->string_size;
2520 const uint8_t *const bc_start_p = (const uint8_t *) (bc_p + 1);
2521 const lit_utf8_byte_t *matched_p = NULL;
2522 const lit_utf8_byte_t *current_p = ctx_p->string_p;
2523 const lit_utf8_byte_t *last_append_p = current_p;
2524 JERRY_ASSERT (ctx_p->index <= string_length);
2525
2526 #if ENABLED (JERRY_ES2015)
2527 /* Global matches always start at index 0, but Sticky matches may have a non-zero lastIndex. */
2528 if (ctx_p->index > 0)
2529 {
2530 if (string_flags & ECMA_STRING_FLAG_IS_ASCII)
2531 {
2532 current_p += ctx_p->index;
2533 }
2534 else
2535 {
2536 ecma_length_t index = ctx_p->index;
2537 while (index--)
2538 {
2539 lit_utf8_incr (¤t_p);
2540 }
2541 }
2542 }
2543 #endif /* ENABLED (JERRY_ES2015) */
2544
2545 ecma_regexp_ctx_t re_ctx;
2546 ecma_regexp_initialize_context (&re_ctx,
2547 bc_p,
2548 ctx_p->string_p,
2549 string_end_p);
2550
2551 ctx_p->builder = ecma_stringbuilder_create ();
2552 ctx_p->capture_count = re_ctx.captures_count;
2553 ctx_p->u.captures_p = re_ctx.captures_p;
2554
2555 while (true)
2556 {
2557 matched_p = ecma_regexp_match (&re_ctx, bc_start_p, current_p);
2558
2559 if (matched_p != NULL)
2560 {
2561 if (ECMA_RE_STACK_LIMIT_REACHED (matched_p))
2562 {
2563 result = ecma_raise_range_error (ECMA_ERR_MSG ("Stack limit exceeded."));
2564 goto cleanup_builder;
2565 }
2566
2567 const lit_utf8_size_t remaining_size = (lit_utf8_size_t) (current_p - last_append_p);
2568 ecma_stringbuilder_append_raw (&(ctx_p->builder), last_append_p, remaining_size);
2569
2570 if (ctx_p->replace_str_p != NULL)
2571 {
2572 ctx_p->matched_p = current_p;
2573 const ecma_regexp_capture_t *const global_capture_p = re_ctx.captures_p;
2574 ctx_p->matched_size = (lit_utf8_size_t) (global_capture_p->end_p - global_capture_p->begin_p);
2575 ctx_p->match_byte_pos = (lit_utf8_size_t) (current_p - re_ctx.input_start_p);
2576
2577 ecma_builtin_replace_substitute (ctx_p);
2578 }
2579 else
2580 {
2581 ecma_collection_t *arguments_p = ecma_new_collection ();
2582
2583 for (uint32_t i = 0; i < re_ctx.captures_count; i++)
2584 {
2585 ecma_value_t capture = ecma_regexp_get_capture_value (re_ctx.captures_p + i);
2586 ecma_collection_push_back (arguments_p, capture);
2587 }
2588
2589 ecma_collection_push_back (arguments_p, ecma_make_uint32_value (ctx_p->index));
2590 ecma_ref_ecma_string (string_p);
2591 ecma_collection_push_back (arguments_p, ecma_make_string_value (string_p));
2592 ecma_object_t *function_p = ecma_get_object_from_value (replace_arg);
2593
2594 result = ecma_op_function_call (function_p,
2595 ECMA_VALUE_UNDEFINED,
2596 arguments_p->buffer_p,
2597 arguments_p->item_count);
2598
2599 ecma_collection_free (arguments_p);
2600
2601 if (ECMA_IS_VALUE_ERROR (result))
2602 {
2603 goto cleanup_builder;
2604 }
2605
2606 /* 16.m.v */
2607 ecma_string_t *const replace_result_p = ecma_op_to_string (result);
2608 ecma_free_value (result);
2609
2610 if (replace_result_p == NULL)
2611 {
2612 result = ECMA_VALUE_ERROR;
2613 goto cleanup_builder;
2614 }
2615
2616 ecma_stringbuilder_append (&(ctx_p->builder), replace_result_p);
2617 ecma_deref_ecma_string (replace_result_p);
2618 }
2619
2620 const ecma_regexp_capture_t *global_capture_p = re_ctx.captures_p;
2621 last_append_p = global_capture_p->end_p;
2622
2623 if (!(re_ctx.flags & RE_FLAG_GLOBAL))
2624 {
2625 break;
2626 }
2627
2628 const lit_utf8_size_t matched_size = (lit_utf8_size_t) (global_capture_p->end_p - global_capture_p->begin_p);
2629 if (matched_size > 0)
2630 {
2631 ctx_p->index += lit_utf8_string_length (current_p, matched_size);
2632 current_p = last_append_p;
2633 continue;
2634 }
2635 }
2636
2637 if (current_p >= string_end_p)
2638 {
2639 break;
2640 }
2641
2642 #if ENABLED (JERRY_ES2015)
2643 if ((re_ctx.flags & RE_FLAG_UNICODE) != 0)
2644 {
2645 ctx_p->index++;
2646 const lit_code_point_t cp = ecma_regexp_unicode_advance (¤t_p,
2647 string_end_p);
2648
2649 if (cp > LIT_UTF16_CODE_UNIT_MAX)
2650 {
2651 ctx_p->index++;
2652 }
2653
2654 continue;
2655 }
2656 #endif /* ENABLED (JERRY_ES2015) */
2657
2658 ctx_p->index++;
2659 lit_utf8_incr (¤t_p);
2660 }
2661
2662 const lit_utf8_size_t trailing_size = (lit_utf8_size_t) (string_end_p - last_append_p);
2663 ecma_stringbuilder_append_raw (&(ctx_p->builder), last_append_p, trailing_size);
2664
2665 result = ecma_make_string_value (ecma_stringbuilder_finalize (&(ctx_p->builder)));
2666 goto cleanup_context;
2667
2668 cleanup_builder:
2669 ecma_stringbuilder_destroy (&(ctx_p->builder));
2670
2671 cleanup_context:
2672 ecma_regexp_cleanup_context (&re_ctx);
2673
2674 if (string_flags & ECMA_STRING_FLAG_MUST_BE_FREED)
2675 {
2676 jmem_heap_free_block ((void *) ctx_p->string_p, ctx_p->string_size);
2677 }
2678
2679 return result;
2680 } /* ecma_regexp_replace_helper_fast */
2681
2682 /**
2683 * Helper function for RegExp based replacing
2684 *
2685 * See also:
2686 * String.prototype.replace
2687 * RegExp.prototype[@@replace]
2688 *
2689 * @return result string of the replacement, if successful
2690 * error value, otherwise
2691 */
2692 ecma_value_t
ecma_regexp_replace_helper(ecma_value_t this_arg,ecma_value_t string_arg,ecma_value_t replace_arg)2693 ecma_regexp_replace_helper (ecma_value_t this_arg, /**< this argument */
2694 ecma_value_t string_arg, /**< source string */
2695 ecma_value_t replace_arg) /**< replace string */
2696 {
2697 /* 2. */
2698 if (!ecma_is_value_object (this_arg))
2699 {
2700 return ecma_raise_type_error (ECMA_ERR_MSG ("'this' is not an object."));
2701 }
2702
2703 ecma_object_t *this_obj_p = ecma_get_object_from_value (this_arg);
2704
2705 ecma_replace_context_t replace_ctx;
2706 replace_ctx.index = 0;
2707
2708 /* 3. */
2709 ecma_string_t *string_p = ecma_op_to_string (string_arg);
2710 if (string_p == NULL)
2711 {
2712 return ECMA_VALUE_ERROR;
2713 }
2714
2715 ecma_value_t result = ECMA_VALUE_ERROR;
2716
2717 /* 6. */
2718 replace_ctx.replace_str_p = NULL;
2719 if (!ecma_op_is_callable (replace_arg))
2720 {
2721 replace_ctx.replace_str_p = ecma_op_to_string (replace_arg);
2722
2723 if (replace_ctx.replace_str_p == NULL)
2724 {
2725 goto cleanup_string;
2726 }
2727 }
2728
2729 /* 8 */
2730 result = ecma_op_object_get_by_magic_id (this_obj_p, LIT_MAGIC_STRING_GLOBAL);
2731 if (ECMA_IS_VALUE_ERROR (result))
2732 {
2733 goto cleanup_replace;
2734 }
2735
2736 const bool global = ecma_op_to_boolean (result);
2737 ecma_free_value (result);
2738
2739 #if ENABLED (JERRY_ES2015)
2740 const lit_utf8_size_t string_length = ecma_string_get_length (string_p);
2741 bool unicode = false;
2742 #endif /* ENABLED (JERRY_ES2015) */
2743
2744 /* 10. */
2745 if (global)
2746 {
2747 #if ENABLED (JERRY_ES2015)
2748 result = ecma_op_object_get_by_magic_id (this_obj_p, LIT_MAGIC_STRING_UNICODE);
2749 if (ECMA_IS_VALUE_ERROR (result))
2750 {
2751 goto cleanup_replace;
2752 }
2753
2754 unicode = ecma_op_to_boolean (result);
2755 ecma_free_value (result);
2756 #endif /* ENABLED (JERRY_ES2015) */
2757
2758 result = ecma_op_object_put (this_obj_p,
2759 ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL),
2760 ecma_make_uint32_value (0),
2761 true);
2762 if (ECMA_IS_VALUE_ERROR (result))
2763 {
2764 goto cleanup_replace;
2765 }
2766
2767 JERRY_ASSERT (ecma_is_value_boolean (result));
2768 }
2769
2770 #if !ENABLED (JERRY_ES2015)
2771 ecma_extended_object_t *re_obj_p = (ecma_extended_object_t *) this_obj_p;
2772 const re_compiled_code_t *bc_p = ECMA_GET_INTERNAL_VALUE_POINTER (re_compiled_code_t,
2773 re_obj_p->u.class_prop.u.value);
2774
2775 result = ecma_regexp_replace_helper_fast (&replace_ctx,
2776 bc_p,
2777 string_p,
2778 replace_arg);
2779
2780 goto cleanup_replace;
2781 #else /* ENABLED (JERRY_ES2015) */
2782 result = ecma_op_object_get_by_magic_id (this_obj_p, LIT_MAGIC_STRING_EXEC);
2783
2784 if (ECMA_IS_VALUE_ERROR (result))
2785 {
2786 goto cleanup_replace;
2787 }
2788
2789 /* Check for fast path. */
2790 if (ecma_op_is_callable (result))
2791 {
2792 ecma_extended_object_t *function_p = (ecma_extended_object_t *) ecma_get_object_from_value (result);
2793 if (ecma_object_class_is (this_obj_p, LIT_MAGIC_STRING_REGEXP_UL)
2794 && ecma_builtin_is_regexp_exec (function_p))
2795 {
2796 result = ecma_op_object_get_by_magic_id (this_obj_p, LIT_MAGIC_STRING_STICKY);
2797 if (ECMA_IS_VALUE_ERROR (result))
2798 {
2799 goto cleanup_replace;
2800 }
2801
2802 const bool sticky = ecma_op_to_boolean (result);
2803 ecma_free_value (result);
2804
2805 if (sticky && !global)
2806 {
2807 ecma_string_t *lastindex_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
2808 ecma_value_t lastindex_value = ecma_op_object_get_own_data_prop (this_obj_p, lastindex_str_p);
2809
2810 result = ecma_op_to_length (lastindex_value, &replace_ctx.index);
2811 ecma_free_value (lastindex_value);
2812
2813 if (ECMA_IS_VALUE_ERROR (result))
2814 {
2815 goto cleanup_replace;
2816 }
2817
2818 if (replace_ctx.index > string_length)
2819 {
2820 ecma_deref_object ((ecma_object_t *) function_p);
2821
2822 result = ecma_op_object_put (this_obj_p,
2823 ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL),
2824 ecma_make_uint32_value (0),
2825 true);
2826 JERRY_ASSERT (ecma_is_value_true (result));
2827
2828 ecma_ref_ecma_string (string_p);
2829 result = ecma_make_string_value (string_p);
2830 goto cleanup_replace;
2831 }
2832 }
2833
2834 ecma_extended_object_t *re_obj_p = (ecma_extended_object_t *) this_obj_p;
2835 const re_compiled_code_t *bc_p = ECMA_GET_INTERNAL_VALUE_ANY_POINTER (re_compiled_code_t,
2836 re_obj_p->u.class_prop.u.value);
2837
2838 result = ecma_regexp_replace_helper_fast (&replace_ctx,
2839 bc_p,
2840 string_p,
2841 replace_arg);
2842
2843 ecma_deref_object ((ecma_object_t *) function_p);
2844 goto cleanup_replace;
2845 }
2846 }
2847
2848 ecma_collection_t *results_p = ecma_new_collection ();
2849
2850 while (true)
2851 {
2852 /* 13.a */
2853 if (ecma_op_is_callable (result))
2854 {
2855 ecma_object_t *const function_p = ecma_get_object_from_value (result);
2856
2857 ecma_value_t arguments[] = { ecma_make_string_value (string_p) };
2858 result = ecma_op_function_call (function_p, this_arg, arguments, 1);
2859
2860 ecma_deref_object (function_p);
2861
2862 if (ECMA_IS_VALUE_ERROR (result))
2863 {
2864 goto cleanup_results;
2865 }
2866
2867 if (!ecma_is_value_object (result) && !ecma_is_value_null (result))
2868 {
2869 ecma_free_value (result);
2870 result = ecma_raise_type_error (ECMA_ERR_MSG ("Return value of 'exec' must be an Object or Null"));
2871 goto cleanup_results;
2872 }
2873 }
2874 else
2875 {
2876 ecma_free_value (result);
2877
2878 if (!ecma_object_class_is (this_obj_p, LIT_MAGIC_STRING_REGEXP_UL))
2879 {
2880 result = ecma_raise_type_error (ECMA_ERR_MSG ("'this' is not a valid RegExp object"));
2881 goto cleanup_results;
2882 }
2883
2884 result = ecma_regexp_exec_helper (this_obj_p, string_p);
2885 }
2886
2887 /* 13.c */
2888 if (ecma_is_value_null (result))
2889 {
2890 break;
2891 }
2892
2893 /* 13.d.i */
2894 ecma_collection_push_back (results_p, result);
2895
2896 if (!global)
2897 {
2898 break;
2899 }
2900
2901 /* 13.d.iii.1 */
2902 result = ecma_op_object_get_by_uint32_index (ecma_get_object_from_value (result), 0);
2903 if (ECMA_IS_VALUE_ERROR (result))
2904 {
2905 goto cleanup_results;
2906 }
2907
2908 ecma_string_t *match_str_p = ecma_op_to_string (result);
2909 ecma_free_value (result);
2910
2911 if (match_str_p == NULL)
2912 {
2913 result = ECMA_VALUE_ERROR;
2914 goto cleanup_results;
2915 }
2916
2917 const bool is_empty = ecma_string_is_empty (match_str_p);
2918 ecma_deref_ecma_string (match_str_p);
2919
2920 /* 13.d.iii.3 */
2921 if (is_empty)
2922 {
2923 result = ecma_op_object_get_by_magic_id (this_obj_p, LIT_MAGIC_STRING_LASTINDEX_UL);
2924 if (ECMA_IS_VALUE_ERROR (result))
2925 {
2926 goto cleanup_results;
2927 }
2928
2929 uint32_t index;
2930 if (ECMA_IS_VALUE_ERROR (ecma_op_to_length (result, &index)))
2931 {
2932 ecma_free_value (result);
2933 result = ECMA_VALUE_ERROR;
2934 goto cleanup_results;
2935 }
2936
2937 ecma_free_value (result);
2938
2939 index = ecma_op_advance_string_index (string_p, index, unicode);
2940
2941 /* 10.d.iii.3.c */
2942 result = ecma_op_object_put (this_obj_p,
2943 ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL),
2944 ecma_make_uint32_value (index),
2945 true);
2946
2947 if (ECMA_IS_VALUE_ERROR (result))
2948 {
2949 goto cleanup_results;
2950 }
2951
2952 JERRY_ASSERT (ecma_is_value_boolean (result));
2953 }
2954
2955 result = ecma_op_object_get_by_magic_id (this_obj_p, LIT_MAGIC_STRING_EXEC);
2956
2957 if (ECMA_IS_VALUE_ERROR (result))
2958 {
2959 goto cleanup_results;
2960 }
2961 }
2962
2963 uint8_t string_flags = ECMA_STRING_FLAG_IS_ASCII;
2964 replace_ctx.string_p = ecma_string_get_chars (string_p,
2965 &(replace_ctx.string_size),
2966 NULL,
2967 NULL,
2968 &string_flags);
2969
2970 /* 14. */
2971 replace_ctx.builder = ecma_stringbuilder_create ();
2972 replace_ctx.matched_p = NULL;
2973 replace_ctx.capture_count = 0;
2974
2975 /* 15. */
2976 const lit_utf8_byte_t *source_position_p = replace_ctx.string_p;
2977 const lit_utf8_byte_t *const string_end_p = replace_ctx.string_p + replace_ctx.string_size;
2978
2979 /* 16. */
2980 for (ecma_value_t *current_p = results_p->buffer_p;
2981 current_p < results_p->buffer_p + results_p->item_count;
2982 current_p++)
2983 {
2984 /* 16.a */
2985 ecma_object_t *current_object_p = ecma_get_object_from_value (*current_p);
2986
2987 uint32_t capture_count;
2988 result = ecma_op_object_get_length (current_object_p, &capture_count);
2989 if (ECMA_IS_VALUE_ERROR (result))
2990 {
2991 goto cleanup_builder;
2992 }
2993
2994 /* 16.c */
2995 capture_count = (capture_count > 0) ? capture_count - 1 : capture_count;
2996
2997 /* 16.d */
2998 result = ecma_op_object_get_by_uint32_index (current_object_p, 0);
2999 if (ECMA_IS_VALUE_ERROR (result))
3000 {
3001 goto cleanup_builder;
3002 }
3003
3004 ecma_string_t *matched_str_p = ecma_op_to_string (result);
3005 ecma_free_value (result);
3006
3007 /* 16.e */
3008 if (matched_str_p == NULL)
3009 {
3010 result = ECMA_VALUE_ERROR;
3011 goto cleanup_builder;
3012 }
3013
3014 /* 16.g */
3015 result = ecma_op_object_get_by_magic_id (current_object_p, LIT_MAGIC_STRING_INDEX);
3016 if (ECMA_IS_VALUE_ERROR (result))
3017 {
3018 ecma_deref_ecma_string (matched_str_p);
3019 goto cleanup_builder;
3020 }
3021
3022 const ecma_value_t index_value = result;
3023
3024 ecma_number_t position_num;
3025 result = ecma_op_to_integer (index_value, &position_num);
3026 ecma_free_value (index_value);
3027
3028 if (ECMA_IS_VALUE_ERROR (result))
3029 {
3030 ecma_deref_ecma_string (matched_str_p);
3031 goto cleanup_builder;
3032 }
3033
3034 /* 16.i */
3035 lit_utf8_size_t position = JERRY_MIN ((lit_utf8_size_t) JERRY_MAX (position_num, 0.0f), string_length);
3036
3037 /* 16.k */
3038 ecma_collection_t *arguments_p = ecma_new_collection ();
3039 ecma_collection_push_back (arguments_p, ecma_make_string_value (matched_str_p));
3040
3041 /* 16.j, l */
3042 uint32_t n = 1;
3043 while (n <= capture_count)
3044 {
3045 result = ecma_op_object_get_by_uint32_index (current_object_p, n);
3046 if (ECMA_IS_VALUE_ERROR (result))
3047 {
3048 ecma_collection_free (arguments_p);
3049 goto cleanup_builder;
3050 }
3051
3052 /* 16.l.iii */
3053 if (!ecma_is_value_undefined (result))
3054 {
3055 ecma_string_t *capture_str_p = ecma_op_to_string (result);
3056 ecma_free_value (result);
3057
3058 if (capture_str_p == NULL)
3059 {
3060 ecma_collection_free (arguments_p);
3061 result = ECMA_VALUE_ERROR;
3062 goto cleanup_builder;
3063 }
3064
3065 result = ecma_make_string_value (capture_str_p);
3066 }
3067
3068 /* 16.l.iv */
3069 ecma_collection_push_back (arguments_p, result);
3070 n++;
3071 }
3072
3073 const bool should_replace = (position >= replace_ctx.index);
3074 /* 16.p */
3075 if (should_replace)
3076 {
3077 const lit_utf8_byte_t *match_position_p;
3078 const lit_utf8_size_t matched_str_size = ecma_string_get_size (matched_str_p);
3079 const lit_utf8_size_t matched_str_length = ecma_string_get_length (matched_str_p);
3080
3081 if (string_flags & ECMA_STRING_FLAG_IS_ASCII)
3082 {
3083 match_position_p = replace_ctx.string_p + position;
3084 }
3085 else
3086 {
3087 match_position_p = source_position_p;
3088 lit_utf8_size_t distance = position - replace_ctx.index;
3089 while (distance--)
3090 {
3091 lit_utf8_incr (&match_position_p);
3092 }
3093 }
3094
3095 ecma_stringbuilder_append_raw (&replace_ctx.builder,
3096 source_position_p,
3097 (lit_utf8_size_t) (match_position_p - source_position_p));
3098 replace_ctx.match_byte_pos = (lit_utf8_size_t) (match_position_p - replace_ctx.string_p);
3099
3100 if ((string_flags & ECMA_STRING_FLAG_IS_ASCII) && matched_str_size == matched_str_length)
3101 {
3102 source_position_p = JERRY_MIN (match_position_p + matched_str_size, string_end_p);
3103 }
3104 else
3105 {
3106 lit_utf8_size_t code_unit_count = matched_str_length;
3107
3108 while (code_unit_count-- > 0 && JERRY_LIKELY (match_position_p < string_end_p))
3109 {
3110 lit_utf8_incr (&match_position_p);
3111 }
3112
3113 source_position_p = match_position_p;
3114 }
3115
3116 replace_ctx.index = JERRY_MIN (position + matched_str_length, string_length);
3117 }
3118
3119 /* 16.m */
3120 if (replace_ctx.replace_str_p == NULL)
3121 {
3122 /* 16.m.i-ii.
3123 * arguments_p already contains <<Matched, cap1, cap2, ..., capN>> */
3124
3125 /* 16.m.iii */
3126 ecma_collection_push_back (arguments_p, ecma_make_uint32_value (position));
3127 ecma_ref_ecma_string (string_p);
3128 ecma_collection_push_back (arguments_p, ecma_make_string_value (string_p));
3129
3130 result = ecma_op_function_call (ecma_get_object_from_value (replace_arg),
3131 ECMA_VALUE_UNDEFINED,
3132 arguments_p->buffer_p,
3133 arguments_p->item_count);
3134
3135 ecma_collection_free (arguments_p);
3136
3137 if (ECMA_IS_VALUE_ERROR (result))
3138 {
3139 goto cleanup_builder;
3140 }
3141
3142 /* 16.m.v */
3143 ecma_string_t *const replace_result_p = ecma_op_to_string (result);
3144 ecma_free_value (result);
3145
3146 if (replace_result_p == NULL)
3147 {
3148 result = ECMA_VALUE_ERROR;
3149 goto cleanup_builder;
3150 }
3151
3152 /* 16.m/p */
3153 if (should_replace)
3154 {
3155 ecma_stringbuilder_append (&replace_ctx.builder, replace_result_p);
3156 }
3157
3158 ecma_deref_ecma_string (replace_result_p);
3159 }
3160 else
3161 {
3162 /* 16.n/p */
3163 if (should_replace)
3164 {
3165 replace_ctx.u.collection_p = arguments_p;
3166 ecma_builtin_replace_substitute (&replace_ctx);
3167 }
3168
3169 ecma_collection_free (arguments_p);
3170 }
3171 }
3172
3173 /* 18. */
3174 JERRY_ASSERT (replace_ctx.index <= string_length);
3175 ecma_stringbuilder_append_raw (&(replace_ctx.builder),
3176 source_position_p,
3177 (lit_utf8_size_t) (string_end_p - source_position_p));
3178
3179 result = ecma_make_string_value (ecma_stringbuilder_finalize (&replace_ctx.builder));
3180 goto cleanup_chars;
3181
3182 cleanup_builder:
3183 ecma_stringbuilder_destroy (&replace_ctx.builder);
3184
3185 cleanup_chars:
3186 if (string_flags & ECMA_STRING_FLAG_MUST_BE_FREED)
3187 {
3188 jmem_heap_free_block ((void *) replace_ctx.string_p, replace_ctx.string_size);
3189 }
3190
3191 cleanup_results:
3192 ecma_collection_free (results_p);
3193 #endif /* !ENABLED (JERRY_ES2015) */
3194
3195 cleanup_replace:
3196 if (replace_ctx.replace_str_p != NULL)
3197 {
3198 ecma_deref_ecma_string (replace_ctx.replace_str_p);
3199 }
3200
3201 cleanup_string:
3202 ecma_deref_ecma_string (string_p);
3203
3204 return result;
3205 } /* ecma_regexp_replace_helper */
3206
3207 /**
3208 * Helper function for RegExp based matching
3209 *
3210 * See also:
3211 * String.prototype.match
3212 * RegExp.prototype[@@match]
3213 *
3214 * @return ecma_value_t
3215 */
3216 ecma_value_t
ecma_regexp_match_helper(ecma_value_t this_arg,ecma_value_t string_arg)3217 ecma_regexp_match_helper (ecma_value_t this_arg, /**< this argument */
3218 ecma_value_t string_arg) /**< source string */
3219 {
3220 if (!ecma_is_value_object (this_arg))
3221 {
3222 return ecma_raise_type_error (ECMA_ERR_MSG ("'this' is not an object."));
3223 }
3224
3225 ecma_string_t *str_p = ecma_op_to_string (string_arg);
3226
3227 if (JERRY_UNLIKELY (str_p == NULL))
3228 {
3229 return ECMA_VALUE_ERROR;
3230 }
3231
3232 ecma_object_t *obj_p = ecma_get_object_from_value (this_arg);
3233
3234 ecma_value_t global_value = ecma_op_object_get_by_magic_id (obj_p, LIT_MAGIC_STRING_GLOBAL);
3235
3236 if (ECMA_IS_VALUE_ERROR (global_value))
3237 {
3238 ecma_deref_ecma_string (str_p);
3239 return global_value;
3240 }
3241
3242 bool global = ecma_op_to_boolean (global_value);
3243
3244 ecma_free_value (global_value);
3245
3246 if (!global)
3247 {
3248 ecma_value_t result = ecma_op_regexp_exec (this_arg, str_p);
3249 ecma_deref_ecma_string (str_p);
3250 return result;
3251 }
3252
3253 #if ENABLED (JERRY_ES2015)
3254 ecma_value_t full_unicode_value = ecma_op_object_get_by_magic_id (obj_p, LIT_MAGIC_STRING_UNICODE);
3255
3256 if (ECMA_IS_VALUE_ERROR (full_unicode_value))
3257 {
3258 ecma_deref_ecma_string (str_p);
3259 return full_unicode_value;
3260 }
3261
3262 bool full_unicode = ecma_op_to_boolean (full_unicode_value);
3263
3264 ecma_free_value (full_unicode_value);
3265 #endif /* ENABLED (JERRY_ES2015) */
3266
3267 ecma_value_t set_status = ecma_op_object_put (obj_p,
3268 ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL),
3269 ecma_make_uint32_value (0),
3270 true);
3271
3272 if (ECMA_IS_VALUE_ERROR (set_status))
3273 {
3274 ecma_deref_ecma_string (str_p);
3275 return set_status;
3276 }
3277
3278 ecma_value_t ret_value = ECMA_VALUE_ERROR;
3279 ecma_value_t result_array = ecma_op_create_array_object (0, 0, false);
3280 ecma_object_t *result_array_p = ecma_get_object_from_value (result_array);
3281 uint32_t n = 0;
3282
3283 while (true)
3284 {
3285 ecma_value_t result_value = ecma_op_regexp_exec (this_arg, str_p);
3286
3287 if (ECMA_IS_VALUE_ERROR (result_value))
3288 {
3289 goto result_cleanup;
3290 }
3291
3292 if (ecma_is_value_null (result_value))
3293 {
3294 if (n == 0)
3295 {
3296 ret_value = ECMA_VALUE_NULL;
3297 goto result_cleanup;
3298 }
3299
3300 ecma_deref_ecma_string (str_p);
3301 return result_array;
3302 }
3303
3304 ecma_object_t *result_value_p = ecma_get_object_from_value (result_value);
3305 ecma_value_t match_str_value = ecma_op_object_get_by_uint32_index (result_value_p, 0);
3306
3307 ecma_deref_object (result_value_p);
3308
3309 if (ECMA_IS_VALUE_ERROR (match_str_value))
3310 {
3311 goto result_cleanup;
3312 }
3313
3314 ecma_string_t *match_str_p = ecma_op_to_string (match_str_value);
3315
3316 if (JERRY_UNLIKELY (match_str_p == NULL))
3317 {
3318 ecma_free_value (match_str_value);
3319 goto result_cleanup;
3320 }
3321
3322 ecma_value_t new_prop = ecma_builtin_helper_def_prop_by_index (result_array_p,
3323 n,
3324 match_str_value,
3325 ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
3326
3327 JERRY_ASSERT (!ECMA_IS_VALUE_ERROR (new_prop));
3328
3329 ecma_value_t match_result = ECMA_VALUE_ERROR;
3330 if (ecma_string_is_empty (match_str_p))
3331 {
3332 ecma_value_t this_index = ecma_op_object_get_by_magic_id (obj_p, LIT_MAGIC_STRING_LASTINDEX_UL);
3333
3334 if (ECMA_IS_VALUE_ERROR (this_index))
3335 {
3336 goto match_cleanup;
3337 }
3338
3339 #if ENABLED (JERRY_ES2015)
3340 uint32_t index;
3341 ecma_value_t length_value = ecma_op_to_length (this_index, &index);
3342
3343 ecma_free_value (this_index);
3344
3345 if (ECMA_IS_VALUE_ERROR (length_value))
3346 {
3347 goto match_cleanup;
3348 }
3349
3350 uint32_t next_index = ecma_op_advance_string_index (str_p, index, full_unicode);
3351
3352 ecma_value_t next_set_status = ecma_op_object_put (obj_p,
3353 ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL),
3354 ecma_make_uint32_value (next_index),
3355 true);
3356 #else /* !ENABLED (JERRY_ES2015) */
3357 ecma_number_t next_index = ecma_get_number_from_value (this_index);
3358
3359 ecma_value_t next_set_status = ecma_op_object_put (obj_p,
3360 ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL),
3361 ecma_make_number_value (next_index + 1),
3362 true);
3363
3364 ecma_free_value (this_index);
3365 #endif /* ENABLED (JERRY_ES2015) */
3366
3367 if (ECMA_IS_VALUE_ERROR (next_set_status))
3368 {
3369 goto match_cleanup;
3370 }
3371 }
3372
3373 match_result = ECMA_VALUE_EMPTY;
3374
3375 match_cleanup:
3376 ecma_deref_ecma_string (match_str_p);
3377 ecma_free_value (match_str_value);
3378
3379 if (ECMA_IS_VALUE_ERROR (match_result))
3380 {
3381 goto result_cleanup;
3382 }
3383
3384 n++;
3385 }
3386
3387 result_cleanup:
3388 ecma_deref_ecma_string (str_p);
3389 ecma_deref_object (result_array_p);
3390 return ret_value;
3391 } /* ecma_regexp_match_helper */
3392
3393 /**
3394 * RegExpExec operation
3395 *
3396 * See also:
3397 * ECMA-262 v6.0, 21.2.5.2.1
3398 *
3399 * @return ecma value
3400 * Returned value must be freed with ecma_free_value.
3401 */
3402 ecma_value_t
ecma_op_regexp_exec(ecma_value_t this_arg,ecma_string_t * str_p)3403 ecma_op_regexp_exec (ecma_value_t this_arg, /**< this argument */
3404 ecma_string_t *str_p) /**< input string */
3405 {
3406 ecma_object_t *arg_obj_p = ecma_get_object_from_value (this_arg);
3407
3408 #if ENABLED (JERRY_ES2015)
3409 ecma_value_t exec = ecma_op_object_get_by_magic_id (arg_obj_p, LIT_MAGIC_STRING_EXEC);
3410
3411 if (ECMA_IS_VALUE_ERROR (exec))
3412 {
3413 return exec;
3414 }
3415
3416 if (ecma_op_is_callable (exec))
3417 {
3418 ecma_object_t *function_p = ecma_get_object_from_value (exec);
3419 ecma_value_t arguments[] = { ecma_make_string_value (str_p) };
3420
3421 ecma_value_t result = ecma_op_function_call (function_p, this_arg, arguments, 1);
3422
3423 ecma_deref_object (function_p);
3424
3425 if (ECMA_IS_VALUE_ERROR (result))
3426 {
3427 return result;
3428 }
3429
3430 if (!ecma_is_value_object (result) && !ecma_is_value_null (result))
3431 {
3432 ecma_free_value (result);
3433 return ecma_raise_type_error (ECMA_ERR_MSG ("Return value of 'exec' must be an Object or Null"));
3434 }
3435
3436 return result;
3437 }
3438 else
3439 {
3440 ecma_free_value (exec);
3441 }
3442 #endif /* ENABLED (JERRY_ES2015) */
3443
3444 if (!ecma_object_is_regexp_object (this_arg))
3445 {
3446 return ecma_raise_type_error (ECMA_ERR_MSG ("'this' is not a valid RegExp object"));
3447 }
3448
3449 return ecma_regexp_exec_helper (arg_obj_p, str_p);
3450 } /* ecma_op_regexp_exec */
3451
3452 /**
3453 * @}
3454 * @}
3455 */
3456
3457 #endif /* ENABLED (JERRY_BUILTIN_REGEXP) */
3458