1 /* Copyright JS Foundation and other contributors, http://js.foundation 2 * 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ECMA_REGEXP_OBJECT_H 17 #define ECMA_REGEXP_OBJECT_H 18 19 #if ENABLED (JERRY_BUILTIN_REGEXP) 20 21 #include "ecma-globals.h" 22 #include "re-compiler.h" 23 24 /** \addtogroup ecma ECMA 25 * @{ 26 * 27 * \addtogroup ecmaregexpobject ECMA RegExp object related routines 28 * @{ 29 */ 30 31 /** 32 * RegExp flags 33 * Note: 34 * This enum has to be kept in sync with jerry_regexp_flags_t. 35 */ 36 typedef enum 37 { 38 RE_FLAG_EMPTY = 0u, /* Empty RegExp flags */ 39 RE_FLAG_GLOBAL = (1u << 1), /**< ECMA-262 v5, 15.10.7.2 */ 40 RE_FLAG_IGNORE_CASE = (1u << 2), /**< ECMA-262 v5, 15.10.7.3 */ 41 RE_FLAG_MULTILINE = (1u << 3), /**< ECMA-262 v5, 15.10.7.4 */ 42 RE_FLAG_STICKY = (1u << 4), /**< ECMA-262 v6, 21.2.5.12 */ 43 RE_FLAG_UNICODE = (1u << 5) /**< ECMA-262 v6, 21.2.5.15 */ 44 } ecma_regexp_flags_t; 45 46 /** 47 * Class escapes 48 */ 49 typedef enum 50 { 51 RE_ESCAPE__START, /**< escapes start */ 52 RE_ESCAPE_DIGIT = RE_ESCAPE__START, /**< digit */ 53 RE_ESCAPE_NOT_DIGIT, /**< not digit */ 54 RE_ESCAPE_WORD_CHAR, /**< word char */ 55 RE_ESCAPE_NOT_WORD_CHAR, /**< not word char */ 56 RE_ESCAPE_WHITESPACE, /**< whitespace */ 57 RE_ESCAPE_NOT_WHITESPACE, /**< not whitespace */ 58 RE_ESCAPE__COUNT, /**< escape count */ 59 } ecma_class_escape_t; 60 61 /** 62 * Character class flags escape count mask size. 63 */ 64 #define RE_CLASS_ESCAPE_COUNT_MASK_SIZE (3u) 65 66 /** 67 * Character class flags escape count mask. 68 */ 69 #define RE_CLASS_ESCAPE_COUNT_MASK ((1 << RE_CLASS_ESCAPE_COUNT_MASK_SIZE) - 1u) 70 71 /** 72 * Character class flags that are present in the upper bits of the class flags byte, while the 3 least significant bits 73 * hold a value that contains the number of class escapes present in the character class. 74 */ 75 typedef enum 76 { 77 RE_CLASS_HAS_CHARS = (1 << 5), /**< contains individual characters */ 78 RE_CLASS_HAS_RANGES = (1 << 6), /**< contains character ranges */ 79 RE_CLASS_INVERT = (1 << 7), /**< inverted */ 80 } ecma_char_class_flags_t; 81 82 /** 83 * Structure for matching capturing groups and storing their result 84 */ 85 typedef struct 86 { 87 const lit_utf8_byte_t *begin_p; /**< capture start pointer */ 88 const lit_utf8_byte_t *end_p; /**< capture end pointer */ 89 const uint8_t *bc_p; /**< group bytecode pointer */ 90 uint32_t iterator; /**< iteration counter */ 91 uint32_t subcapture_count; /**< number of nested capturing groups */ 92 } ecma_regexp_capture_t; 93 94 /** 95 * Structure for matching non-capturing groups 96 */ 97 typedef struct 98 { 99 const lit_utf8_byte_t *begin_p; /**< substring start pointer */ 100 const uint8_t *bc_p; /**< group bytecode pointer */ 101 uint32_t iterator; /**< iteration counter */ 102 uint32_t subcapture_start; /**< first nested capturing group index */ 103 uint32_t subcapture_count; /**< number of nested capturing groups */ 104 } ecma_regexp_non_capture_t; 105 106 /** 107 * Check if an ecma_regexp_capture_t contains a defined capture 108 */ 109 #define ECMA_RE_IS_CAPTURE_DEFINED(c) ((c)->begin_p != NULL) 110 111 ecma_value_t 112 ecma_regexp_get_capture_value (const ecma_regexp_capture_t *const capture_p); 113 114 #if (JERRY_STACK_LIMIT != 0) 115 /** 116 * Value used ase result when stack limit is reached 117 */ 118 #define ECMA_RE_OUT_OF_STACK ((const lit_utf8_byte_t *) UINTPTR_MAX) 119 120 /** 121 * Checks if the stack limit has been reached during regexp matching 122 */ 123 #define ECMA_RE_STACK_LIMIT_REACHED(p) (JERRY_UNLIKELY (p == ECMA_RE_OUT_OF_STACK)) 124 #else /* JERRY_STACK_LIMIT == 0 */ 125 #define ECMA_RE_STACK_LIMIT_REACHED(p) (false) 126 #endif /* JERRY_STACK_LIMIT != 0 */ 127 128 /** 129 * Offset applied to qmax when encoded into the bytecode. 130 * 131 * It's common for qmax to be Infinity, which is represented a UINT32_MAX. By applying the offset we are able to store 132 * it in a single byte az zero. 133 */ 134 #define RE_QMAX_OFFSET 1 135 136 /** 137 * RegExp executor context 138 */ 139 typedef struct 140 { 141 const lit_utf8_byte_t *input_start_p; /**< start of input string */ 142 const lit_utf8_byte_t *input_end_p; /**< end of input string */ 143 uint32_t captures_count; /**< number of capture groups */ 144 uint32_t non_captures_count; /**< number of non-capture groups */ 145 ecma_regexp_capture_t *captures_p; /**< capturing groups */ 146 ecma_regexp_non_capture_t *non_captures_p; /**< non-capturing groups */ 147 uint16_t flags; /**< RegExp flags */ 148 uint8_t char_size; /**< size of encoded characters */ 149 } ecma_regexp_ctx_t; 150 151 #if ENABLED (JERRY_ES2015) 152 lit_code_point_t ecma_regexp_unicode_advance (const lit_utf8_byte_t **str_p, const lit_utf8_byte_t *end_p); 153 #endif /* ENABLED (JERRY_ES2015) */ 154 155 ecma_object_t *ecma_op_regexp_alloc (ecma_object_t *new_target_obj_p); 156 ecma_value_t ecma_regexp_exec_helper (ecma_object_t *regexp_object_p, 157 ecma_string_t *input_string_p); 158 ecma_string_t *ecma_regexp_read_pattern_str_helper (ecma_value_t pattern_arg); 159 lit_code_point_t ecma_regexp_canonicalize_char (lit_code_point_t ch, bool unicode); 160 ecma_value_t ecma_regexp_parse_flags (ecma_string_t *flags_str_p, uint16_t *flags_p); 161 void ecma_regexp_create_and_initialize_props (ecma_object_t *re_object_p, 162 ecma_string_t *source_p, 163 uint16_t flags); 164 ecma_value_t ecma_regexp_replace_helper (ecma_value_t this_arg, ecma_value_t string_arg, ecma_value_t replace_arg); 165 ecma_value_t ecma_regexp_search_helper (ecma_value_t regexp_arg, ecma_value_t string_arg); 166 ecma_value_t ecma_regexp_split_helper (ecma_value_t this_arg, ecma_value_t string_arg, ecma_value_t limit_arg); 167 ecma_value_t ecma_regexp_match_helper (ecma_value_t this_arg, ecma_value_t string_arg); 168 169 ecma_value_t ecma_op_regexp_exec (ecma_value_t this_arg, ecma_string_t *str_p); 170 171 ecma_value_t ecma_op_create_regexp_from_bytecode (ecma_object_t *regexp_obj_p, re_compiled_code_t *bc_p); 172 ecma_value_t ecma_op_create_regexp_from_pattern (ecma_object_t *regexp_obj_p, 173 ecma_value_t pattern_value, 174 ecma_value_t flags_value); 175 ecma_value_t ecma_op_create_regexp_with_flags (ecma_object_t *regexp_obj_p, 176 ecma_value_t pattern_value, 177 uint16_t flags); 178 /** 179 * @} 180 * @} 181 */ 182 183 #endif /* ENABLED (JERRY_BUILTIN_REGEXP) */ 184 #endif /* !ECMA_REGEXP_OBJECT_H */ 185