• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright JS Foundation and other contributors, http://js.foundation
2  *
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ECMA_REGEXP_OBJECT_H
17 #define ECMA_REGEXP_OBJECT_H
18 
19 #if ENABLED (JERRY_BUILTIN_REGEXP)
20 
21 #include "ecma-globals.h"
22 #include "re-compiler.h"
23 
24 /** \addtogroup ecma ECMA
25  * @{
26  *
27  * \addtogroup ecmaregexpobject ECMA RegExp object related routines
28  * @{
29  */
30 
/**
 * RegExp flags
 *
 * Each flag occupies its own bit so that several flags can be combined in one value.
 *
 * Note:
 *      This enum has to be kept in sync with jerry_regexp_flags_t.
 *      Bit 0 is not assigned to any flag in this enum; the first flag uses bit 1.
 */
typedef enum
{
  RE_FLAG_EMPTY = 0u,              /**< Empty RegExp flags */
  RE_FLAG_GLOBAL = (1u << 1),      /**< ECMA-262 v5, 15.10.7.2 */
  RE_FLAG_IGNORE_CASE = (1u << 2), /**< ECMA-262 v5, 15.10.7.3 */
  RE_FLAG_MULTILINE = (1u << 3),   /**< ECMA-262 v5, 15.10.7.4 */
  RE_FLAG_STICKY = (1u << 4),      /**< ECMA-262 v6, 21.2.5.12 */
  RE_FLAG_UNICODE = (1u << 5)      /**< ECMA-262 v6, 21.2.5.15 */
} ecma_regexp_flags_t;
45 
/**
 * Class escapes
 *
 * RE_ESCAPE__START aliases the first real escape (RE_ESCAPE_DIGIT) and
 * RE_ESCAPE__COUNT is one past the last one, so the two markers delimit the valid range.
 */
typedef enum
{
  RE_ESCAPE__START,                   /**< escapes start */
  RE_ESCAPE_DIGIT = RE_ESCAPE__START, /**< digit */
  RE_ESCAPE_NOT_DIGIT,                /**< not digit */
  RE_ESCAPE_WORD_CHAR,                /**< word char */
  RE_ESCAPE_NOT_WORD_CHAR,            /**< not word char */
  RE_ESCAPE_WHITESPACE,               /**< whitespace */
  RE_ESCAPE_NOT_WHITESPACE,           /**< not whitespace */
  RE_ESCAPE__COUNT,                   /**< escape count */
} ecma_class_escape_t;
60 
/**
 * Character class flags escape count mask size (number of bits used by the escape count).
 */
#define RE_CLASS_ESCAPE_COUNT_MASK_SIZE (3u)

/**
 * Character class flags escape count mask.
 *
 * Selects the RE_CLASS_ESCAPE_COUNT_MASK_SIZE least significant bits; the computation is done
 * entirely in unsigned arithmetic.
 */
#define RE_CLASS_ESCAPE_COUNT_MASK ((1u << RE_CLASS_ESCAPE_COUNT_MASK_SIZE) - 1u)
70 
/**
 * Character class flags that are present in the upper bits of the class flags byte, while the 3 least significant bits
 * hold a value that contains the number of class escapes present in the character class.
 */
typedef enum
{
  RE_CLASS_HAS_CHARS = (1u << 5),   /**< contains individual characters */
  RE_CLASS_HAS_RANGES = (1u << 6),  /**< contains character ranges */
  RE_CLASS_INVERT = (1u << 7),      /**< inverted */
} ecma_char_class_flags_t;
81 
82 /**
83  * Structure for matching capturing groups and storing their result
84  */
85 typedef struct
86 {
87   const lit_utf8_byte_t *begin_p; /**< capture start pointer */
88   const lit_utf8_byte_t *end_p;   /**< capture end pointer */
89   const uint8_t *bc_p;            /**< group bytecode pointer */
90   uint32_t iterator;              /**< iteration counter */
91   uint32_t subcapture_count;      /**< number of nested capturing groups */
92 } ecma_regexp_capture_t;
93 
94 /**
95  * Structure for matching non-capturing groups
96  */
97 typedef struct
98 {
99   const lit_utf8_byte_t *begin_p; /**< substring start pointer */
100   const uint8_t *bc_p;            /**< group bytecode pointer */
101   uint32_t iterator;              /**< iteration counter */
102   uint32_t subcapture_start;      /**< first nested capturing group index */
103   uint32_t subcapture_count;      /**< number of nested capturing groups */
104 } ecma_regexp_non_capture_t;
105 
/**
 * Check if an ecma_regexp_capture_t contains a defined capture
 *
 * The argument is a pointer to the capture; the capture is considered defined
 * when its begin_p member is not NULL.
 */
#define ECMA_RE_IS_CAPTURE_DEFINED(c) ((c)->begin_p != NULL)
110 
111 ecma_value_t
112 ecma_regexp_get_capture_value (const ecma_regexp_capture_t *const capture_p);
113 
#if (JERRY_STACK_LIMIT != 0)
/**
 * Value used as result when stack limit is reached
 */
#define ECMA_RE_OUT_OF_STACK ((const lit_utf8_byte_t *) UINTPTR_MAX)

/**
 * Checks if the stack limit has been reached during regexp matching
 *
 * The argument is parenthesized so that any pointer expression can be passed safely.
 */
#define ECMA_RE_STACK_LIMIT_REACHED(p) (JERRY_UNLIKELY ((p) == ECMA_RE_OUT_OF_STACK))
#else /* JERRY_STACK_LIMIT == 0 */
/**
 * Without a stack limit the check is a constant false; the argument is not evaluated.
 */
#define ECMA_RE_STACK_LIMIT_REACHED(p) (false)
#endif /* JERRY_STACK_LIMIT != 0 */
127 
/**
 * Offset applied to qmax when encoded into the bytecode.
 *
 * It's common for qmax to be Infinity, which is represented as UINT32_MAX. By applying the offset we are able to store
 * it in a single byte as zero.
 */
#define RE_QMAX_OFFSET 1
135 
136 /**
137  * RegExp executor context
138  */
139 typedef struct
140 {
141   const lit_utf8_byte_t *input_start_p;        /**< start of input string */
142   const lit_utf8_byte_t *input_end_p;          /**< end of input string */
143   uint32_t captures_count;                     /**< number of capture groups */
144   uint32_t non_captures_count;                 /**< number of non-capture groups */
145   ecma_regexp_capture_t *captures_p;           /**< capturing groups */
146   ecma_regexp_non_capture_t *non_captures_p;   /**< non-capturing groups */
147   uint16_t flags;                              /**< RegExp flags */
148   uint8_t char_size;                           /**< size of encoded characters */
149 } ecma_regexp_ctx_t;
150 
151 #if ENABLED (JERRY_ES2015)
152 lit_code_point_t ecma_regexp_unicode_advance (const lit_utf8_byte_t **str_p, const lit_utf8_byte_t *end_p);
153 #endif /* ENABLED (JERRY_ES2015) */
154 
155 ecma_object_t *ecma_op_regexp_alloc (ecma_object_t *new_target_obj_p);
156 ecma_value_t ecma_regexp_exec_helper (ecma_object_t *regexp_object_p,
157                                       ecma_string_t *input_string_p);
158 ecma_string_t *ecma_regexp_read_pattern_str_helper (ecma_value_t pattern_arg);
159 lit_code_point_t ecma_regexp_canonicalize_char (lit_code_point_t ch, bool unicode);
160 ecma_value_t ecma_regexp_parse_flags (ecma_string_t *flags_str_p, uint16_t *flags_p);
161 void ecma_regexp_create_and_initialize_props (ecma_object_t *re_object_p,
162                                               ecma_string_t *source_p,
163                                               uint16_t flags);
164 ecma_value_t ecma_regexp_replace_helper (ecma_value_t this_arg, ecma_value_t string_arg, ecma_value_t replace_arg);
165 ecma_value_t ecma_regexp_search_helper (ecma_value_t regexp_arg, ecma_value_t string_arg);
166 ecma_value_t ecma_regexp_split_helper (ecma_value_t this_arg, ecma_value_t string_arg, ecma_value_t limit_arg);
167 ecma_value_t ecma_regexp_match_helper (ecma_value_t this_arg, ecma_value_t string_arg);
168 
169 ecma_value_t ecma_op_regexp_exec (ecma_value_t this_arg, ecma_string_t *str_p);
170 
171 ecma_value_t ecma_op_create_regexp_from_bytecode (ecma_object_t *regexp_obj_p, re_compiled_code_t *bc_p);
172 ecma_value_t ecma_op_create_regexp_from_pattern (ecma_object_t *regexp_obj_p,
173                                                  ecma_value_t pattern_value,
174                                                  ecma_value_t flags_value);
175 ecma_value_t ecma_op_create_regexp_with_flags (ecma_object_t *regexp_obj_p,
176                                                ecma_value_t pattern_value,
177                                                uint16_t flags);
178 /**
179  * @}
180  * @}
181  */
182 
183 #endif /* ENABLED (JERRY_BUILTIN_REGEXP) */
184 #endif /* !ECMA_REGEXP_OBJECT_H */
185