• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /* Copyright JS Foundation and other contributors, http://js.foundation
2   *
3   * Licensed under the Apache License, Version 2.0 (the "License");
4   * you may not use this file except in compliance with the License.
5   * You may obtain a copy of the License at
6   *
7   *     http://www.apache.org/licenses/LICENSE-2.0
8   *
9   * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  
16  #ifndef ECMA_REGEXP_OBJECT_H
17  #define ECMA_REGEXP_OBJECT_H
18  
19  #if ENABLED (JERRY_BUILTIN_REGEXP)
20  
21  #include "ecma-globals.h"
22  #include "re-compiler.h"
23  
24  /** \addtogroup ecma ECMA
25   * @{
26   *
27   * \addtogroup ecmaregexpobject ECMA RegExp object related routines
28   * @{
29   */
30  
31  /**
32   * RegExp flags
33   * Note:
34   *      This enum has to be kept in sync with jerry_regexp_flags_t.
35   */
36  typedef enum
37  {
38    RE_FLAG_EMPTY = 0u,              /**< Empty RegExp flags */
39    RE_FLAG_GLOBAL = (1u << 1),      /**< ECMA-262 v5, 15.10.7.2 */
40    RE_FLAG_IGNORE_CASE = (1u << 2), /**< ECMA-262 v5, 15.10.7.3 */
41    RE_FLAG_MULTILINE = (1u << 3),   /**< ECMA-262 v5, 15.10.7.4 */
42    RE_FLAG_STICKY = (1u << 4),      /**< ECMA-262 v6, 21.2.5.12 */
43    RE_FLAG_UNICODE = (1u << 5)      /**< ECMA-262 v6, 21.2.5.15 */
44  } ecma_regexp_flags_t;
45  
46  /**
47   * Class escapes
48   */
49  typedef enum
50  {
51    RE_ESCAPE__START,                   /**< first class escape value (0) */
52    RE_ESCAPE_DIGIT = RE_ESCAPE__START, /**< digit (same value as RE_ESCAPE__START) */
53    RE_ESCAPE_NOT_DIGIT,                /**< not digit */
54    RE_ESCAPE_WORD_CHAR,                /**< word char */
55    RE_ESCAPE_NOT_WORD_CHAR,            /**< not word char */
56    RE_ESCAPE_WHITESPACE,               /**< whitespace */
57    RE_ESCAPE_NOT_WHITESPACE,           /**< not whitespace */
58    RE_ESCAPE__COUNT,                   /**< number of class escapes (6) */
59  } ecma_class_escape_t;
60  
61  /**
62   * Number of low bits of the class flags byte that hold the class escape count.
63   */
64  #define RE_CLASS_ESCAPE_COUNT_MASK_SIZE (3u)
65  
66  /**
67   * Mask selecting the low RE_CLASS_ESCAPE_COUNT_MASK_SIZE bits (the class escape count) of the class flags byte.
68   */
69  #define RE_CLASS_ESCAPE_COUNT_MASK ((1 << RE_CLASS_ESCAPE_COUNT_MASK_SIZE) - 1u)
70  
71  /**
72   * Character class flags that are present in the upper bits of the class flags byte, while the 3 least significant bits
73   * hold a value that contains the number of class escapes present in the character class.
74   */
75  typedef enum
76  {
77    RE_CLASS_HAS_CHARS = (1 << 5),    /**< bit 5: contains individual characters */
78    RE_CLASS_HAS_RANGES = (1 << 6),   /**< bit 6: contains character ranges */
79    RE_CLASS_INVERT = (1 << 7),       /**< bit 7: inverted */
80  } ecma_char_class_flags_t;
81  
82  /**
83   * Structure for matching capturing groups and storing their result
84   */
85  typedef struct
86  {
87    const lit_utf8_byte_t *begin_p; /**< capture start pointer, NULL when the capture is not defined */
88    const lit_utf8_byte_t *end_p;   /**< capture end pointer */
89    const uint8_t *bc_p;            /**< group bytecode pointer */
90    uint32_t iterator;              /**< iteration counter */
91    uint32_t subcapture_count;      /**< number of nested capturing groups */
92  } ecma_regexp_capture_t;
93  
94  /**
95   * Structure for matching non-capturing groups
96   */
97  typedef struct
98  {
99    const lit_utf8_byte_t *begin_p; /**< substring start pointer */
100    const uint8_t *bc_p;            /**< group bytecode pointer */
101    uint32_t iterator;              /**< iteration counter */
102    uint32_t subcapture_start;      /**< first nested capturing group index (presumably into captures_p - TODO confirm) */
103    uint32_t subcapture_count;      /**< number of nested capturing groups */
104  } ecma_regexp_non_capture_t;
105  
106  /**
107   * Check if the ecma_regexp_capture_t pointed to by c contains a defined capture, i.e. its begin_p is not NULL
108   */
109  #define ECMA_RE_IS_CAPTURE_DEFINED(c) ((c)->begin_p != NULL)
110  
111  ecma_value_t
112  ecma_regexp_get_capture_value (const ecma_regexp_capture_t *const capture_p);
113  
114  #if (JERRY_STACK_LIMIT != 0)
115  /**
116   * Value used as result when stack limit is reached
117   */
118  #define ECMA_RE_OUT_OF_STACK ((const lit_utf8_byte_t *) UINTPTR_MAX)
119  
120  /**
121   * Checks whether p equals the ECMA_RE_OUT_OF_STACK sentinel, i.e. the stack limit was reached during regexp matching
122   */
123  #define ECMA_RE_STACK_LIMIT_REACHED(p) (JERRY_UNLIKELY ((p) == ECMA_RE_OUT_OF_STACK))
124  #else /* JERRY_STACK_LIMIT == 0: no stack limit is configured, so the check is a constant false */
125  #define ECMA_RE_STACK_LIMIT_REACHED(p) (false)
126  #endif /* JERRY_STACK_LIMIT != 0 */
127  
128  /**
129   * Offset applied to qmax when encoded into the bytecode.
130   *
131   * It's common for qmax to be Infinity, which is represented as UINT32_MAX. By applying the offset we are able to store
132   * it in a single byte as zero.
133   */
134  #define RE_QMAX_OFFSET 1
135  
136  /**
137   * RegExp executor context
138   */
139  typedef struct
140  {
141    const lit_utf8_byte_t *input_start_p;        /**< start of input string */
142    const lit_utf8_byte_t *input_end_p;          /**< end of input string */
143    uint32_t captures_count;                     /**< number of capture groups */
144    uint32_t non_captures_count;                 /**< number of non-capture groups */
145    ecma_regexp_capture_t *captures_p;           /**< capturing groups */
146    ecma_regexp_non_capture_t *non_captures_p;   /**< non-capturing groups */
147    uint16_t flags;                              /**< RegExp flags (presumably ecma_regexp_flags_t bits - TODO confirm) */
148    uint8_t char_size;                           /**< size of encoded characters */
149  } ecma_regexp_ctx_t;
150  
151  #if ENABLED (JERRY_ES2015)
152  lit_code_point_t ecma_regexp_unicode_advance (const lit_utf8_byte_t **str_p, const lit_utf8_byte_t *end_p);
153  #endif /* ENABLED (JERRY_ES2015) */
154  
155  ecma_object_t *ecma_op_regexp_alloc (ecma_object_t *new_target_obj_p);
156  ecma_value_t ecma_regexp_exec_helper (ecma_object_t *regexp_object_p,
157                                        ecma_string_t *input_string_p);
158  ecma_string_t *ecma_regexp_read_pattern_str_helper (ecma_value_t pattern_arg);
159  lit_code_point_t ecma_regexp_canonicalize_char (lit_code_point_t ch, bool unicode);
160  ecma_value_t ecma_regexp_parse_flags (ecma_string_t *flags_str_p, uint16_t *flags_p);
161  void ecma_regexp_create_and_initialize_props (ecma_object_t *re_object_p,
162                                                ecma_string_t *source_p,
163                                                uint16_t flags);
164  ecma_value_t ecma_regexp_replace_helper (ecma_value_t this_arg, ecma_value_t string_arg, ecma_value_t replace_arg);
165  ecma_value_t ecma_regexp_search_helper (ecma_value_t regexp_arg, ecma_value_t string_arg);
166  ecma_value_t ecma_regexp_split_helper (ecma_value_t this_arg, ecma_value_t string_arg, ecma_value_t limit_arg);
167  ecma_value_t ecma_regexp_match_helper (ecma_value_t this_arg, ecma_value_t string_arg);
168  
169  ecma_value_t ecma_op_regexp_exec (ecma_value_t this_arg, ecma_string_t *str_p);
170  
171  ecma_value_t ecma_op_create_regexp_from_bytecode (ecma_object_t *regexp_obj_p, re_compiled_code_t *bc_p);
172  ecma_value_t ecma_op_create_regexp_from_pattern (ecma_object_t *regexp_obj_p,
173                                                   ecma_value_t pattern_value,
174                                                   ecma_value_t flags_value);
175  ecma_value_t ecma_op_create_regexp_with_flags (ecma_object_t *regexp_obj_p,
176                                                 ecma_value_t pattern_value,
177                                                 uint16_t flags);
178  /**
179   * @}
180   * @}
181   */
182  
183  #endif /* ENABLED (JERRY_BUILTIN_REGEXP) */
184  #endif /* !ECMA_REGEXP_OBJECT_H */
185