• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright JS Foundation and other contributors, http://js.foundation
2  *
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef RE_BYTECODE_H
17 #define RE_BYTECODE_H
18 
19 #if ENABLED (JERRY_BUILTIN_REGEXP)
20 
21 #include "ecma-globals.h"
22 #include "re-compiler-context.h"
23 
24 /** \addtogroup parser Parser
25  * @{
26  *
27  * \addtogroup regexparser Regular expression
28  * @{
29  *
30  * \addtogroup regexparser_bytecode Bytecode
31  * @{
32  */
33 
34 /**
35   * Size of the RegExp bytecode cache (NOTE(review): presumably a number of entries -- confirm at the cache)
36   */
37 #define RE_CACHE_SIZE 8u
38 
39 /**
40   * RegExp flags mask: the low 6 bits hold the actual RegExp flags; the other 10 bits are for the reference count
41   */
42 #define RE_FLAGS_MASK 0x3F
43 
44 /**
45  * Maximum value that can be encoded in the RegExp bytecode as a single byte (0xFF is RE_VALUE_4BYTE_MARKER).
46  */
47 #define RE_VALUE_1BYTE_MAX 0xFE
48 
49 /**
50  * Marker that signals that the actual value is encoded in the following 4 bytes in the bytecode.
51  */
52 #define RE_VALUE_4BYTE_MARKER 0xFF
53 
54 /**
55  * RegExp opcodes (implicit consecutive values; RE_OP_PERIOD and later shift by one with RE_OP_UNICODE_PERIOD)
56  */
57 typedef enum
58 {
59   RE_OP_EOF,                                      /**< end of pattern */
60 
61   RE_OP_ALTERNATIVE_START,                        /**< start of alternatives */
62   RE_OP_ALTERNATIVE_NEXT,                         /**< next alternative */
63   RE_OP_NO_ALTERNATIVE,                           /**< no alternative */
64 
65   RE_OP_CAPTURING_GROUP_START,                    /**< start of a capturing group */
66   RE_OP_NON_CAPTURING_GROUP_START,                /**< start of a non-capturing group */
67 
68   RE_OP_GREEDY_CAPTURING_GROUP_END,               /**< end of a greedy capturing group */
69   RE_OP_GREEDY_NON_CAPTURING_GROUP_END,           /**< end of a greedy non-capturing group */
70   RE_OP_LAZY_CAPTURING_GROUP_END,                 /**< end of a lazy capturing group */
71   RE_OP_LAZY_NON_CAPTURING_GROUP_END,             /**< end of a lazy non-capturing group */
72 
73   RE_OP_GREEDY_ITERATOR,                          /**< greedy iterator */
74   RE_OP_LAZY_ITERATOR,                            /**< lazy iterator */
75   RE_OP_ITERATOR_END,                             /**< end of an iterator */
76 
77   RE_OP_BACKREFERENCE,                            /**< backreference */
78 
79   RE_OP_ASSERT_LINE_START,                        /**< line start assertion */
80   RE_OP_ASSERT_LINE_END,                          /**< line end assertion */
81   RE_OP_ASSERT_WORD_BOUNDARY,                     /**< word boundary assertion */
82   RE_OP_ASSERT_NOT_WORD_BOUNDARY,                 /**< not word boundary assertion */
83   RE_OP_ASSERT_LOOKAHEAD_POS,                     /**< positive lookahead assertion */
84   RE_OP_ASSERT_LOOKAHEAD_NEG,                     /**< negative lookahead assertion */
85   RE_OP_ASSERT_END,                               /**< end of an assertion */
86 
87   RE_OP_CLASS_ESCAPE,                             /**< class escape */
88   RE_OP_CHAR_CLASS,                               /**< character class */
89 #if ENABLED (JERRY_ES2015)
90   RE_OP_UNICODE_PERIOD,                           /**< period in full unicode mode */
91 #endif /* ENABLED (JERRY_ES2015) */
92   RE_OP_PERIOD,                                   /**< period in non-unicode mode */
93   RE_OP_CHAR,                                     /**< any code point */
94   RE_OP_BYTE,                                     /**< 1-byte utf8 character */
95 } re_opcode_t;
96 
97 /**
98  * Compiled RegExp byte code data: the compiled code header followed by RegExp specific fields.
99  */
100 typedef struct
101 {
102   ecma_compiled_code_t header;       /**< compiled code header */
103   uint32_t captures_count;           /**< number of capturing groups */
104   uint32_t non_captures_count;       /**< number of non-capturing groups */
105   ecma_value_t source;               /**< original RegExp pattern, held as an ecma value */
106 } re_compiled_code_t;
107 
108 void re_initialize_regexp_bytecode (re_compiler_ctx_t *re_ctx_p);
109 uint32_t re_bytecode_size (re_compiler_ctx_t *re_ctx_p);
110 /* Append helpers (NOTE(review): presumably emit one item at the end of the bytecode of re_ctx_p -- confirm) */
111 void re_append_opcode (re_compiler_ctx_t *re_ctx_p, const re_opcode_t opcode);
112 void re_append_byte (re_compiler_ctx_t *re_ctx_p, const uint8_t byte);
113 void re_append_char (re_compiler_ctx_t *re_ctx_p, const lit_code_point_t cp);
114 void re_append_value (re_compiler_ctx_t *re_ctx_p, const uint32_t value);
115 /* Insert helpers: same item kinds as the append helpers, but each takes an explicit offset argument */
116 void re_insert_opcode (re_compiler_ctx_t *re_ctx_p, const uint32_t offset, const re_opcode_t opcode);
117 void re_insert_byte (re_compiler_ctx_t *re_ctx_p, const uint32_t offset, const uint8_t byte);
118 void re_insert_char (re_compiler_ctx_t *re_ctx_p,  const uint32_t offset, const lit_code_point_t cp);
119 void re_insert_value (re_compiler_ctx_t *re_ctx_p, const uint32_t offset, const uint32_t value);
120 /* Read helpers: take the address of a bytecode pointer (NOTE(review): presumably advance *bc_p -- confirm) */
121 re_opcode_t re_get_opcode (const uint8_t **bc_p);
122 uint8_t re_get_byte (const uint8_t **bc_p);
123 lit_code_point_t re_get_char (const uint8_t **bc_p, bool unicode);
124 uint32_t re_get_value (const uint8_t **bc_p);
125 
126 #if ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE)
127 void re_dump_bytecode (re_compiler_ctx_t *bc_ctx);
128 #endif /* ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE) */
129 
130 /**
131  * @}
132  * @}
133  * @}
134  */
135 
136 #endif /* ENABLED (JERRY_BUILTIN_REGEXP) */
137 #endif /* !RE_BYTECODE_H */
138