1 /* Copyright JS Foundation and other contributors, http://js.foundation 2 * 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef RE_BYTECODE_H 17 #define RE_BYTECODE_H 18 19 #if ENABLED (JERRY_BUILTIN_REGEXP) 20 21 #include "ecma-globals.h" 22 #include "re-compiler-context.h" 23 24 /** \addtogroup parser Parser 25 * @{ 26 * 27 * \addtogroup regexparser Regular expression 28 * @{ 29 * 30 * \addtogroup regexparser_bytecode Bytecode 31 * @{ 32 */ 33 34 /** 35 * Size of the RegExp bytecode cache 36 */ 37 #define RE_CACHE_SIZE 8u 38 39 /** 40 * RegExp flags mask (first 10 bits are for reference count and the rest for the actual RegExp flags) 41 */ 42 #define RE_FLAGS_MASK 0x3F 43 44 /** 45 * Maximum value that can be encoded in the RegExp bytecode as a single byte; 0xFF is not available because it is used as RE_VALUE_4BYTE_MARKER. 46 */ 47 #define RE_VALUE_1BYTE_MAX 0xFE 48 49 /** 50 * Marker that signals that the actual value is encoded in the following 4 bytes in the bytecode. 
51 */ 52 #define RE_VALUE_4BYTE_MARKER 0xFF 53 54 /** 55 * RegExp opcodes. Enumerator values are implicit, so the declaration order determines each opcode's numeric value; RE_OP_UNICODE_PERIOD exists only when JERRY_ES2015 is enabled, which shifts the values of the enumerators declared after it. 56 */ 57 typedef enum 58 { 59 RE_OP_EOF, /**< end of pattern */ 60 61 RE_OP_ALTERNATIVE_START, /**< start of alternatives */ 62 RE_OP_ALTERNATIVE_NEXT, /**< next alternative */ 63 RE_OP_NO_ALTERNATIVE, /**< no alternative */ 64 65 RE_OP_CAPTURING_GROUP_START, /**< start of a capturing group */ 66 RE_OP_NON_CAPTURING_GROUP_START, /**< start of a non-capturing group */ 67 68 RE_OP_GREEDY_CAPTURING_GROUP_END, /**< end of a greedy capturing group */ 69 RE_OP_GREEDY_NON_CAPTURING_GROUP_END, /**< end of a greedy non-capturing group */ 70 RE_OP_LAZY_CAPTURING_GROUP_END, /**< end of a lazy capturing group */ 71 RE_OP_LAZY_NON_CAPTURING_GROUP_END, /**< end of a lazy non-capturing group */ 72 73 RE_OP_GREEDY_ITERATOR, /**< greedy iterator */ 74 RE_OP_LAZY_ITERATOR, /**< lazy iterator */ 75 RE_OP_ITERATOR_END, /**< end of an iterator */ 76 77 RE_OP_BACKREFERENCE, /**< backreference */ 78 79 RE_OP_ASSERT_LINE_START, /**< line start assertion */ 80 RE_OP_ASSERT_LINE_END, /**< line end assertion */ 81 RE_OP_ASSERT_WORD_BOUNDARY, /**< word boundary assertion */ 82 RE_OP_ASSERT_NOT_WORD_BOUNDARY, /**< not word boundary assertion */ 83 RE_OP_ASSERT_LOOKAHEAD_POS, /**< positive lookahead assertion */ 84 RE_OP_ASSERT_LOOKAHEAD_NEG, /**< negative lookahead assertion */ 85 RE_OP_ASSERT_END, /**< end of an assertion */ 86 87 RE_OP_CLASS_ESCAPE, /**< class escape */ 88 RE_OP_CHAR_CLASS, /**< character class */ 89 #if ENABLED (JERRY_ES2015) 90 RE_OP_UNICODE_PERIOD, /**< period in full unicode mode */ 91 #endif /* ENABLED (JERRY_ES2015) */ 92 RE_OP_PERIOD, /**< period in non-unicode mode */ 93 RE_OP_CHAR, /**< any code point */ 94 RE_OP_BYTE, /**< 1-byte utf8 character */ 95 } re_opcode_t; 96 97 /** 98 * Compiled byte code data. 
99 */ 100 typedef struct 101 { 102 ecma_compiled_code_t header; /**< compiled code header */ 103 uint32_t captures_count; /**< number of capturing groups */ 104 uint32_t non_captures_count; /**< number of non-capturing groups */ 105 ecma_value_t source; /**< original RegExp pattern */ 106 } re_compiled_code_t; 107 /* Everything below is declaration-only; the definitions are not part of this header. */ /* Initialization and size query for the bytecode of a compiler context (per the function names; confirm details against the definitions). */ 108 void re_initialize_regexp_bytecode (re_compiler_ctx_t *re_ctx_p); 109 uint32_t re_bytecode_size (re_compiler_ctx_t *re_ctx_p); 110 /* Append helpers: each takes the compiler context and the item to append (opcode, byte, code point or 32-bit value). */ 111 void re_append_opcode (re_compiler_ctx_t *re_ctx_p, const re_opcode_t opcode); 112 void re_append_byte (re_compiler_ctx_t *re_ctx_p, const uint8_t byte); 113 void re_append_char (re_compiler_ctx_t *re_ctx_p, const lit_code_point_t cp); 114 void re_append_value (re_compiler_ctx_t *re_ctx_p, const uint32_t value); 115 /* Insert helpers: same item kinds as the append helpers plus a 32-bit offset argument, presumably the bytecode position to insert at; confirm against the definitions. */ 116 void re_insert_opcode (re_compiler_ctx_t *re_ctx_p, const uint32_t offset, const re_opcode_t opcode); 117 void re_insert_byte (re_compiler_ctx_t *re_ctx_p, const uint32_t offset, const uint8_t byte); 118 void re_insert_char (re_compiler_ctx_t *re_ctx_p, const uint32_t offset, const lit_code_point_t cp); 119 void re_insert_value (re_compiler_ctx_t *re_ctx_p, const uint32_t offset, const uint32_t value); 120 /* Getters: each takes a pointer to a const bytecode pointer (bc_p) and returns the decoded item; presumably *bc_p is advanced past what was read; confirm against the definitions. */ 121 re_opcode_t re_get_opcode (const uint8_t **bc_p); 122 uint8_t re_get_byte (const uint8_t **bc_p); 123 lit_code_point_t re_get_char (const uint8_t **bc_p, bool unicode); 124 uint32_t re_get_value (const uint8_t **bc_p); 125 /* Declared only when JERRY_REGEXP_DUMP_BYTE_CODE is enabled. NOTE(review): the parameter is named bc_ctx here while every other prototype in this header uses re_ctx_p; consider renaming it for consistency. */ 126 #if ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE) 127 void re_dump_bytecode (re_compiler_ctx_t *bc_ctx); 128 #endif /* ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE) */ 129 130 /** 131 * @} 132 * @} 133 * @} 134 */ 135 136 #endif /* ENABLED (JERRY_BUILTIN_REGEXP) */ 137 #endif /* !RE_BYTECODE_H */ 138