• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright JS Foundation and other contributors, http://js.foundation
2  *
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecma-exceptions.h"
17 #include "ecma-helpers.h"
18 #include "ecma-regexp-object.h"
19 #include "ecma-try-catch-macro.h"
20 #include "lit-char-helpers.h"
21 #include "jcontext.h"
22 #include "jrt-libc-includes.h"
23 #include "jmem.h"
24 #include "re-bytecode.h"
25 #include "re-compiler.h"
26 #include "re-compiler-context.h"
27 #include "re-parser.h"
28 
29 #if ENABLED (JERRY_BUILTIN_REGEXP)
30 
31 /** \addtogroup parser Parser
32  * @{
33  *
34  * \addtogroup regexparser Regular expression
35  * @{
36  *
37  * \addtogroup regexparser_compiler Compiler
38  * @{
39  */
40 
41 /**
42  * Search for the given pattern in the RegExp cache.
43  *
44  * @return pointer to bytecode if found
45  *         NULL - otherwise
46  */
47 static re_compiled_code_t *
re_cache_lookup(ecma_string_t * pattern_str_p,uint16_t flags)48 re_cache_lookup (ecma_string_t *pattern_str_p, /**< pattern string */
49                  uint16_t flags) /**< flags */
50 {
51   re_compiled_code_t **cache_p = JERRY_CONTEXT (re_cache);
52 
53   for (uint8_t idx = 0u; idx < RE_CACHE_SIZE; idx++)
54   {
55     re_compiled_code_t *cached_bytecode_p = cache_p[idx];
56 
57     if (cached_bytecode_p == NULL)
58     {
59       break;
60     }
61 
62     ecma_string_t *cached_pattern_str_p = ecma_get_string_from_value (cached_bytecode_p->source);
63 
64     if ((cached_bytecode_p->header.status_flags & RE_FLAGS_MASK) == flags
65         && ecma_compare_ecma_strings (cached_pattern_str_p, pattern_str_p))
66     {
67       return cached_bytecode_p;
68     }
69   }
70 
71   return NULL;
72 } /* re_cache_lookup */
73 
74 /**
75  * Run garbage collection in RegExp cache.
76  */
77 void
re_cache_gc(void)78 re_cache_gc (void)
79 {
80   re_compiled_code_t **cache_p = JERRY_CONTEXT (re_cache);
81 
82   for (uint32_t i = 0u; i < RE_CACHE_SIZE; i++)
83   {
84     const re_compiled_code_t *cached_bytecode_p = cache_p[i];
85 
86     if (cached_bytecode_p == NULL)
87     {
88       break;
89     }
90 
91     ecma_bytecode_deref ((ecma_compiled_code_t *) cached_bytecode_p);
92     cache_p[i] = NULL;
93   }
94 
95   JERRY_CONTEXT (re_cache_idx) = 0;
96 } /* re_cache_gc */
97 
98 /**
99  * Compilation of RegExp bytecode
100  *
101  * @return pointer to bytecode if compilation was successful
102  *         NULL - otherwise
103  */
104 re_compiled_code_t *
re_compile_bytecode(ecma_string_t * pattern_str_p,uint16_t flags)105 re_compile_bytecode (ecma_string_t *pattern_str_p, /**< pattern */
106                      uint16_t flags) /**< flags */
107 {
108   re_compiled_code_t *cached_bytecode_p = re_cache_lookup (pattern_str_p, flags);
109 
110   if (cached_bytecode_p != NULL)
111   {
112     ecma_bytecode_ref ((ecma_compiled_code_t *) cached_bytecode_p);
113     return cached_bytecode_p;
114   }
115 
116   re_compiler_ctx_t re_ctx;
117   re_ctx.flags = flags;
118   re_ctx.captures_count = 1;
119   re_ctx.non_captures_count = 0;
120 
121   re_initialize_regexp_bytecode (&re_ctx);
122 
123   ECMA_STRING_TO_UTF8_STRING (pattern_str_p, pattern_start_p, pattern_start_size);
124 
125   re_ctx.input_start_p = pattern_start_p;
126   re_ctx.input_curr_p = (lit_utf8_byte_t *) pattern_start_p;
127   re_ctx.input_end_p = pattern_start_p + pattern_start_size;
128   re_ctx.groups_count = -1;
129 
130   /* Parse RegExp pattern */
131   ecma_value_t result = re_parse_alternative (&re_ctx, true);
132 
133   ECMA_FINALIZE_UTF8_STRING (pattern_start_p, pattern_start_size);
134 
135   if (ECMA_IS_VALUE_ERROR (result))
136   {
137     /* Compilation failed, free bytecode. */
138     jmem_heap_free_block (re_ctx.bytecode_start_p, re_ctx.bytecode_size);
139     return NULL;
140   }
141 
142   /* Align bytecode size to JMEM_ALIGNMENT so that it can be stored in the bytecode header. */
143   const uint32_t final_size = JERRY_ALIGNUP (re_ctx.bytecode_size, JMEM_ALIGNMENT);
144   re_compiled_code_t *re_compiled_code_p = (re_compiled_code_t *) jmem_heap_realloc_block (re_ctx.bytecode_start_p,
145                                                                                            re_ctx.bytecode_size,
146                                                                                            final_size);
147 
148   /* Bytecoded will be inserted into the cache and returned to the caller, so refcount is implicitly set to 2. */
149   re_compiled_code_p->header.refs = 2;
150   re_compiled_code_p->header.size = (uint16_t) (final_size >> JMEM_ALIGNMENT_LOG);
151   re_compiled_code_p->header.status_flags = re_ctx.flags;
152 
153   ecma_ref_ecma_string (pattern_str_p);
154   re_compiled_code_p->source = ecma_make_string_value (pattern_str_p);
155   re_compiled_code_p->captures_count = re_ctx.captures_count;
156   re_compiled_code_p->non_captures_count = re_ctx.non_captures_count;
157 
158 #if ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE)
159   if (JERRY_CONTEXT (jerry_init_flags) & ECMA_INIT_SHOW_REGEXP_OPCODES)
160   {
161     re_dump_bytecode (&re_ctx);
162   }
163 #endif /* ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE) */
164 
165   uint8_t cache_idx = JERRY_CONTEXT (re_cache_idx);
166 
167   if (JERRY_CONTEXT (re_cache)[cache_idx] != NULL)
168   {
169     ecma_bytecode_deref ((ecma_compiled_code_t *) JERRY_CONTEXT (re_cache)[cache_idx]);
170   }
171 
172   JERRY_CONTEXT (re_cache)[cache_idx] = re_compiled_code_p;
173   JERRY_CONTEXT (re_cache_idx) = (uint8_t) (cache_idx + 1) % RE_CACHE_SIZE;
174 
175   return re_compiled_code_p;
176 } /* re_compile_bytecode */
177 
178 /**
179  * @}
180  * @}
181  * @}
182  */
183 
184 #endif /* ENABLED (JERRY_BUILTIN_REGEXP) */
185