1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/v8.h"
6 #include "src/ast.h"
7 #include "src/assembler.h"
8 #include "src/regexp-stack.h"
9 #include "src/regexp-macro-assembler.h"
10 #include "src/simulator.h"
11
12 namespace v8 {
13 namespace internal {
14
RegExpMacroAssembler(Zone * zone)15 RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone)
16 : slow_safe_compiler_(false),
17 global_mode_(NOT_GLOBAL),
18 zone_(zone) {
19 }
20
21
~RegExpMacroAssembler()22 RegExpMacroAssembler::~RegExpMacroAssembler() {
23 }
24
25
CanReadUnaligned()26 bool RegExpMacroAssembler::CanReadUnaligned() {
27 #ifdef V8_HOST_CAN_READ_UNALIGNED
28 return true;
29 #else
30 return false;
31 #endif
32 }
33
34
35 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
36
NativeRegExpMacroAssembler(Zone * zone)37 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Zone* zone)
38 : RegExpMacroAssembler(zone) {
39 }
40
41
~NativeRegExpMacroAssembler()42 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
43 }
44
45
CanReadUnaligned()46 bool NativeRegExpMacroAssembler::CanReadUnaligned() {
47 return FLAG_enable_unaligned_accesses && !slow_safe();
48 }
49
StringCharacterPosition(String * subject,int start_index)50 const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
51 String* subject,
52 int start_index) {
53 // Not just flat, but ultra flat.
54 ASSERT(subject->IsExternalString() || subject->IsSeqString());
55 ASSERT(start_index >= 0);
56 ASSERT(start_index <= subject->length());
57 if (subject->IsOneByteRepresentation()) {
58 const byte* address;
59 if (StringShape(subject).IsExternal()) {
60 const uint8_t* data = ExternalAsciiString::cast(subject)->GetChars();
61 address = reinterpret_cast<const byte*>(data);
62 } else {
63 ASSERT(subject->IsSeqOneByteString());
64 const uint8_t* data = SeqOneByteString::cast(subject)->GetChars();
65 address = reinterpret_cast<const byte*>(data);
66 }
67 return address + start_index;
68 }
69 const uc16* data;
70 if (StringShape(subject).IsExternal()) {
71 data = ExternalTwoByteString::cast(subject)->GetChars();
72 } else {
73 ASSERT(subject->IsSeqTwoByteString());
74 data = SeqTwoByteString::cast(subject)->GetChars();
75 }
76 return reinterpret_cast<const byte*>(data + start_index);
77 }
78
79
Match(Handle<Code> regexp_code,Handle<String> subject,int * offsets_vector,int offsets_vector_length,int previous_index,Isolate * isolate)80 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
81 Handle<Code> regexp_code,
82 Handle<String> subject,
83 int* offsets_vector,
84 int offsets_vector_length,
85 int previous_index,
86 Isolate* isolate) {
87
88 ASSERT(subject->IsFlat());
89 ASSERT(previous_index >= 0);
90 ASSERT(previous_index <= subject->length());
91
92 // No allocations before calling the regexp, but we can't use
93 // DisallowHeapAllocation, since regexps might be preempted, and another
94 // thread might do allocation anyway.
95
96 String* subject_ptr = *subject;
97 // Character offsets into string.
98 int start_offset = previous_index;
99 int char_length = subject_ptr->length() - start_offset;
100 int slice_offset = 0;
101
102 // The string has been flattened, so if it is a cons string it contains the
103 // full string in the first part.
104 if (StringShape(subject_ptr).IsCons()) {
105 ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
106 subject_ptr = ConsString::cast(subject_ptr)->first();
107 } else if (StringShape(subject_ptr).IsSliced()) {
108 SlicedString* slice = SlicedString::cast(subject_ptr);
109 subject_ptr = slice->parent();
110 slice_offset = slice->offset();
111 }
112 // Ensure that an underlying string has the same ASCII-ness.
113 bool is_ascii = subject_ptr->IsOneByteRepresentation();
114 ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
115 // String is now either Sequential or External
116 int char_size_shift = is_ascii ? 0 : 1;
117
118 const byte* input_start =
119 StringCharacterPosition(subject_ptr, start_offset + slice_offset);
120 int byte_length = char_length << char_size_shift;
121 const byte* input_end = input_start + byte_length;
122 Result res = Execute(*regexp_code,
123 *subject,
124 start_offset,
125 input_start,
126 input_end,
127 offsets_vector,
128 offsets_vector_length,
129 isolate);
130 return res;
131 }
132
133
Execute(Code * code,String * input,int start_offset,const byte * input_start,const byte * input_end,int * output,int output_size,Isolate * isolate)134 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
135 Code* code,
136 String* input, // This needs to be the unpacked (sliced, cons) string.
137 int start_offset,
138 const byte* input_start,
139 const byte* input_end,
140 int* output,
141 int output_size,
142 Isolate* isolate) {
143 // Ensure that the minimum stack has been allocated.
144 RegExpStackScope stack_scope(isolate);
145 Address stack_base = stack_scope.stack()->stack_base();
146
147 int direct_call = 0;
148 int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
149 input,
150 start_offset,
151 input_start,
152 input_end,
153 output,
154 output_size,
155 stack_base,
156 direct_call,
157 isolate);
158 ASSERT(result >= RETRY);
159
160 if (result == EXCEPTION && !isolate->has_pending_exception()) {
161 // We detected a stack overflow (on the backtrack stack) in RegExp code,
162 // but haven't created the exception yet.
163 isolate->StackOverflow();
164 }
165 return static_cast<Result>(result);
166 }
167
168
169 const byte NativeRegExpMacroAssembler::word_character_map[] = {
170 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
171 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
172 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
173 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
174
175 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
176 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
177 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
178 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
179
180 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
181 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
182 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
183 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
184
185 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
186 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
187 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
188 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
189 // Latin-1 range
190 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
191 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
192 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
193 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
194
195 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
196 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
197 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
198 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
199
200 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
201 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
202 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
203 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
204
205 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
206 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
207 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
208 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
209 };
210
211
CaseInsensitiveCompareUC16(Address byte_offset1,Address byte_offset2,size_t byte_length,Isolate * isolate)212 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
213 Address byte_offset1,
214 Address byte_offset2,
215 size_t byte_length,
216 Isolate* isolate) {
217 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
218 isolate->regexp_macro_assembler_canonicalize();
219 // This function is not allowed to cause a garbage collection.
220 // A GC might move the calling generated code and invalidate the
221 // return address on the stack.
222 ASSERT(byte_length % 2 == 0);
223 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
224 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
225 size_t length = byte_length >> 1;
226
227 for (size_t i = 0; i < length; i++) {
228 unibrow::uchar c1 = substring1[i];
229 unibrow::uchar c2 = substring2[i];
230 if (c1 != c2) {
231 unibrow::uchar s1[1] = { c1 };
232 canonicalize->get(c1, '\0', s1);
233 if (s1[0] != c2) {
234 unibrow::uchar s2[1] = { c2 };
235 canonicalize->get(c2, '\0', s2);
236 if (s1[0] != s2[0]) {
237 return 0;
238 }
239 }
240 }
241 }
242 return 1;
243 }
244
245
GrowStack(Address stack_pointer,Address * stack_base,Isolate * isolate)246 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
247 Address* stack_base,
248 Isolate* isolate) {
249 RegExpStack* regexp_stack = isolate->regexp_stack();
250 size_t size = regexp_stack->stack_capacity();
251 Address old_stack_base = regexp_stack->stack_base();
252 ASSERT(old_stack_base == *stack_base);
253 ASSERT(stack_pointer <= old_stack_base);
254 ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
255 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
256 if (new_stack_base == NULL) {
257 return NULL;
258 }
259 *stack_base = new_stack_base;
260 intptr_t stack_content_size = old_stack_base - stack_pointer;
261 return new_stack_base - stack_content_size;
262 }
263
264 #endif // V8_INTERPRETED_REGEXP
265
266 } } // namespace v8::internal
267