1 // Copyright 2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28 #include "v8.h"
29 #include "ast.h"
30 #include "assembler.h"
31 #include "regexp-stack.h"
32 #include "regexp-macro-assembler.h"
33 #include "simulator.h"
34
35 namespace v8 {
36 namespace internal {
37
RegExpMacroAssembler()38 RegExpMacroAssembler::RegExpMacroAssembler() {
39 }
40
41
~RegExpMacroAssembler()42 RegExpMacroAssembler::~RegExpMacroAssembler() {
43 }
44
45
CanReadUnaligned()46 bool RegExpMacroAssembler::CanReadUnaligned() {
47 #ifdef V8_HOST_CAN_READ_UNALIGNED
48 return true;
49 #else
50 return false;
51 #endif
52 }
53
54
55 #ifdef V8_NATIVE_REGEXP // Avoid unused code, e.g., on ARM.
56
NativeRegExpMacroAssembler()57 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler() {
58 }
59
60
~NativeRegExpMacroAssembler()61 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
62 }
63
64
CanReadUnaligned()65 bool NativeRegExpMacroAssembler::CanReadUnaligned() {
66 #ifdef V8_TARGET_CAN_READ_UNALIGNED
67 return true;
68 #else
69 return false;
70 #endif
71 }
72
StringCharacterPosition(String * subject,int start_index)73 const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
74 String* subject,
75 int start_index) {
76 // Not just flat, but ultra flat.
77 ASSERT(subject->IsExternalString() || subject->IsSeqString());
78 ASSERT(start_index >= 0);
79 ASSERT(start_index <= subject->length());
80 if (subject->IsAsciiRepresentation()) {
81 const byte* address;
82 if (StringShape(subject).IsExternal()) {
83 const char* data = ExternalAsciiString::cast(subject)->resource()->data();
84 address = reinterpret_cast<const byte*>(data);
85 } else {
86 ASSERT(subject->IsSeqAsciiString());
87 char* data = SeqAsciiString::cast(subject)->GetChars();
88 address = reinterpret_cast<const byte*>(data);
89 }
90 return address + start_index;
91 }
92 const uc16* data;
93 if (StringShape(subject).IsExternal()) {
94 data = ExternalTwoByteString::cast(subject)->resource()->data();
95 } else {
96 ASSERT(subject->IsSeqTwoByteString());
97 data = SeqTwoByteString::cast(subject)->GetChars();
98 }
99 return reinterpret_cast<const byte*>(data + start_index);
100 }
101
102
Match(Handle<Code> regexp_code,Handle<String> subject,int * offsets_vector,int offsets_vector_length,int previous_index)103 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
104 Handle<Code> regexp_code,
105 Handle<String> subject,
106 int* offsets_vector,
107 int offsets_vector_length,
108 int previous_index) {
109
110 ASSERT(subject->IsFlat());
111 ASSERT(previous_index >= 0);
112 ASSERT(previous_index <= subject->length());
113
114 // No allocations before calling the regexp, but we can't use
115 // AssertNoAllocation, since regexps might be preempted, and another thread
116 // might do allocation anyway.
117
118 String* subject_ptr = *subject;
119 // Character offsets into string.
120 int start_offset = previous_index;
121 int end_offset = subject_ptr->length();
122
123 bool is_ascii = subject->IsAsciiRepresentation();
124
125 // The string has been flattened, so it it is a cons string it contains the
126 // full string in the first part.
127 if (StringShape(subject_ptr).IsCons()) {
128 ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
129 subject_ptr = ConsString::cast(subject_ptr)->first();
130 }
131 // Ensure that an underlying string has the same ascii-ness.
132 ASSERT(subject_ptr->IsAsciiRepresentation() == is_ascii);
133 ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
134 // String is now either Sequential or External
135 int char_size_shift = is_ascii ? 0 : 1;
136 int char_length = end_offset - start_offset;
137
138 const byte* input_start =
139 StringCharacterPosition(subject_ptr, start_offset);
140 int byte_length = char_length << char_size_shift;
141 const byte* input_end = input_start + byte_length;
142 Result res = Execute(*regexp_code,
143 subject_ptr,
144 start_offset,
145 input_start,
146 input_end,
147 offsets_vector);
148 return res;
149 }
150
151
Execute(Code * code,String * input,int start_offset,const byte * input_start,const byte * input_end,int * output)152 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
153 Code* code,
154 String* input,
155 int start_offset,
156 const byte* input_start,
157 const byte* input_end,
158 int* output) {
159 typedef int (*matcher)(String*, int, const byte*,
160 const byte*, int*, Address, int);
161 matcher matcher_func = FUNCTION_CAST<matcher>(code->entry());
162
163 // Ensure that the minimum stack has been allocated.
164 RegExpStack stack;
165 Address stack_base = RegExpStack::stack_base();
166
167 int direct_call = 0;
168 int result = CALL_GENERATED_REGEXP_CODE(matcher_func,
169 input,
170 start_offset,
171 input_start,
172 input_end,
173 output,
174 stack_base,
175 direct_call);
176 ASSERT(result <= SUCCESS);
177 ASSERT(result >= RETRY);
178
179 if (result == EXCEPTION && !Top::has_pending_exception()) {
180 // We detected a stack overflow (on the backtrack stack) in RegExp code,
181 // but haven't created the exception yet.
182 Top::StackOverflow();
183 }
184 return static_cast<Result>(result);
185 }
186
187
188 static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
189
190
191 byte NativeRegExpMacroAssembler::word_character_map[] = {
192 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
193 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
194 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
195 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
196
197 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
198 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
199 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
200 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
201
202 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
203 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
204 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
205 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
206
207 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
208 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
209 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
210 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
211 };
212
213
CaseInsensitiveCompareUC16(Address byte_offset1,Address byte_offset2,size_t byte_length)214 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
215 Address byte_offset1,
216 Address byte_offset2,
217 size_t byte_length) {
218 // This function is not allowed to cause a garbage collection.
219 // A GC might move the calling generated code and invalidate the
220 // return address on the stack.
221 ASSERT(byte_length % 2 == 0);
222 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
223 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
224 size_t length = byte_length >> 1;
225
226 for (size_t i = 0; i < length; i++) {
227 unibrow::uchar c1 = substring1[i];
228 unibrow::uchar c2 = substring2[i];
229 if (c1 != c2) {
230 unibrow::uchar s1[1] = { c1 };
231 canonicalize.get(c1, '\0', s1);
232 if (s1[0] != c2) {
233 unibrow::uchar s2[1] = { c2 };
234 canonicalize.get(c2, '\0', s2);
235 if (s1[0] != s2[0]) {
236 return 0;
237 }
238 }
239 }
240 }
241 return 1;
242 }
243
244
GrowStack(Address stack_pointer,Address * stack_base)245 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
246 Address* stack_base) {
247 size_t size = RegExpStack::stack_capacity();
248 Address old_stack_base = RegExpStack::stack_base();
249 ASSERT(old_stack_base == *stack_base);
250 ASSERT(stack_pointer <= old_stack_base);
251 ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
252 Address new_stack_base = RegExpStack::EnsureCapacity(size * 2);
253 if (new_stack_base == NULL) {
254 return NULL;
255 }
256 *stack_base = new_stack_base;
257 intptr_t stack_content_size = old_stack_base - stack_pointer;
258 return new_stack_base - stack_content_size;
259 }
260
261 #endif // V8_NATIVE_REGEXP
262 } } // namespace v8::internal
263