• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 //     * Redistributions of source code must retain the above copyright
7 //       notice, this list of conditions and the following disclaimer.
8 //     * Redistributions in binary form must reproduce the above
9 //       copyright notice, this list of conditions and the following
10 //       disclaimer in the documentation and/or other materials provided
11 //       with the distribution.
12 //     * Neither the name of Google Inc. nor the names of its
13 //       contributors may be used to endorse or promote products derived
14 //       from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #include "v8.h"
29 #include "ast.h"
30 #include "assembler.h"
31 #include "regexp-stack.h"
32 #include "regexp-macro-assembler.h"
33 #include "simulator.h"
34 
35 namespace v8 {
36 namespace internal {
37 
RegExpMacroAssembler()38 RegExpMacroAssembler::RegExpMacroAssembler() : slow_safe_compiler_(false) {
39 }
40 
41 
~RegExpMacroAssembler()42 RegExpMacroAssembler::~RegExpMacroAssembler() {
43 }
44 
45 
CanReadUnaligned()46 bool RegExpMacroAssembler::CanReadUnaligned() {
47 #ifdef V8_HOST_CAN_READ_UNALIGNED
48   return true;
49 #else
50   return false;
51 #endif
52 }
53 
54 
55 #ifndef V8_INTERPRETED_REGEXP  // Avoid unused code, e.g., on ARM.
56 
NativeRegExpMacroAssembler()57 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler()
58     : RegExpMacroAssembler() {
59 }
60 
61 
~NativeRegExpMacroAssembler()62 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
63 }
64 
65 
CanReadUnaligned()66 bool NativeRegExpMacroAssembler::CanReadUnaligned() {
67 #ifdef V8_TARGET_CAN_READ_UNALIGNED
68   return !slow_safe();
69 #else
70   return false;
71 #endif
72 }
73 
StringCharacterPosition(String * subject,int start_index)74 const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
75     String* subject,
76     int start_index) {
77   // Not just flat, but ultra flat.
78   ASSERT(subject->IsExternalString() || subject->IsSeqString());
79   ASSERT(start_index >= 0);
80   ASSERT(start_index <= subject->length());
81   if (subject->IsAsciiRepresentation()) {
82     const byte* address;
83     if (StringShape(subject).IsExternal()) {
84       const char* data = ExternalAsciiString::cast(subject)->GetChars();
85       address = reinterpret_cast<const byte*>(data);
86     } else {
87       ASSERT(subject->IsSeqAsciiString());
88       char* data = SeqAsciiString::cast(subject)->GetChars();
89       address = reinterpret_cast<const byte*>(data);
90     }
91     return address + start_index;
92   }
93   const uc16* data;
94   if (StringShape(subject).IsExternal()) {
95     data = ExternalTwoByteString::cast(subject)->GetChars();
96   } else {
97     ASSERT(subject->IsSeqTwoByteString());
98     data = SeqTwoByteString::cast(subject)->GetChars();
99   }
100   return reinterpret_cast<const byte*>(data + start_index);
101 }
102 
103 
Match(Handle<Code> regexp_code,Handle<String> subject,int * offsets_vector,int offsets_vector_length,int previous_index,Isolate * isolate)104 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
105     Handle<Code> regexp_code,
106     Handle<String> subject,
107     int* offsets_vector,
108     int offsets_vector_length,
109     int previous_index,
110     Isolate* isolate) {
111 
112   ASSERT(subject->IsFlat());
113   ASSERT(previous_index >= 0);
114   ASSERT(previous_index <= subject->length());
115 
116   // No allocations before calling the regexp, but we can't use
117   // AssertNoAllocation, since regexps might be preempted, and another thread
118   // might do allocation anyway.
119 
120   String* subject_ptr = *subject;
121   // Character offsets into string.
122   int start_offset = previous_index;
123   int char_length = subject_ptr->length() - start_offset;
124   int slice_offset = 0;
125 
126   // The string has been flattened, so if it is a cons string it contains the
127   // full string in the first part.
128   if (StringShape(subject_ptr).IsCons()) {
129     ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
130     subject_ptr = ConsString::cast(subject_ptr)->first();
131   } else if (StringShape(subject_ptr).IsSliced()) {
132     SlicedString* slice = SlicedString::cast(subject_ptr);
133     subject_ptr = slice->parent();
134     slice_offset = slice->offset();
135   }
136   // Ensure that an underlying string has the same ASCII-ness.
137   bool is_ascii = subject_ptr->IsAsciiRepresentation();
138   ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
139   // String is now either Sequential or External
140   int char_size_shift = is_ascii ? 0 : 1;
141 
142   const byte* input_start =
143       StringCharacterPosition(subject_ptr, start_offset + slice_offset);
144   int byte_length = char_length << char_size_shift;
145   const byte* input_end = input_start + byte_length;
146   Result res = Execute(*regexp_code,
147                        *subject,
148                        start_offset,
149                        input_start,
150                        input_end,
151                        offsets_vector,
152                        isolate);
153   return res;
154 }
155 
156 
Execute(Code * code,String * input,int start_offset,const byte * input_start,const byte * input_end,int * output,Isolate * isolate)157 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
158     Code* code,
159     String* input,  // This needs to be the unpacked (sliced, cons) string.
160     int start_offset,
161     const byte* input_start,
162     const byte* input_end,
163     int* output,
164     Isolate* isolate) {
165   ASSERT(isolate == Isolate::Current());
166   // Ensure that the minimum stack has been allocated.
167   RegExpStackScope stack_scope(isolate);
168   Address stack_base = stack_scope.stack()->stack_base();
169 
170   int direct_call = 0;
171   int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
172                                           input,
173                                           start_offset,
174                                           input_start,
175                                           input_end,
176                                           output,
177                                           stack_base,
178                                           direct_call,
179                                           isolate);
180   ASSERT(result <= SUCCESS);
181   ASSERT(result >= RETRY);
182 
183   if (result == EXCEPTION && !isolate->has_pending_exception()) {
184     // We detected a stack overflow (on the backtrack stack) in RegExp code,
185     // but haven't created the exception yet.
186     isolate->StackOverflow();
187   }
188   return static_cast<Result>(result);
189 }
190 
191 
192 const byte NativeRegExpMacroAssembler::word_character_map[] = {
193     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
194     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
195     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
196     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
197 
198     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
199     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
200     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // '0' - '7'
201     0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // '8' - '9'
202 
203     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'A' - 'G'
204     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'H' - 'O'
205     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'P' - 'W'
206     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu,  // 'X' - 'Z', '_'
207 
208     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'a' - 'g'
209     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'h' - 'o'
210     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'p' - 'w'
211     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // 'x' - 'z'
212 };
213 
214 
CaseInsensitiveCompareUC16(Address byte_offset1,Address byte_offset2,size_t byte_length,Isolate * isolate)215 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
216     Address byte_offset1,
217     Address byte_offset2,
218     size_t byte_length,
219     Isolate* isolate) {
220   ASSERT(isolate == Isolate::Current());
221   unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
222       isolate->regexp_macro_assembler_canonicalize();
223   // This function is not allowed to cause a garbage collection.
224   // A GC might move the calling generated code and invalidate the
225   // return address on the stack.
226   ASSERT(byte_length % 2 == 0);
227   uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
228   uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
229   size_t length = byte_length >> 1;
230 
231   for (size_t i = 0; i < length; i++) {
232     unibrow::uchar c1 = substring1[i];
233     unibrow::uchar c2 = substring2[i];
234     if (c1 != c2) {
235       unibrow::uchar s1[1] = { c1 };
236       canonicalize->get(c1, '\0', s1);
237       if (s1[0] != c2) {
238         unibrow::uchar s2[1] = { c2 };
239         canonicalize->get(c2, '\0', s2);
240         if (s1[0] != s2[0]) {
241           return 0;
242         }
243       }
244     }
245   }
246   return 1;
247 }
248 
249 
GrowStack(Address stack_pointer,Address * stack_base,Isolate * isolate)250 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
251                                               Address* stack_base,
252                                               Isolate* isolate) {
253   ASSERT(isolate == Isolate::Current());
254   RegExpStack* regexp_stack = isolate->regexp_stack();
255   size_t size = regexp_stack->stack_capacity();
256   Address old_stack_base = regexp_stack->stack_base();
257   ASSERT(old_stack_base == *stack_base);
258   ASSERT(stack_pointer <= old_stack_base);
259   ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
260   Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
261   if (new_stack_base == NULL) {
262     return NULL;
263   }
264   *stack_base = new_stack_base;
265   intptr_t stack_content_size = old_stack_base - stack_pointer;
266   return new_stack_base - stack_content_size;
267 }
268 
269 #endif  // V8_INTERPRETED_REGEXP
270 
271 } }  // namespace v8::internal
272