• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/v8.h"
6 #include "src/ast.h"
7 #include "src/assembler.h"
8 #include "src/regexp-stack.h"
9 #include "src/regexp-macro-assembler.h"
10 #include "src/simulator.h"
11 
12 namespace v8 {
13 namespace internal {
14 
RegExpMacroAssembler(Zone * zone)15 RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone)
16   : slow_safe_compiler_(false),
17     global_mode_(NOT_GLOBAL),
18     zone_(zone) {
19 }
20 
21 
~RegExpMacroAssembler()22 RegExpMacroAssembler::~RegExpMacroAssembler() {
23 }
24 
25 
CanReadUnaligned()26 bool RegExpMacroAssembler::CanReadUnaligned() {
27 #ifdef V8_HOST_CAN_READ_UNALIGNED
28   return true;
29 #else
30   return false;
31 #endif
32 }
33 
34 
35 #ifndef V8_INTERPRETED_REGEXP  // Avoid unused code, e.g., on ARM.
36 
NativeRegExpMacroAssembler(Zone * zone)37 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Zone* zone)
38     : RegExpMacroAssembler(zone) {
39 }
40 
41 
~NativeRegExpMacroAssembler()42 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
43 }
44 
45 
CanReadUnaligned()46 bool NativeRegExpMacroAssembler::CanReadUnaligned() {
47   return FLAG_enable_unaligned_accesses && !slow_safe();
48 }
49 
StringCharacterPosition(String * subject,int start_index)50 const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
51     String* subject,
52     int start_index) {
53   // Not just flat, but ultra flat.
54   ASSERT(subject->IsExternalString() || subject->IsSeqString());
55   ASSERT(start_index >= 0);
56   ASSERT(start_index <= subject->length());
57   if (subject->IsOneByteRepresentation()) {
58     const byte* address;
59     if (StringShape(subject).IsExternal()) {
60       const uint8_t* data = ExternalAsciiString::cast(subject)->GetChars();
61       address = reinterpret_cast<const byte*>(data);
62     } else {
63       ASSERT(subject->IsSeqOneByteString());
64       const uint8_t* data = SeqOneByteString::cast(subject)->GetChars();
65       address = reinterpret_cast<const byte*>(data);
66     }
67     return address + start_index;
68   }
69   const uc16* data;
70   if (StringShape(subject).IsExternal()) {
71     data = ExternalTwoByteString::cast(subject)->GetChars();
72   } else {
73     ASSERT(subject->IsSeqTwoByteString());
74     data = SeqTwoByteString::cast(subject)->GetChars();
75   }
76   return reinterpret_cast<const byte*>(data + start_index);
77 }
78 
79 
Match(Handle<Code> regexp_code,Handle<String> subject,int * offsets_vector,int offsets_vector_length,int previous_index,Isolate * isolate)80 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
81     Handle<Code> regexp_code,
82     Handle<String> subject,
83     int* offsets_vector,
84     int offsets_vector_length,
85     int previous_index,
86     Isolate* isolate) {
87 
88   ASSERT(subject->IsFlat());
89   ASSERT(previous_index >= 0);
90   ASSERT(previous_index <= subject->length());
91 
92   // No allocations before calling the regexp, but we can't use
93   // DisallowHeapAllocation, since regexps might be preempted, and another
94   // thread might do allocation anyway.
95 
96   String* subject_ptr = *subject;
97   // Character offsets into string.
98   int start_offset = previous_index;
99   int char_length = subject_ptr->length() - start_offset;
100   int slice_offset = 0;
101 
102   // The string has been flattened, so if it is a cons string it contains the
103   // full string in the first part.
104   if (StringShape(subject_ptr).IsCons()) {
105     ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
106     subject_ptr = ConsString::cast(subject_ptr)->first();
107   } else if (StringShape(subject_ptr).IsSliced()) {
108     SlicedString* slice = SlicedString::cast(subject_ptr);
109     subject_ptr = slice->parent();
110     slice_offset = slice->offset();
111   }
112   // Ensure that an underlying string has the same ASCII-ness.
113   bool is_ascii = subject_ptr->IsOneByteRepresentation();
114   ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
115   // String is now either Sequential or External
116   int char_size_shift = is_ascii ? 0 : 1;
117 
118   const byte* input_start =
119       StringCharacterPosition(subject_ptr, start_offset + slice_offset);
120   int byte_length = char_length << char_size_shift;
121   const byte* input_end = input_start + byte_length;
122   Result res = Execute(*regexp_code,
123                        *subject,
124                        start_offset,
125                        input_start,
126                        input_end,
127                        offsets_vector,
128                        offsets_vector_length,
129                        isolate);
130   return res;
131 }
132 
133 
Execute(Code * code,String * input,int start_offset,const byte * input_start,const byte * input_end,int * output,int output_size,Isolate * isolate)134 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
135     Code* code,
136     String* input,  // This needs to be the unpacked (sliced, cons) string.
137     int start_offset,
138     const byte* input_start,
139     const byte* input_end,
140     int* output,
141     int output_size,
142     Isolate* isolate) {
143   // Ensure that the minimum stack has been allocated.
144   RegExpStackScope stack_scope(isolate);
145   Address stack_base = stack_scope.stack()->stack_base();
146 
147   int direct_call = 0;
148   int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
149                                           input,
150                                           start_offset,
151                                           input_start,
152                                           input_end,
153                                           output,
154                                           output_size,
155                                           stack_base,
156                                           direct_call,
157                                           isolate);
158   ASSERT(result >= RETRY);
159 
160   if (result == EXCEPTION && !isolate->has_pending_exception()) {
161     // We detected a stack overflow (on the backtrack stack) in RegExp code,
162     // but haven't created the exception yet.
163     isolate->StackOverflow();
164   }
165   return static_cast<Result>(result);
166 }
167 
168 
169 const byte NativeRegExpMacroAssembler::word_character_map[] = {
170     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
171     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
172     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
173     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
174 
175     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
176     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
177     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // '0' - '7'
178     0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // '8' - '9'
179 
180     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'A' - 'G'
181     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'H' - 'O'
182     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'P' - 'W'
183     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu,  // 'X' - 'Z', '_'
184 
185     0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'a' - 'g'
186     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'h' - 'o'
187     0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'p' - 'w'
188     0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // 'x' - 'z'
189     // Latin-1 range
190     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
191     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
192     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
193     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
194 
195     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
196     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
197     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
198     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
199 
200     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
201     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
202     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
203     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
204 
205     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
206     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
207     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
208     0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
209 };
210 
211 
CaseInsensitiveCompareUC16(Address byte_offset1,Address byte_offset2,size_t byte_length,Isolate * isolate)212 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
213     Address byte_offset1,
214     Address byte_offset2,
215     size_t byte_length,
216     Isolate* isolate) {
217   unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
218       isolate->regexp_macro_assembler_canonicalize();
219   // This function is not allowed to cause a garbage collection.
220   // A GC might move the calling generated code and invalidate the
221   // return address on the stack.
222   ASSERT(byte_length % 2 == 0);
223   uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
224   uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
225   size_t length = byte_length >> 1;
226 
227   for (size_t i = 0; i < length; i++) {
228     unibrow::uchar c1 = substring1[i];
229     unibrow::uchar c2 = substring2[i];
230     if (c1 != c2) {
231       unibrow::uchar s1[1] = { c1 };
232       canonicalize->get(c1, '\0', s1);
233       if (s1[0] != c2) {
234         unibrow::uchar s2[1] = { c2 };
235         canonicalize->get(c2, '\0', s2);
236         if (s1[0] != s2[0]) {
237           return 0;
238         }
239       }
240     }
241   }
242   return 1;
243 }
244 
245 
GrowStack(Address stack_pointer,Address * stack_base,Isolate * isolate)246 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
247                                               Address* stack_base,
248                                               Isolate* isolate) {
249   RegExpStack* regexp_stack = isolate->regexp_stack();
250   size_t size = regexp_stack->stack_capacity();
251   Address old_stack_base = regexp_stack->stack_base();
252   ASSERT(old_stack_base == *stack_base);
253   ASSERT(stack_pointer <= old_stack_base);
254   ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
255   Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
256   if (new_stack_base == NULL) {
257     return NULL;
258   }
259   *stack_base = new_stack_base;
260   intptr_t stack_content_size = old_stack_base - stack_pointer;
261   return new_stack_base - stack_content_size;
262 }
263 
264 #endif  // V8_INTERPRETED_REGEXP
265 
266 } }  // namespace v8::internal
267