1 // Copyright 2008-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28 #include "v8.h"
29 #include "ast.h"
30 #include "bytecodes-irregexp.h"
31 #include "regexp-macro-assembler.h"
32 #include "regexp-macro-assembler-irregexp.h"
33 #include "regexp-macro-assembler-irregexp-inl.h"
34
35
36 namespace v8 {
37 namespace internal {
38
39 #ifdef V8_INTERPRETED_REGEXP
40
RegExpMacroAssemblerIrregexp(Vector<byte> buffer)41 RegExpMacroAssemblerIrregexp::RegExpMacroAssemblerIrregexp(Vector<byte> buffer)
42 : buffer_(buffer),
43 pc_(0),
44 own_buffer_(false),
45 advance_current_end_(kInvalidPC) {
46 }
47
48
~RegExpMacroAssemblerIrregexp()49 RegExpMacroAssemblerIrregexp::~RegExpMacroAssemblerIrregexp() {
50 if (backtrack_.is_linked()) backtrack_.Unuse();
51 if (own_buffer_) buffer_.Dispose();
52 }
53
54
55 RegExpMacroAssemblerIrregexp::IrregexpImplementation
Implementation()56 RegExpMacroAssemblerIrregexp::Implementation() {
57 return kBytecodeImplementation;
58 }
59
60
Bind(Label * l)61 void RegExpMacroAssemblerIrregexp::Bind(Label* l) {
62 advance_current_end_ = kInvalidPC;
63 ASSERT(!l->is_bound());
64 if (l->is_linked()) {
65 int pos = l->pos();
66 while (pos != 0) {
67 int fixup = pos;
68 pos = *reinterpret_cast<int32_t*>(buffer_.start() + fixup);
69 *reinterpret_cast<uint32_t*>(buffer_.start() + fixup) = pc_;
70 }
71 }
72 l->bind_to(pc_);
73 }
74
75
EmitOrLink(Label * l)76 void RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) {
77 if (l == NULL) l = &backtrack_;
78 if (l->is_bound()) {
79 Emit32(l->pos());
80 } else {
81 int pos = 0;
82 if (l->is_linked()) {
83 pos = l->pos();
84 }
85 l->link_to(pc_);
86 Emit32(pos);
87 }
88 }
89
90
PopRegister(int register_index)91 void RegExpMacroAssemblerIrregexp::PopRegister(int register_index) {
92 ASSERT(register_index >= 0);
93 ASSERT(register_index <= kMaxRegister);
94 Emit(BC_POP_REGISTER, register_index);
95 }
96
97
PushRegister(int register_index,StackCheckFlag check_stack_limit)98 void RegExpMacroAssemblerIrregexp::PushRegister(
99 int register_index,
100 StackCheckFlag check_stack_limit) {
101 ASSERT(register_index >= 0);
102 ASSERT(register_index <= kMaxRegister);
103 Emit(BC_PUSH_REGISTER, register_index);
104 }
105
106
WriteCurrentPositionToRegister(int register_index,int cp_offset)107 void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister(
108 int register_index, int cp_offset) {
109 ASSERT(register_index >= 0);
110 ASSERT(register_index <= kMaxRegister);
111 Emit(BC_SET_REGISTER_TO_CP, register_index);
112 Emit32(cp_offset); // Current position offset.
113 }
114
115
ClearRegisters(int reg_from,int reg_to)116 void RegExpMacroAssemblerIrregexp::ClearRegisters(int reg_from, int reg_to) {
117 ASSERT(reg_from <= reg_to);
118 for (int reg = reg_from; reg <= reg_to; reg++) {
119 SetRegister(reg, -1);
120 }
121 }
122
123
ReadCurrentPositionFromRegister(int register_index)124 void RegExpMacroAssemblerIrregexp::ReadCurrentPositionFromRegister(
125 int register_index) {
126 ASSERT(register_index >= 0);
127 ASSERT(register_index <= kMaxRegister);
128 Emit(BC_SET_CP_TO_REGISTER, register_index);
129 }
130
131
WriteStackPointerToRegister(int register_index)132 void RegExpMacroAssemblerIrregexp::WriteStackPointerToRegister(
133 int register_index) {
134 ASSERT(register_index >= 0);
135 ASSERT(register_index <= kMaxRegister);
136 Emit(BC_SET_REGISTER_TO_SP, register_index);
137 }
138
139
ReadStackPointerFromRegister(int register_index)140 void RegExpMacroAssemblerIrregexp::ReadStackPointerFromRegister(
141 int register_index) {
142 ASSERT(register_index >= 0);
143 ASSERT(register_index <= kMaxRegister);
144 Emit(BC_SET_SP_TO_REGISTER, register_index);
145 }
146
147
SetCurrentPositionFromEnd(int by)148 void RegExpMacroAssemblerIrregexp::SetCurrentPositionFromEnd(int by) {
149 ASSERT(is_uint24(by));
150 Emit(BC_SET_CURRENT_POSITION_FROM_END, by);
151 }
152
153
SetRegister(int register_index,int to)154 void RegExpMacroAssemblerIrregexp::SetRegister(int register_index, int to) {
155 ASSERT(register_index >= 0);
156 ASSERT(register_index <= kMaxRegister);
157 Emit(BC_SET_REGISTER, register_index);
158 Emit32(to);
159 }
160
161
AdvanceRegister(int register_index,int by)162 void RegExpMacroAssemblerIrregexp::AdvanceRegister(int register_index, int by) {
163 ASSERT(register_index >= 0);
164 ASSERT(register_index <= kMaxRegister);
165 Emit(BC_ADVANCE_REGISTER, register_index);
166 Emit32(by);
167 }
168
169
PopCurrentPosition()170 void RegExpMacroAssemblerIrregexp::PopCurrentPosition() {
171 Emit(BC_POP_CP, 0);
172 }
173
174
PushCurrentPosition()175 void RegExpMacroAssemblerIrregexp::PushCurrentPosition() {
176 Emit(BC_PUSH_CP, 0);
177 }
178
179
Backtrack()180 void RegExpMacroAssemblerIrregexp::Backtrack() {
181 Emit(BC_POP_BT, 0);
182 }
183
184
GoTo(Label * l)185 void RegExpMacroAssemblerIrregexp::GoTo(Label* l) {
186 if (advance_current_end_ == pc_) {
187 // Combine advance current and goto.
188 pc_ = advance_current_start_;
189 Emit(BC_ADVANCE_CP_AND_GOTO, advance_current_offset_);
190 EmitOrLink(l);
191 advance_current_end_ = kInvalidPC;
192 } else {
193 // Regular goto.
194 Emit(BC_GOTO, 0);
195 EmitOrLink(l);
196 }
197 }
198
199
PushBacktrack(Label * l)200 void RegExpMacroAssemblerIrregexp::PushBacktrack(Label* l) {
201 Emit(BC_PUSH_BT, 0);
202 EmitOrLink(l);
203 }
204
205
Succeed()206 void RegExpMacroAssemblerIrregexp::Succeed() {
207 Emit(BC_SUCCEED, 0);
208 }
209
210
Fail()211 void RegExpMacroAssemblerIrregexp::Fail() {
212 Emit(BC_FAIL, 0);
213 }
214
215
AdvanceCurrentPosition(int by)216 void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) {
217 ASSERT(by >= kMinCPOffset);
218 ASSERT(by <= kMaxCPOffset);
219 advance_current_start_ = pc_;
220 advance_current_offset_ = by;
221 Emit(BC_ADVANCE_CP, by);
222 advance_current_end_ = pc_;
223 }
224
225
CheckGreedyLoop(Label * on_tos_equals_current_position)226 void RegExpMacroAssemblerIrregexp::CheckGreedyLoop(
227 Label* on_tos_equals_current_position) {
228 Emit(BC_CHECK_GREEDY, 0);
229 EmitOrLink(on_tos_equals_current_position);
230 }
231
232
LoadCurrentCharacter(int cp_offset,Label * on_failure,bool check_bounds,int characters)233 void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset,
234 Label* on_failure,
235 bool check_bounds,
236 int characters) {
237 ASSERT(cp_offset >= kMinCPOffset);
238 ASSERT(cp_offset <= kMaxCPOffset);
239 int bytecode;
240 if (check_bounds) {
241 if (characters == 4) {
242 bytecode = BC_LOAD_4_CURRENT_CHARS;
243 } else if (characters == 2) {
244 bytecode = BC_LOAD_2_CURRENT_CHARS;
245 } else {
246 ASSERT(characters == 1);
247 bytecode = BC_LOAD_CURRENT_CHAR;
248 }
249 } else {
250 if (characters == 4) {
251 bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED;
252 } else if (characters == 2) {
253 bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED;
254 } else {
255 ASSERT(characters == 1);
256 bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED;
257 }
258 }
259 Emit(bytecode, cp_offset);
260 if (check_bounds) EmitOrLink(on_failure);
261 }
262
263
CheckCharacterLT(uc16 limit,Label * on_less)264 void RegExpMacroAssemblerIrregexp::CheckCharacterLT(uc16 limit,
265 Label* on_less) {
266 Emit(BC_CHECK_LT, limit);
267 EmitOrLink(on_less);
268 }
269
270
CheckCharacterGT(uc16 limit,Label * on_greater)271 void RegExpMacroAssemblerIrregexp::CheckCharacterGT(uc16 limit,
272 Label* on_greater) {
273 Emit(BC_CHECK_GT, limit);
274 EmitOrLink(on_greater);
275 }
276
277
CheckCharacter(uint32_t c,Label * on_equal)278 void RegExpMacroAssemblerIrregexp::CheckCharacter(uint32_t c, Label* on_equal) {
279 if (c > MAX_FIRST_ARG) {
280 Emit(BC_CHECK_4_CHARS, 0);
281 Emit32(c);
282 } else {
283 Emit(BC_CHECK_CHAR, c);
284 }
285 EmitOrLink(on_equal);
286 }
287
288
CheckAtStart(Label * on_at_start)289 void RegExpMacroAssemblerIrregexp::CheckAtStart(Label* on_at_start) {
290 Emit(BC_CHECK_AT_START, 0);
291 EmitOrLink(on_at_start);
292 }
293
294
CheckNotAtStart(Label * on_not_at_start)295 void RegExpMacroAssemblerIrregexp::CheckNotAtStart(Label* on_not_at_start) {
296 Emit(BC_CHECK_NOT_AT_START, 0);
297 EmitOrLink(on_not_at_start);
298 }
299
300
CheckNotCharacter(uint32_t c,Label * on_not_equal)301 void RegExpMacroAssemblerIrregexp::CheckNotCharacter(uint32_t c,
302 Label* on_not_equal) {
303 if (c > MAX_FIRST_ARG) {
304 Emit(BC_CHECK_NOT_4_CHARS, 0);
305 Emit32(c);
306 } else {
307 Emit(BC_CHECK_NOT_CHAR, c);
308 }
309 EmitOrLink(on_not_equal);
310 }
311
312
CheckCharacterAfterAnd(uint32_t c,uint32_t mask,Label * on_equal)313 void RegExpMacroAssemblerIrregexp::CheckCharacterAfterAnd(
314 uint32_t c,
315 uint32_t mask,
316 Label* on_equal) {
317 if (c > MAX_FIRST_ARG) {
318 Emit(BC_AND_CHECK_4_CHARS, 0);
319 Emit32(c);
320 } else {
321 Emit(BC_AND_CHECK_CHAR, c);
322 }
323 Emit32(mask);
324 EmitOrLink(on_equal);
325 }
326
327
CheckNotCharacterAfterAnd(uint32_t c,uint32_t mask,Label * on_not_equal)328 void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterAnd(
329 uint32_t c,
330 uint32_t mask,
331 Label* on_not_equal) {
332 if (c > MAX_FIRST_ARG) {
333 Emit(BC_AND_CHECK_NOT_4_CHARS, 0);
334 Emit32(c);
335 } else {
336 Emit(BC_AND_CHECK_NOT_CHAR, c);
337 }
338 Emit32(mask);
339 EmitOrLink(on_not_equal);
340 }
341
342
CheckNotCharacterAfterMinusAnd(uc16 c,uc16 minus,uc16 mask,Label * on_not_equal)343 void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusAnd(
344 uc16 c,
345 uc16 minus,
346 uc16 mask,
347 Label* on_not_equal) {
348 Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c);
349 Emit16(minus);
350 Emit16(mask);
351 EmitOrLink(on_not_equal);
352 }
353
354
CheckNotBackReference(int start_reg,Label * on_not_equal)355 void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg,
356 Label* on_not_equal) {
357 ASSERT(start_reg >= 0);
358 ASSERT(start_reg <= kMaxRegister);
359 Emit(BC_CHECK_NOT_BACK_REF, start_reg);
360 EmitOrLink(on_not_equal);
361 }
362
363
CheckNotBackReferenceIgnoreCase(int start_reg,Label * on_not_equal)364 void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase(
365 int start_reg,
366 Label* on_not_equal) {
367 ASSERT(start_reg >= 0);
368 ASSERT(start_reg <= kMaxRegister);
369 Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg);
370 EmitOrLink(on_not_equal);
371 }
372
373
CheckNotRegistersEqual(int reg1,int reg2,Label * on_not_equal)374 void RegExpMacroAssemblerIrregexp::CheckNotRegistersEqual(int reg1,
375 int reg2,
376 Label* on_not_equal) {
377 ASSERT(reg1 >= 0);
378 ASSERT(reg1 <= kMaxRegister);
379 Emit(BC_CHECK_NOT_REGS_EQUAL, reg1);
380 Emit32(reg2);
381 EmitOrLink(on_not_equal);
382 }
383
384
CheckCharacters(Vector<const uc16> str,int cp_offset,Label * on_failure,bool check_end_of_string)385 void RegExpMacroAssemblerIrregexp::CheckCharacters(
386 Vector<const uc16> str,
387 int cp_offset,
388 Label* on_failure,
389 bool check_end_of_string) {
390 ASSERT(cp_offset >= kMinCPOffset);
391 ASSERT(cp_offset + str.length() - 1 <= kMaxCPOffset);
392 // It is vital that this loop is backwards due to the unchecked character
393 // load below.
394 for (int i = str.length() - 1; i >= 0; i--) {
395 if (check_end_of_string && i == str.length() - 1) {
396 Emit(BC_LOAD_CURRENT_CHAR, cp_offset + i);
397 EmitOrLink(on_failure);
398 } else {
399 Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED, cp_offset + i);
400 }
401 Emit(BC_CHECK_NOT_CHAR, str[i]);
402 EmitOrLink(on_failure);
403 }
404 }
405
406
IfRegisterLT(int register_index,int comparand,Label * on_less_than)407 void RegExpMacroAssemblerIrregexp::IfRegisterLT(int register_index,
408 int comparand,
409 Label* on_less_than) {
410 ASSERT(register_index >= 0);
411 ASSERT(register_index <= kMaxRegister);
412 Emit(BC_CHECK_REGISTER_LT, register_index);
413 Emit32(comparand);
414 EmitOrLink(on_less_than);
415 }
416
417
IfRegisterGE(int register_index,int comparand,Label * on_greater_or_equal)418 void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index,
419 int comparand,
420 Label* on_greater_or_equal) {
421 ASSERT(register_index >= 0);
422 ASSERT(register_index <= kMaxRegister);
423 Emit(BC_CHECK_REGISTER_GE, register_index);
424 Emit32(comparand);
425 EmitOrLink(on_greater_or_equal);
426 }
427
428
IfRegisterEqPos(int register_index,Label * on_eq)429 void RegExpMacroAssemblerIrregexp::IfRegisterEqPos(int register_index,
430 Label* on_eq) {
431 ASSERT(register_index >= 0);
432 ASSERT(register_index <= kMaxRegister);
433 Emit(BC_CHECK_REGISTER_EQ_POS, register_index);
434 EmitOrLink(on_eq);
435 }
436
437
GetCode(Handle<String> source)438 Handle<HeapObject> RegExpMacroAssemblerIrregexp::GetCode(
439 Handle<String> source) {
440 Bind(&backtrack_);
441 Emit(BC_POP_BT, 0);
442 Handle<ByteArray> array = FACTORY->NewByteArray(length());
443 Copy(array->GetDataStartAddress());
444 return array;
445 }
446
447
length()448 int RegExpMacroAssemblerIrregexp::length() {
449 return pc_;
450 }
451
452
Copy(Address a)453 void RegExpMacroAssemblerIrregexp::Copy(Address a) {
454 memcpy(a, buffer_.start(), length());
455 }
456
457
Expand()458 void RegExpMacroAssemblerIrregexp::Expand() {
459 bool old_buffer_was_our_own = own_buffer_;
460 Vector<byte> old_buffer = buffer_;
461 buffer_ = Vector<byte>::New(old_buffer.length() * 2);
462 own_buffer_ = true;
463 memcpy(buffer_.start(), old_buffer.start(), old_buffer.length());
464 if (old_buffer_was_our_own) {
465 old_buffer.Dispose();
466 }
467 }
468
469 #endif // V8_INTERPRETED_REGEXP
470
471 } } // namespace v8::internal
472