1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #if V8_TARGET_ARCH_ARM64
6
7 #include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
8
9 #include "src/code-stubs.h"
10 #include "src/log.h"
11 #include "src/macro-assembler.h"
12 #include "src/regexp/regexp-macro-assembler.h"
13 #include "src/regexp/regexp-stack.h"
14 #include "src/unicode.h"
15
16 namespace v8 {
17 namespace internal {
18
19 #ifndef V8_INTERPRETED_REGEXP
20 /*
21 * This assembler uses the following register assignment convention:
22 * - w19 : Used to temporarily store a value before a call to C code.
23 * See CheckNotBackReferenceIgnoreCase.
24 * - x20 : Pointer to the current code object (Code*),
25 * it includes the heap object tag.
26 * - w21 : Current position in input, as negative offset from
27 * the end of the string. Please notice that this is
28 * the byte offset, not the character offset!
29 * - w22 : Currently loaded character. Must be loaded using
30 * LoadCurrentCharacter before using any of the dispatch methods.
31 * - x23 : Points to tip of backtrack stack.
32 * - w24 : Position of the first character minus one: non_position_value.
33 * Used to initialize capture registers.
34 * - x25 : Address at the end of the input string: input_end.
35 * Points to byte after last character in input.
36 * - x26 : Address at the start of the input string: input_start.
37 * - w27 : Where to start in the input string.
38 * - x28 : Output array pointer.
39 * - x29/fp : Frame pointer. Used to access arguments, local variables and
40 * RegExp registers.
41 * - x16/x17 : IP registers, used by assembler. Very volatile.
42 * - csp : Points to tip of C stack.
43 *
44 * - x0-x7 : Used as a cache to store 32 bit capture registers. These
45 * registers need to be retained every time a call to C code
46 * is done.
47 *
48 * The remaining registers are free for computations.
49 * Each call to a public method should retain this convention.
50 *
51 * The stack will have the following structure:
52 *
53 * Location Name Description
54 * (as referred to in
55 * the code)
56 *
57 * - fp[104] isolate Address of the current isolate.
58 * - fp[96] return_address Secondary link/return address
59 * used by an exit frame if this is a
60 * native call.
61 * ^^^ csp when called ^^^
62 * - fp[88] lr Return from the RegExp code.
63 * - fp[80] r29 Old frame pointer (CalleeSaved).
64 * - fp[0..72] r19-r28 Backup of CalleeSaved registers.
65 * - fp[-8] direct_call 1 => Direct call from JavaScript code.
66 * 0 => Call through the runtime system.
67 * - fp[-16] stack_base High end of the memory area to use as
68 * the backtracking stack.
69 * - fp[-24] output_size Output may fit multiple sets of matches.
70 * - fp[-32] input Handle containing the input string.
71 * - fp[-40] success_counter
72 * ^^^^^^^^^^^^^ From here and downwards we store 32 bit values ^^^^^^^^^^^^^
73 * - fp[-44] register N Capture registers initialized with
74 * - fp[-48] register N + 1 non_position_value.
75 * ... The first kNumCachedRegisters (N) registers
76 * ... are cached in x0 to x7.
77 * ... Only positions must be stored in the first
78 * - ... num_saved_registers_ registers.
79 * - ...
80 * - register N + num_registers - 1
81 * ^^^^^^^^^ csp ^^^^^^^^^
82 *
83 * The first num_saved_registers_ registers are initialized to point to
84 * "character -1" in the string (i.e., char_size() bytes before the first
85 * character of the string). The remaining registers start out as garbage.
86 *
87 * The data up to the return address must be placed there by the calling
88 * code and the remaining arguments are passed in registers, e.g. by calling the
89 * code entry as cast to a function with the signature:
90 * int (*match)(String* input,
91 * int start_offset,
92 * Address input_start,
93 * Address input_end,
94 * int* output,
95 * int output_size,
96 * Address stack_base,
97 * bool direct_call = false,
98 * Address secondary_return_address, // Only used by native call.
99 * Isolate* isolate)
100 * The call is performed by NativeRegExpMacroAssembler::Execute()
101 * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro
102 * in arm64/simulator-arm64.h.
103 * When calling as a non-direct call (i.e., from C++ code), the return address
104 * area is overwritten with the LR register by the RegExp code. When doing a
105 * direct call from generated code, the return address is placed there by
106 * the calling code, as in a normal exit frame.
107 */
108
109 #define __ ACCESS_MASM(masm_)
110
RegExpMacroAssemblerARM64(Isolate * isolate,Zone * zone,Mode mode,int registers_to_save)111 RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(Isolate* isolate,
112 Zone* zone, Mode mode,
113 int registers_to_save)
114 : NativeRegExpMacroAssembler(isolate, zone),
115 masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
116 CodeObjectRequired::kYes)),
117 mode_(mode),
118 num_registers_(registers_to_save),
119 num_saved_registers_(registers_to_save),
120 entry_label_(),
121 start_label_(),
122 success_label_(),
123 backtrack_label_(),
124 exit_label_() {
125 __ SetStackPointer(csp);
126 DCHECK_EQ(0, registers_to_save % 2);
127 // We can cache at most 16 W registers in x0-x7.
128 STATIC_ASSERT(kNumCachedRegisters <= 16);
129 STATIC_ASSERT((kNumCachedRegisters % 2) == 0);
130 __ B(&entry_label_); // We'll write the entry code later.
131 __ Bind(&start_label_); // And then continue from here.
132 }
133
134
~RegExpMacroAssemblerARM64()135 RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() {
136 delete masm_;
137 // Unuse labels in case we throw away the assembler without calling GetCode.
138 entry_label_.Unuse();
139 start_label_.Unuse();
140 success_label_.Unuse();
141 backtrack_label_.Unuse();
142 exit_label_.Unuse();
143 check_preempt_label_.Unuse();
144 stack_overflow_label_.Unuse();
145 }
146
stack_limit_slack()147 int RegExpMacroAssemblerARM64::stack_limit_slack() {
148 return RegExpStack::kStackLimitSlack;
149 }
150
151
AdvanceCurrentPosition(int by)152 void RegExpMacroAssemblerARM64::AdvanceCurrentPosition(int by) {
153 if (by != 0) {
154 __ Add(current_input_offset(),
155 current_input_offset(), by * char_size());
156 }
157 }
158
159
AdvanceRegister(int reg,int by)160 void RegExpMacroAssemblerARM64::AdvanceRegister(int reg, int by) {
161 DCHECK((reg >= 0) && (reg < num_registers_));
162 if (by != 0) {
163 Register to_advance;
164 RegisterState register_state = GetRegisterState(reg);
165 switch (register_state) {
166 case STACKED:
167 __ Ldr(w10, register_location(reg));
168 __ Add(w10, w10, by);
169 __ Str(w10, register_location(reg));
170 break;
171 case CACHED_LSW:
172 to_advance = GetCachedRegister(reg);
173 __ Add(to_advance, to_advance, by);
174 break;
175 case CACHED_MSW:
176 to_advance = GetCachedRegister(reg);
177 __ Add(to_advance, to_advance,
178 static_cast<int64_t>(by) << kWRegSizeInBits);
179 break;
180 default:
181 UNREACHABLE();
182 break;
183 }
184 }
185 }
186
187
Backtrack()188 void RegExpMacroAssemblerARM64::Backtrack() {
189 CheckPreemption();
190 Pop(w10);
191 __ Add(x10, code_pointer(), Operand(w10, UXTW));
192 __ Br(x10);
193 }
194
195
Bind(Label * label)196 void RegExpMacroAssemblerARM64::Bind(Label* label) {
197 __ Bind(label);
198 }
199
200
CheckCharacter(uint32_t c,Label * on_equal)201 void RegExpMacroAssemblerARM64::CheckCharacter(uint32_t c, Label* on_equal) {
202 CompareAndBranchOrBacktrack(current_character(), c, eq, on_equal);
203 }
204
205
CheckCharacterGT(uc16 limit,Label * on_greater)206 void RegExpMacroAssemblerARM64::CheckCharacterGT(uc16 limit,
207 Label* on_greater) {
208 CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater);
209 }
210
211
CheckAtStart(Label * on_at_start)212 void RegExpMacroAssemblerARM64::CheckAtStart(Label* on_at_start) {
213 __ Add(w10, current_input_offset(), Operand(-char_size()));
214 __ Cmp(w10, string_start_minus_one());
215 BranchOrBacktrack(eq, on_at_start);
216 }
217
218
CheckNotAtStart(int cp_offset,Label * on_not_at_start)219 void RegExpMacroAssemblerARM64::CheckNotAtStart(int cp_offset,
220 Label* on_not_at_start) {
221 __ Add(w10, current_input_offset(),
222 Operand(-char_size() + cp_offset * char_size()));
223 __ Cmp(w10, string_start_minus_one());
224 BranchOrBacktrack(ne, on_not_at_start);
225 }
226
227
CheckCharacterLT(uc16 limit,Label * on_less)228 void RegExpMacroAssemblerARM64::CheckCharacterLT(uc16 limit, Label* on_less) {
229 CompareAndBranchOrBacktrack(current_character(), limit, lo, on_less);
230 }
231
232
CheckCharacters(Vector<const uc16> str,int cp_offset,Label * on_failure,bool check_end_of_string)233 void RegExpMacroAssemblerARM64::CheckCharacters(Vector<const uc16> str,
234 int cp_offset,
235 Label* on_failure,
236 bool check_end_of_string) {
237 // This method is only ever called from the cctests.
238
239 if (check_end_of_string) {
240 // Is last character of required match inside string.
241 CheckPosition(cp_offset + str.length() - 1, on_failure);
242 }
243
244 Register characters_address = x11;
245
246 __ Add(characters_address,
247 input_end(),
248 Operand(current_input_offset(), SXTW));
249 if (cp_offset != 0) {
250 __ Add(characters_address, characters_address, cp_offset * char_size());
251 }
252
253 for (int i = 0; i < str.length(); i++) {
254 if (mode_ == LATIN1) {
255 __ Ldrb(w10, MemOperand(characters_address, 1, PostIndex));
256 DCHECK(str[i] <= String::kMaxOneByteCharCode);
257 } else {
258 __ Ldrh(w10, MemOperand(characters_address, 2, PostIndex));
259 }
260 CompareAndBranchOrBacktrack(w10, str[i], ne, on_failure);
261 }
262 }
263
264
CheckGreedyLoop(Label * on_equal)265 void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
266 __ Ldr(w10, MemOperand(backtrack_stackpointer()));
267 __ Cmp(current_input_offset(), w10);
268 __ Cset(x11, eq);
269 __ Add(backtrack_stackpointer(),
270 backtrack_stackpointer(), Operand(x11, LSL, kWRegSizeLog2));
271 BranchOrBacktrack(eq, on_equal);
272 }
273
274
CheckNotBackReferenceIgnoreCase(int start_reg,bool read_backward,bool unicode,Label * on_no_match)275 void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
276 int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
277 Label fallthrough;
278
279 Register capture_start_offset = w10;
280 // Save the capture length in a callee-saved register so it will
281 // be preserved if we call a C helper.
282 Register capture_length = w19;
283 DCHECK(kCalleeSaved.IncludesAliasOf(capture_length));
284
285 // Find length of back-referenced capture.
286 DCHECK((start_reg % 2) == 0);
287 if (start_reg < kNumCachedRegisters) {
288 __ Mov(capture_start_offset.X(), GetCachedRegister(start_reg));
289 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
290 } else {
291 __ Ldp(w11, capture_start_offset, capture_location(start_reg, x10));
292 }
293 __ Sub(capture_length, w11, capture_start_offset); // Length to check.
294
295 // At this point, the capture registers are either both set or both cleared.
296 // If the capture length is zero, then the capture is either empty or cleared.
297 // Fall through in both cases.
298 __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
299
300 // Check that there are enough characters left in the input.
301 if (read_backward) {
302 __ Add(w12, string_start_minus_one(), capture_length);
303 __ Cmp(current_input_offset(), w12);
304 BranchOrBacktrack(le, on_no_match);
305 } else {
306 __ Cmn(capture_length, current_input_offset());
307 BranchOrBacktrack(gt, on_no_match);
308 }
309
310 if (mode_ == LATIN1) {
311 Label success;
312 Label fail;
313 Label loop_check;
314
315 Register capture_start_address = x12;
316 Register capture_end_addresss = x13;
317 Register current_position_address = x14;
318
319 __ Add(capture_start_address,
320 input_end(),
321 Operand(capture_start_offset, SXTW));
322 __ Add(capture_end_addresss,
323 capture_start_address,
324 Operand(capture_length, SXTW));
325 __ Add(current_position_address,
326 input_end(),
327 Operand(current_input_offset(), SXTW));
328 if (read_backward) {
329 // Offset by length when matching backwards.
330 __ Sub(current_position_address, current_position_address,
331 Operand(capture_length, SXTW));
332 }
333
334 Label loop;
335 __ Bind(&loop);
336 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
337 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
338 __ Cmp(w10, w11);
339 __ B(eq, &loop_check);
340
341 // Mismatch, try case-insensitive match (converting letters to lower-case).
342 __ Orr(w10, w10, 0x20); // Convert capture character to lower-case.
343 __ Orr(w11, w11, 0x20); // Also convert input character.
344 __ Cmp(w11, w10);
345 __ B(ne, &fail);
346 __ Sub(w10, w10, 'a');
347 __ Cmp(w10, 'z' - 'a'); // Is w10 a lowercase letter?
348 __ B(ls, &loop_check); // In range 'a'-'z'.
349 // Latin-1: Check for values in range [224,254] but not 247.
350 __ Sub(w10, w10, 224 - 'a');
351 __ Cmp(w10, 254 - 224);
352 __ Ccmp(w10, 247 - 224, ZFlag, ls); // Check for 247.
353 __ B(eq, &fail); // Weren't Latin-1 letters.
354
355 __ Bind(&loop_check);
356 __ Cmp(capture_start_address, capture_end_addresss);
357 __ B(lt, &loop);
358 __ B(&success);
359
360 __ Bind(&fail);
361 BranchOrBacktrack(al, on_no_match);
362
363 __ Bind(&success);
364 // Compute new value of character position after the matched part.
365 __ Sub(current_input_offset().X(), current_position_address, input_end());
366 if (read_backward) {
367 __ Sub(current_input_offset().X(), current_input_offset().X(),
368 Operand(capture_length, SXTW));
369 }
370 if (masm_->emit_debug_code()) {
371 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
372 __ Ccmp(current_input_offset(), 0, NoFlag, eq);
373 // The current input offset should be <= 0, and fit in a W register.
374 __ Check(le, kOffsetOutOfRange);
375 }
376 } else {
377 DCHECK(mode_ == UC16);
378 int argument_count = 4;
379
380 // The cached registers need to be retained.
381 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
382 DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters);
383 __ PushCPURegList(cached_registers);
384
385 // Put arguments into arguments registers.
386 // Parameters are
387 // x0: Address byte_offset1 - Address captured substring's start.
388 // x1: Address byte_offset2 - Address of current character position.
389 // w2: size_t byte_length - length of capture in bytes(!)
390 // x3: Isolate* isolate or 0 if unicode flag
391
392 // Address of start of capture.
393 __ Add(x0, input_end(), Operand(capture_start_offset, SXTW));
394 // Length of capture.
395 __ Mov(w2, capture_length);
396 // Address of current input position.
397 __ Add(x1, input_end(), Operand(current_input_offset(), SXTW));
398 if (read_backward) {
399 __ Sub(x1, x1, Operand(capture_length, SXTW));
400 }
401 // Isolate.
402 #ifdef V8_I18N_SUPPORT
403 if (unicode) {
404 __ Mov(x3, Operand(0));
405 } else // NOLINT
406 #endif // V8_I18N_SUPPORT
407 {
408 __ Mov(x3, ExternalReference::isolate_address(isolate()));
409 }
410
411 {
412 AllowExternalCallThatCantCauseGC scope(masm_);
413 ExternalReference function =
414 ExternalReference::re_case_insensitive_compare_uc16(isolate());
415 __ CallCFunction(function, argument_count);
416 }
417
418 // Check if function returned non-zero for success or zero for failure.
419 // x0 is one of the registers used as a cache so it must be tested before
420 // the cache is restored.
421 __ Cmp(x0, 0);
422 __ PopCPURegList(cached_registers);
423 BranchOrBacktrack(eq, on_no_match);
424
425 // On success, advance position by length of capture.
426 if (read_backward) {
427 __ Sub(current_input_offset(), current_input_offset(), capture_length);
428 } else {
429 __ Add(current_input_offset(), current_input_offset(), capture_length);
430 }
431 }
432
433 __ Bind(&fallthrough);
434 }
435
CheckNotBackReference(int start_reg,bool read_backward,Label * on_no_match)436 void RegExpMacroAssemblerARM64::CheckNotBackReference(int start_reg,
437 bool read_backward,
438 Label* on_no_match) {
439 Label fallthrough;
440
441 Register capture_start_address = x12;
442 Register capture_end_address = x13;
443 Register current_position_address = x14;
444 Register capture_length = w15;
445
446 // Find length of back-referenced capture.
447 DCHECK((start_reg % 2) == 0);
448 if (start_reg < kNumCachedRegisters) {
449 __ Mov(x10, GetCachedRegister(start_reg));
450 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
451 } else {
452 __ Ldp(w11, w10, capture_location(start_reg, x10));
453 }
454 __ Sub(capture_length, w11, w10); // Length to check.
455
456 // At this point, the capture registers are either both set or both cleared.
457 // If the capture length is zero, then the capture is either empty or cleared.
458 // Fall through in both cases.
459 __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
460
461 // Check that there are enough characters left in the input.
462 if (read_backward) {
463 __ Add(w12, string_start_minus_one(), capture_length);
464 __ Cmp(current_input_offset(), w12);
465 BranchOrBacktrack(le, on_no_match);
466 } else {
467 __ Cmn(capture_length, current_input_offset());
468 BranchOrBacktrack(gt, on_no_match);
469 }
470
471 // Compute pointers to match string and capture string
472 __ Add(capture_start_address, input_end(), Operand(w10, SXTW));
473 __ Add(capture_end_address,
474 capture_start_address,
475 Operand(capture_length, SXTW));
476 __ Add(current_position_address,
477 input_end(),
478 Operand(current_input_offset(), SXTW));
479 if (read_backward) {
480 // Offset by length when matching backwards.
481 __ Sub(current_position_address, current_position_address,
482 Operand(capture_length, SXTW));
483 }
484
485 Label loop;
486 __ Bind(&loop);
487 if (mode_ == LATIN1) {
488 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
489 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
490 } else {
491 DCHECK(mode_ == UC16);
492 __ Ldrh(w10, MemOperand(capture_start_address, 2, PostIndex));
493 __ Ldrh(w11, MemOperand(current_position_address, 2, PostIndex));
494 }
495 __ Cmp(w10, w11);
496 BranchOrBacktrack(ne, on_no_match);
497 __ Cmp(capture_start_address, capture_end_address);
498 __ B(lt, &loop);
499
500 // Move current character position to position after match.
501 __ Sub(current_input_offset().X(), current_position_address, input_end());
502 if (read_backward) {
503 __ Sub(current_input_offset().X(), current_input_offset().X(),
504 Operand(capture_length, SXTW));
505 }
506
507 if (masm_->emit_debug_code()) {
508 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
509 __ Ccmp(current_input_offset(), 0, NoFlag, eq);
510 // The current input offset should be <= 0, and fit in a W register.
511 __ Check(le, kOffsetOutOfRange);
512 }
513 __ Bind(&fallthrough);
514 }
515
516
CheckNotCharacter(unsigned c,Label * on_not_equal)517 void RegExpMacroAssemblerARM64::CheckNotCharacter(unsigned c,
518 Label* on_not_equal) {
519 CompareAndBranchOrBacktrack(current_character(), c, ne, on_not_equal);
520 }
521
522
CheckCharacterAfterAnd(uint32_t c,uint32_t mask,Label * on_equal)523 void RegExpMacroAssemblerARM64::CheckCharacterAfterAnd(uint32_t c,
524 uint32_t mask,
525 Label* on_equal) {
526 __ And(w10, current_character(), mask);
527 CompareAndBranchOrBacktrack(w10, c, eq, on_equal);
528 }
529
530
CheckNotCharacterAfterAnd(unsigned c,unsigned mask,Label * on_not_equal)531 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterAnd(unsigned c,
532 unsigned mask,
533 Label* on_not_equal) {
534 __ And(w10, current_character(), mask);
535 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
536 }
537
538
CheckNotCharacterAfterMinusAnd(uc16 c,uc16 minus,uc16 mask,Label * on_not_equal)539 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterMinusAnd(
540 uc16 c,
541 uc16 minus,
542 uc16 mask,
543 Label* on_not_equal) {
544 DCHECK(minus < String::kMaxUtf16CodeUnit);
545 __ Sub(w10, current_character(), minus);
546 __ And(w10, w10, mask);
547 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
548 }
549
550
CheckCharacterInRange(uc16 from,uc16 to,Label * on_in_range)551 void RegExpMacroAssemblerARM64::CheckCharacterInRange(
552 uc16 from,
553 uc16 to,
554 Label* on_in_range) {
555 __ Sub(w10, current_character(), from);
556 // Unsigned lower-or-same condition.
557 CompareAndBranchOrBacktrack(w10, to - from, ls, on_in_range);
558 }
559
560
CheckCharacterNotInRange(uc16 from,uc16 to,Label * on_not_in_range)561 void RegExpMacroAssemblerARM64::CheckCharacterNotInRange(
562 uc16 from,
563 uc16 to,
564 Label* on_not_in_range) {
565 __ Sub(w10, current_character(), from);
566 // Unsigned higher condition.
567 CompareAndBranchOrBacktrack(w10, to - from, hi, on_not_in_range);
568 }
569
570
CheckBitInTable(Handle<ByteArray> table,Label * on_bit_set)571 void RegExpMacroAssemblerARM64::CheckBitInTable(
572 Handle<ByteArray> table,
573 Label* on_bit_set) {
574 __ Mov(x11, Operand(table));
575 if ((mode_ != LATIN1) || (kTableMask != String::kMaxOneByteCharCode)) {
576 __ And(w10, current_character(), kTableMask);
577 __ Add(w10, w10, ByteArray::kHeaderSize - kHeapObjectTag);
578 } else {
579 __ Add(w10, current_character(), ByteArray::kHeaderSize - kHeapObjectTag);
580 }
581 __ Ldrb(w11, MemOperand(x11, w10, UXTW));
582 CompareAndBranchOrBacktrack(w11, 0, ne, on_bit_set);
583 }
584
585
CheckSpecialCharacterClass(uc16 type,Label * on_no_match)586 bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type,
587 Label* on_no_match) {
588 // Range checks (c in min..max) are generally implemented by an unsigned
589 // (c - min) <= (max - min) check
590 switch (type) {
591 case 's':
592 // Match space-characters
593 if (mode_ == LATIN1) {
594 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
595 Label success;
596 // Check for ' ' or 0x00a0.
597 __ Cmp(current_character(), ' ');
598 __ Ccmp(current_character(), 0x00a0, ZFlag, ne);
599 __ B(eq, &success);
600 // Check range 0x09..0x0d.
601 __ Sub(w10, current_character(), '\t');
602 CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match);
603 __ Bind(&success);
604 return true;
605 }
606 return false;
607 case 'S':
608 // The emitted code for generic character classes is good enough.
609 return false;
610 case 'd':
611 // Match ASCII digits ('0'..'9').
612 __ Sub(w10, current_character(), '0');
613 CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match);
614 return true;
615 case 'D':
616 // Match ASCII non-digits.
617 __ Sub(w10, current_character(), '0');
618 CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match);
619 return true;
620 case '.': {
621 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
622 // Here we emit the conditional branch only once at the end to make branch
623 // prediction more efficient, even though we could branch out of here
624 // as soon as a character matches.
625 __ Cmp(current_character(), 0x0a);
626 __ Ccmp(current_character(), 0x0d, ZFlag, ne);
627 if (mode_ == UC16) {
628 __ Sub(w10, current_character(), 0x2028);
629 // If the Z flag was set we clear the flags to force a branch.
630 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
631 // ls -> !((C==1) && (Z==0))
632 BranchOrBacktrack(ls, on_no_match);
633 } else {
634 BranchOrBacktrack(eq, on_no_match);
635 }
636 return true;
637 }
638 case 'n': {
639 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
640 // We have to check all 4 newline characters before emitting
641 // the conditional branch.
642 __ Cmp(current_character(), 0x0a);
643 __ Ccmp(current_character(), 0x0d, ZFlag, ne);
644 if (mode_ == UC16) {
645 __ Sub(w10, current_character(), 0x2028);
646 // If the Z flag was set we clear the flags to force a fall-through.
647 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
648 // hi -> (C==1) && (Z==0)
649 BranchOrBacktrack(hi, on_no_match);
650 } else {
651 BranchOrBacktrack(ne, on_no_match);
652 }
653 return true;
654 }
655 case 'w': {
656 if (mode_ != LATIN1) {
657 // Table is 256 entries, so all Latin1 characters can be tested.
658 CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match);
659 }
660 ExternalReference map = ExternalReference::re_word_character_map();
661 __ Mov(x10, map);
662 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
663 CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match);
664 return true;
665 }
666 case 'W': {
667 Label done;
668 if (mode_ != LATIN1) {
669 // Table is 256 entries, so all Latin1 characters can be tested.
670 __ Cmp(current_character(), 'z');
671 __ B(hi, &done);
672 }
673 ExternalReference map = ExternalReference::re_word_character_map();
674 __ Mov(x10, map);
675 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
676 CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match);
677 __ Bind(&done);
678 return true;
679 }
680 case '*':
681 // Match any character.
682 return true;
683 // No custom implementation (yet): s(UC16), S(UC16).
684 default:
685 return false;
686 }
687 }
688
689
Fail()690 void RegExpMacroAssemblerARM64::Fail() {
691 __ Mov(w0, FAILURE);
692 __ B(&exit_label_);
693 }
694
695
GetCode(Handle<String> source)696 Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
697 Label return_w0;
698 // Finalize code - write the entry point code now we know how many
699 // registers we need.
700
701 // Entry code:
702 __ Bind(&entry_label_);
703
704 // Arguments on entry:
705 // x0: String* input
706 // x1: int start_offset
707 // x2: byte* input_start
708 // x3: byte* input_end
709 // x4: int* output array
710 // x5: int output array size
711 // x6: Address stack_base
712 // x7: int direct_call
713
714 // The stack pointer should be csp on entry.
715 // csp[8]: address of the current isolate
716 // csp[0]: secondary link/return address used by native call
717
718 // Tell the system that we have a stack frame. Because the type is MANUAL, no
719 // code is generated.
720 FrameScope scope(masm_, StackFrame::MANUAL);
721
722 // Push registers on the stack, only push the argument registers that we need.
723 CPURegList argument_registers(x0, x5, x6, x7);
724
725 CPURegList registers_to_retain = kCalleeSaved;
726 DCHECK(kCalleeSaved.Count() == 11);
727 registers_to_retain.Combine(lr);
728
729 DCHECK(csp.Is(__ StackPointer()));
730 __ PushCPURegList(registers_to_retain);
731 __ PushCPURegList(argument_registers);
732
733 // Set frame pointer in place.
734 __ Add(frame_pointer(), csp, argument_registers.Count() * kPointerSize);
735
736 // Initialize callee-saved registers.
737 __ Mov(start_offset(), w1);
738 __ Mov(input_start(), x2);
739 __ Mov(input_end(), x3);
740 __ Mov(output_array(), x4);
741
742 // Set the number of registers we will need to allocate, that is:
743 // - success_counter (X register)
744 // - (num_registers_ - kNumCachedRegisters) (W registers)
745 int num_wreg_to_allocate = num_registers_ - kNumCachedRegisters;
746 // Do not allocate registers on the stack if they can all be cached.
747 if (num_wreg_to_allocate < 0) { num_wreg_to_allocate = 0; }
748 // Make room for the success_counter.
749 num_wreg_to_allocate += 2;
750
751 // Make sure the stack alignment will be respected.
752 int alignment = masm_->ActivationFrameAlignment();
753 DCHECK_EQ(alignment % 16, 0);
754 int align_mask = (alignment / kWRegSize) - 1;
755 num_wreg_to_allocate = (num_wreg_to_allocate + align_mask) & ~align_mask;
756
757 // Check if we have space on the stack.
758 Label stack_limit_hit;
759 Label stack_ok;
760
761 ExternalReference stack_limit =
762 ExternalReference::address_of_stack_limit(isolate());
763 __ Mov(x10, stack_limit);
764 __ Ldr(x10, MemOperand(x10));
765 __ Subs(x10, csp, x10);
766
767 // Handle it if the stack pointer is already below the stack limit.
768 __ B(ls, &stack_limit_hit);
769
770 // Check if there is room for the variable number of registers above
771 // the stack limit.
772 __ Cmp(x10, num_wreg_to_allocate * kWRegSize);
773 __ B(hs, &stack_ok);
774
775 // Exit with OutOfMemory exception. There is not enough space on the stack
776 // for our working registers.
777 __ Mov(w0, EXCEPTION);
778 __ B(&return_w0);
779
780 __ Bind(&stack_limit_hit);
781 CallCheckStackGuardState(x10);
782 // If returned value is non-zero, we exit with the returned value as result.
783 __ Cbnz(w0, &return_w0);
784
785 __ Bind(&stack_ok);
786
787 // Allocate space on stack.
788 __ Claim(num_wreg_to_allocate, kWRegSize);
789
790 // Initialize success_counter with 0.
791 __ Str(wzr, MemOperand(frame_pointer(), kSuccessCounter));
792
793 // Find negative length (offset of start relative to end).
794 __ Sub(x10, input_start(), input_end());
795 if (masm_->emit_debug_code()) {
796 // Check that the input string length is < 2^30.
797 __ Neg(x11, x10);
798 __ Cmp(x11, (1<<30) - 1);
799 __ Check(ls, kInputStringTooLong);
800 }
801 __ Mov(current_input_offset(), w10);
802
803 // The non-position value is used as a clearing value for the
804 // capture registers, it corresponds to the position of the first character
805 // minus one.
806 __ Sub(string_start_minus_one(), current_input_offset(), char_size());
807 __ Sub(string_start_minus_one(), string_start_minus_one(),
808 Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0));
809 // We can store this value twice in an X register for initializing
810 // on-stack registers later.
811 __ Orr(twice_non_position_value(), string_start_minus_one().X(),
812 Operand(string_start_minus_one().X(), LSL, kWRegSizeInBits));
813
814 // Initialize code pointer register.
815 __ Mov(code_pointer(), Operand(masm_->CodeObject()));
816
817 Label load_char_start_regexp, start_regexp;
818 // Load newline if index is at start, previous character otherwise.
819 __ Cbnz(start_offset(), &load_char_start_regexp);
820 __ Mov(current_character(), '\n');
821 __ B(&start_regexp);
822
823 // Global regexp restarts matching here.
824 __ Bind(&load_char_start_regexp);
825 // Load previous char as initial value of current character register.
826 LoadCurrentCharacterUnchecked(-1, 1);
827 __ Bind(&start_regexp);
828 // Initialize on-stack registers.
829 if (num_saved_registers_ > 0) {
830 ClearRegisters(0, num_saved_registers_ - 1);
831 }
832
833 // Initialize backtrack stack pointer.
834 __ Ldr(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackBase));
835
836 // Execute
837 __ B(&start_label_);
838
839 if (backtrack_label_.is_linked()) {
840 __ Bind(&backtrack_label_);
841 Backtrack();
842 }
843
844 if (success_label_.is_linked()) {
845 Register first_capture_start = w15;
846
847 // Save captures when successful.
848 __ Bind(&success_label_);
849
850 if (num_saved_registers_ > 0) {
851 // V8 expects the output to be an int32_t array.
852 Register capture_start = w12;
853 Register capture_end = w13;
854 Register input_length = w14;
855
856 // Copy captures to output.
857
858 // Get string length.
859 __ Sub(x10, input_end(), input_start());
860 if (masm_->emit_debug_code()) {
861 // Check that the input string length is < 2^30.
862 __ Cmp(x10, (1<<30) - 1);
863 __ Check(ls, kInputStringTooLong);
864 }
865 // input_start has a start_offset offset on entry. We need to include
866 // it when computing the length of the whole string.
867 if (mode_ == UC16) {
868 __ Add(input_length, start_offset(), Operand(w10, LSR, 1));
869 } else {
870 __ Add(input_length, start_offset(), w10);
871 }
872
873 // Copy the results to the output array from the cached registers first.
874 for (int i = 0;
875 (i < num_saved_registers_) && (i < kNumCachedRegisters);
876 i += 2) {
877 __ Mov(capture_start.X(), GetCachedRegister(i));
878 __ Lsr(capture_end.X(), capture_start.X(), kWRegSizeInBits);
879 if ((i == 0) && global_with_zero_length_check()) {
880 // Keep capture start for the zero-length check later.
881 __ Mov(first_capture_start, capture_start);
882 }
883 // Offsets need to be relative to the start of the string.
884 if (mode_ == UC16) {
885 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
886 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
887 } else {
888 __ Add(capture_start, input_length, capture_start);
889 __ Add(capture_end, input_length, capture_end);
890 }
891 // The output pointer advances for a possible global match.
892 __ Stp(capture_start,
893 capture_end,
894 MemOperand(output_array(), kPointerSize, PostIndex));
895 }
896
897 // Only carry on if there are more than kNumCachedRegisters capture
898 // registers.
899 int num_registers_left_on_stack =
900 num_saved_registers_ - kNumCachedRegisters;
901 if (num_registers_left_on_stack > 0) {
902 Register base = x10;
903 // There are always an even number of capture registers. A couple of
904 // registers determine one match with two offsets.
905 DCHECK_EQ(0, num_registers_left_on_stack % 2);
906 __ Add(base, frame_pointer(), kFirstCaptureOnStack);
907
908 // We can unroll the loop here, we should not unroll for less than 2
909 // registers.
910 STATIC_ASSERT(kNumRegistersToUnroll > 2);
911 if (num_registers_left_on_stack <= kNumRegistersToUnroll) {
912 for (int i = 0; i < num_registers_left_on_stack / 2; i++) {
913 __ Ldp(capture_end,
914 capture_start,
915 MemOperand(base, -kPointerSize, PostIndex));
916 if ((i == 0) && global_with_zero_length_check()) {
917 // Keep capture start for the zero-length check later.
918 __ Mov(first_capture_start, capture_start);
919 }
920 // Offsets need to be relative to the start of the string.
921 if (mode_ == UC16) {
922 __ Add(capture_start,
923 input_length,
924 Operand(capture_start, ASR, 1));
925 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
926 } else {
927 __ Add(capture_start, input_length, capture_start);
928 __ Add(capture_end, input_length, capture_end);
929 }
930 // The output pointer advances for a possible global match.
931 __ Stp(capture_start,
932 capture_end,
933 MemOperand(output_array(), kPointerSize, PostIndex));
934 }
935 } else {
936 Label loop, start;
937 __ Mov(x11, num_registers_left_on_stack);
938
939 __ Ldp(capture_end,
940 capture_start,
941 MemOperand(base, -kPointerSize, PostIndex));
942 if (global_with_zero_length_check()) {
943 __ Mov(first_capture_start, capture_start);
944 }
945 __ B(&start);
946
947 __ Bind(&loop);
948 __ Ldp(capture_end,
949 capture_start,
950 MemOperand(base, -kPointerSize, PostIndex));
951 __ Bind(&start);
952 if (mode_ == UC16) {
953 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
954 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
955 } else {
956 __ Add(capture_start, input_length, capture_start);
957 __ Add(capture_end, input_length, capture_end);
958 }
959 // The output pointer advances for a possible global match.
960 __ Stp(capture_start,
961 capture_end,
962 MemOperand(output_array(), kPointerSize, PostIndex));
963 __ Sub(x11, x11, 2);
964 __ Cbnz(x11, &loop);
965 }
966 }
967 }
968
969 if (global()) {
970 Register success_counter = w0;
971 Register output_size = x10;
972 // Restart matching if the regular expression is flagged as global.
973
974 // Increment success counter.
975 __ Ldr(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
976 __ Add(success_counter, success_counter, 1);
977 __ Str(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
978
979 // Capture results have been stored, so the number of remaining global
980 // output registers is reduced by the number of stored captures.
981 __ Ldr(output_size, MemOperand(frame_pointer(), kOutputSize));
982 __ Sub(output_size, output_size, num_saved_registers_);
983 // Check whether we have enough room for another set of capture results.
984 __ Cmp(output_size, num_saved_registers_);
985 __ B(lt, &return_w0);
986
987 // The output pointer is already set to the next field in the output
988 // array.
989 // Update output size on the frame before we restart matching.
990 __ Str(output_size, MemOperand(frame_pointer(), kOutputSize));
991
992 if (global_with_zero_length_check()) {
993 // Special case for zero-length matches.
994 __ Cmp(current_input_offset(), first_capture_start);
995 // Not a zero-length match, restart.
996 __ B(ne, &load_char_start_regexp);
997 // Offset from the end is zero if we already reached the end.
998 __ Cbz(current_input_offset(), &return_w0);
999 // Advance current position after a zero-length match.
1000 Label advance;
1001 __ bind(&advance);
1002 __ Add(current_input_offset(),
1003 current_input_offset(),
1004 Operand((mode_ == UC16) ? 2 : 1));
1005 if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
1006 }
1007
1008 __ B(&load_char_start_regexp);
1009 } else {
1010 __ Mov(w0, SUCCESS);
1011 }
1012 }
1013
1014 if (exit_label_.is_linked()) {
1015 // Exit and return w0
1016 __ Bind(&exit_label_);
1017 if (global()) {
1018 __ Ldr(w0, MemOperand(frame_pointer(), kSuccessCounter));
1019 }
1020 }
1021
1022 __ Bind(&return_w0);
1023
1024 // Set stack pointer back to first register to retain
1025 DCHECK(csp.Is(__ StackPointer()));
1026 __ Mov(csp, fp);
1027 __ AssertStackConsistency();
1028
1029 // Restore registers.
1030 __ PopCPURegList(registers_to_retain);
1031
1032 __ Ret();
1033
1034 Label exit_with_exception;
1035 // Registers x0 to x7 are used to store the first captures, they need to be
1036 // retained over calls to C++ code.
1037 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
1038 DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters);
1039
1040 if (check_preempt_label_.is_linked()) {
1041 __ Bind(&check_preempt_label_);
1042 SaveLinkRegister();
1043 // The cached registers need to be retained.
1044 __ PushCPURegList(cached_registers);
1045 CallCheckStackGuardState(x10);
1046 // Returning from the regexp code restores the stack (csp <- fp)
1047 // so we don't need to drop the link register from it before exiting.
1048 __ Cbnz(w0, &return_w0);
1049 // Reset the cached registers.
1050 __ PopCPURegList(cached_registers);
1051 RestoreLinkRegister();
1052 __ Ret();
1053 }
1054
1055 if (stack_overflow_label_.is_linked()) {
1056 __ Bind(&stack_overflow_label_);
1057 SaveLinkRegister();
1058 // The cached registers need to be retained.
1059 __ PushCPURegList(cached_registers);
1060 // Call GrowStack(backtrack_stackpointer(), &stack_base)
1061 __ Mov(x2, ExternalReference::isolate_address(isolate()));
1062 __ Add(x1, frame_pointer(), kStackBase);
1063 __ Mov(x0, backtrack_stackpointer());
1064 ExternalReference grow_stack =
1065 ExternalReference::re_grow_stack(isolate());
1066 __ CallCFunction(grow_stack, 3);
1067 // If return NULL, we have failed to grow the stack, and
1068 // must exit with a stack-overflow exception.
1069 // Returning from the regexp code restores the stack (csp <- fp)
1070 // so we don't need to drop the link register from it before exiting.
1071 __ Cbz(w0, &exit_with_exception);
1072 // Otherwise use return value as new stack pointer.
1073 __ Mov(backtrack_stackpointer(), x0);
1074 // Reset the cached registers.
1075 __ PopCPURegList(cached_registers);
1076 RestoreLinkRegister();
1077 __ Ret();
1078 }
1079
1080 if (exit_with_exception.is_linked()) {
1081 __ Bind(&exit_with_exception);
1082 __ Mov(w0, EXCEPTION);
1083 __ B(&return_w0);
1084 }
1085
1086 CodeDesc code_desc;
1087 masm_->GetCode(&code_desc);
1088 Handle<Code> code = isolate()->factory()->NewCode(
1089 code_desc, Code::ComputeFlags(Code::REGEXP), masm_->CodeObject());
1090 PROFILE(masm_->isolate(),
1091 RegExpCodeCreateEvent(AbstractCode::cast(*code), *source));
1092 return Handle<HeapObject>::cast(code);
1093 }
1094
1095
GoTo(Label * to)1096 void RegExpMacroAssemblerARM64::GoTo(Label* to) {
1097 BranchOrBacktrack(al, to);
1098 }
1099
IfRegisterGE(int reg,int comparand,Label * if_ge)1100 void RegExpMacroAssemblerARM64::IfRegisterGE(int reg, int comparand,
1101 Label* if_ge) {
1102 Register to_compare = GetRegister(reg, w10);
1103 CompareAndBranchOrBacktrack(to_compare, comparand, ge, if_ge);
1104 }
1105
1106
IfRegisterLT(int reg,int comparand,Label * if_lt)1107 void RegExpMacroAssemblerARM64::IfRegisterLT(int reg, int comparand,
1108 Label* if_lt) {
1109 Register to_compare = GetRegister(reg, w10);
1110 CompareAndBranchOrBacktrack(to_compare, comparand, lt, if_lt);
1111 }
1112
1113
IfRegisterEqPos(int reg,Label * if_eq)1114 void RegExpMacroAssemblerARM64::IfRegisterEqPos(int reg, Label* if_eq) {
1115 Register to_compare = GetRegister(reg, w10);
1116 __ Cmp(to_compare, current_input_offset());
1117 BranchOrBacktrack(eq, if_eq);
1118 }
1119
1120 RegExpMacroAssembler::IrregexpImplementation
Implementation()1121 RegExpMacroAssemblerARM64::Implementation() {
1122 return kARM64Implementation;
1123 }
1124
1125
LoadCurrentCharacter(int cp_offset,Label * on_end_of_input,bool check_bounds,int characters)1126 void RegExpMacroAssemblerARM64::LoadCurrentCharacter(int cp_offset,
1127 Label* on_end_of_input,
1128 bool check_bounds,
1129 int characters) {
1130 // TODO(pielan): Make sure long strings are caught before this, and not
1131 // just asserted in debug mode.
1132 // Be sane! (And ensure that an int32_t can be used to index the string)
1133 DCHECK(cp_offset < (1<<30));
1134 if (check_bounds) {
1135 if (cp_offset >= 0) {
1136 CheckPosition(cp_offset + characters - 1, on_end_of_input);
1137 } else {
1138 CheckPosition(cp_offset, on_end_of_input);
1139 }
1140 }
1141 LoadCurrentCharacterUnchecked(cp_offset, characters);
1142 }
1143
1144
PopCurrentPosition()1145 void RegExpMacroAssemblerARM64::PopCurrentPosition() {
1146 Pop(current_input_offset());
1147 }
1148
1149
PopRegister(int register_index)1150 void RegExpMacroAssemblerARM64::PopRegister(int register_index) {
1151 Pop(w10);
1152 StoreRegister(register_index, w10);
1153 }
1154
1155
PushBacktrack(Label * label)1156 void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) {
1157 if (label->is_bound()) {
1158 int target = label->pos();
1159 __ Mov(w10, target + Code::kHeaderSize - kHeapObjectTag);
1160 } else {
1161 __ Adr(x10, label, MacroAssembler::kAdrFar);
1162 __ Sub(x10, x10, code_pointer());
1163 if (masm_->emit_debug_code()) {
1164 __ Cmp(x10, kWRegMask);
1165 // The code offset has to fit in a W register.
1166 __ Check(ls, kOffsetOutOfRange);
1167 }
1168 }
1169 Push(w10);
1170 CheckStackLimit();
1171 }
1172
1173
PushCurrentPosition()1174 void RegExpMacroAssemblerARM64::PushCurrentPosition() {
1175 Push(current_input_offset());
1176 }
1177
1178
PushRegister(int register_index,StackCheckFlag check_stack_limit)1179 void RegExpMacroAssemblerARM64::PushRegister(int register_index,
1180 StackCheckFlag check_stack_limit) {
1181 Register to_push = GetRegister(register_index, w10);
1182 Push(to_push);
1183 if (check_stack_limit) CheckStackLimit();
1184 }
1185
1186
ReadCurrentPositionFromRegister(int reg)1187 void RegExpMacroAssemblerARM64::ReadCurrentPositionFromRegister(int reg) {
1188 Register cached_register;
1189 RegisterState register_state = GetRegisterState(reg);
1190 switch (register_state) {
1191 case STACKED:
1192 __ Ldr(current_input_offset(), register_location(reg));
1193 break;
1194 case CACHED_LSW:
1195 cached_register = GetCachedRegister(reg);
1196 __ Mov(current_input_offset(), cached_register.W());
1197 break;
1198 case CACHED_MSW:
1199 cached_register = GetCachedRegister(reg);
1200 __ Lsr(current_input_offset().X(), cached_register, kWRegSizeInBits);
1201 break;
1202 default:
1203 UNREACHABLE();
1204 break;
1205 }
1206 }
1207
1208
ReadStackPointerFromRegister(int reg)1209 void RegExpMacroAssemblerARM64::ReadStackPointerFromRegister(int reg) {
1210 Register read_from = GetRegister(reg, w10);
1211 __ Ldr(x11, MemOperand(frame_pointer(), kStackBase));
1212 __ Add(backtrack_stackpointer(), x11, Operand(read_from, SXTW));
1213 }
1214
1215
SetCurrentPositionFromEnd(int by)1216 void RegExpMacroAssemblerARM64::SetCurrentPositionFromEnd(int by) {
1217 Label after_position;
1218 __ Cmp(current_input_offset(), -by * char_size());
1219 __ B(ge, &after_position);
1220 __ Mov(current_input_offset(), -by * char_size());
1221 // On RegExp code entry (where this operation is used), the character before
1222 // the current position is expected to be already loaded.
1223 // We have advanced the position, so it's safe to read backwards.
1224 LoadCurrentCharacterUnchecked(-1, 1);
1225 __ Bind(&after_position);
1226 }
1227
1228
SetRegister(int register_index,int to)1229 void RegExpMacroAssemblerARM64::SetRegister(int register_index, int to) {
1230 DCHECK(register_index >= num_saved_registers_); // Reserved for positions!
1231 Register set_to = wzr;
1232 if (to != 0) {
1233 set_to = w10;
1234 __ Mov(set_to, to);
1235 }
1236 StoreRegister(register_index, set_to);
1237 }
1238
1239
Succeed()1240 bool RegExpMacroAssemblerARM64::Succeed() {
1241 __ B(&success_label_);
1242 return global();
1243 }
1244
1245
WriteCurrentPositionToRegister(int reg,int cp_offset)1246 void RegExpMacroAssemblerARM64::WriteCurrentPositionToRegister(int reg,
1247 int cp_offset) {
1248 Register position = current_input_offset();
1249 if (cp_offset != 0) {
1250 position = w10;
1251 __ Add(position, current_input_offset(), cp_offset * char_size());
1252 }
1253 StoreRegister(reg, position);
1254 }
1255
1256
// Emits code that resets the RegExp registers in the inclusive range
// [reg_from, reg_to]. Single registers are written with
// string_start_minus_one(); pairs of registers are written with one 64-bit
// store/move of twice_non_position_value().
// Registers below kNumCachedRegisters live in hardware registers (two 32-bit
// values per 64-bit register); the rest live in the frame.
void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) {
  DCHECK(reg_from <= reg_to);
  int num_registers = reg_to - reg_from + 1;

  // If the first capture register is cached in a hardware register but not
  // aligned on a 64-bit one, we need to clear the first one specifically.
  if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) {
    StoreRegister(reg_from, string_start_minus_one());
    num_registers--;
    reg_from++;
  }

  // Clear cached registers in pairs as far as possible.
  while ((num_registers >= 2) && (reg_from < kNumCachedRegisters)) {
    DCHECK(GetRegisterState(reg_from) == CACHED_LSW);
    __ Mov(GetCachedRegister(reg_from), twice_non_position_value());
    reg_from += 2;
    num_registers -= 2;
  }

  // An odd count remains: clear one register on its own so that whatever is
  // left can be handled two at a time.
  if ((num_registers % 2) == 1) {
    StoreRegister(reg_from, string_start_minus_one());
    num_registers--;
    reg_from++;
  }

  if (num_registers > 0) {
    // If there are some remaining registers, they are stored on the stack.
    DCHECK(reg_from >= kNumCachedRegisters);

    // Move down the indexes of the registers on stack to get the correct offset
    // in memory.
    reg_from -= kNumCachedRegisters;
    reg_to -= kNumCachedRegisters;
    // We should not unroll the loop for less than 2 registers.
    STATIC_ASSERT(kNumRegistersToUnroll > 2);
    // We position the base pointer to (reg_from + 1).
    int base_offset = kFirstRegisterOnStack -
        kWRegSize - (kWRegSize * reg_from);
    if (num_registers > kNumRegistersToUnroll) {
      // Too many registers to unroll: emit a run-time loop that clears two
      // registers per iteration, walking down the frame with x10 and counting
      // the remaining registers in x11.
      Register base = x10;
      __ Add(base, frame_pointer(), base_offset);

      Label loop;
      __ Mov(x11, num_registers);
      __ Bind(&loop);
      __ Str(twice_non_position_value(),
             MemOperand(base, -kPointerSize, PostIndex));
      __ Sub(x11, x11, 2);
      __ Cbnz(x11, &loop);
    } else {
      // Few registers: emit one 64-bit store per pair at a fixed frame offset.
      for (int i = reg_from; i <= reg_to; i += 2) {
        __ Str(twice_non_position_value(),
               MemOperand(frame_pointer(), base_offset));
        base_offset -= kWRegSize * 2;
      }
    }
  }
}
1316
1317
WriteStackPointerToRegister(int reg)1318 void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) {
1319 __ Ldr(x10, MemOperand(frame_pointer(), kStackBase));
1320 __ Sub(x10, backtrack_stackpointer(), x10);
1321 if (masm_->emit_debug_code()) {
1322 __ Cmp(x10, Operand(w10, SXTW));
1323 // The stack offset needs to fit in a W register.
1324 __ Check(eq, kOffsetOutOfRange);
1325 }
1326 StoreRegister(reg, w10);
1327 }
1328
1329
1330 // Helper function for reading a value out of a stack frame.
1331 template <typename T>
frame_entry(Address re_frame,int frame_offset)1332 static T& frame_entry(Address re_frame, int frame_offset) {
1333 return *reinterpret_cast<T*>(re_frame + frame_offset);
1334 }
1335
1336
1337 template <typename T>
frame_entry_address(Address re_frame,int frame_offset)1338 static T* frame_entry_address(Address re_frame, int frame_offset) {
1339 return reinterpret_cast<T*>(re_frame + frame_offset);
1340 }
1341
1342
CheckStackGuardState(Address * return_address,Code * re_code,Address re_frame,int start_index,const byte ** input_start,const byte ** input_end)1343 int RegExpMacroAssemblerARM64::CheckStackGuardState(
1344 Address* return_address, Code* re_code, Address re_frame, int start_index,
1345 const byte** input_start, const byte** input_end) {
1346 return NativeRegExpMacroAssembler::CheckStackGuardState(
1347 frame_entry<Isolate*>(re_frame, kIsolate), start_index,
1348 frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
1349 frame_entry_address<String*>(re_frame, kInput), input_start, input_end);
1350 }
1351
1352
CheckPosition(int cp_offset,Label * on_outside_input)1353 void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset,
1354 Label* on_outside_input) {
1355 if (cp_offset >= 0) {
1356 CompareAndBranchOrBacktrack(current_input_offset(),
1357 -cp_offset * char_size(), ge, on_outside_input);
1358 } else {
1359 __ Add(w12, current_input_offset(), Operand(cp_offset * char_size()));
1360 __ Cmp(w12, string_start_minus_one());
1361 BranchOrBacktrack(le, on_outside_input);
1362 }
1363 }
1364
1365
CanReadUnaligned()1366 bool RegExpMacroAssemblerARM64::CanReadUnaligned() {
1367 // TODO(pielan): See whether or not we should disable unaligned accesses.
1368 return !slow_safe();
1369 }
1370
1371
1372 // Private methods:
1373
// Emits a call to the C++ stack guard check through DirectCEntryStub.
// |scratch| is used to hold the address of the function being called.
// The argument registers are set up to match the parameters of
// CheckStackGuardState(return_address, re_code, re_frame, start_index,
// input_start, input_end). After the call, the input start/end registers and
// the code pointer are reloaded.
void RegExpMacroAssemblerARM64::CallCheckStackGuardState(Register scratch) {
  // Allocate space on the stack to store the return address. The
  // CheckStackGuardState C++ function will override it if the code
  // moved. Allocate extra space for 2 arguments passed by pointers.
  // AAPCS64 requires the stack to be 16 byte aligned.
  int alignment = masm_->ActivationFrameAlignment();
  DCHECK_EQ(alignment % 16, 0);
  // Round the 3 needed slots up to a multiple of the alignment, expressed in
  // X-register-sized slots.
  int align_mask = (alignment / kXRegSize) - 1;
  int xreg_to_claim = (3 + align_mask) & ~align_mask;

  DCHECK(csp.Is(__ StackPointer()));
  __ Claim(xreg_to_claim);

  // CheckStackGuardState needs the end and start addresses of the input string.
  // Both are spilled to the claimed area and passed by address (x5 and x4).
  __ Poke(input_end(), 2 * kPointerSize);
  __ Add(x5, csp, 2 * kPointerSize);
  __ Poke(input_start(), kPointerSize);
  __ Add(x4, csp, kPointerSize);

  // Start index argument.
  __ Mov(w3, start_offset());
  // RegExp code frame pointer.
  __ Mov(x2, frame_pointer());
  // Code* of self.
  __ Mov(x1, Operand(masm_->CodeObject()));

  // We need to pass a pointer to the return address as first argument.
  // The DirectCEntry stub will place the return address on the stack before
  // calling so the stack pointer will point to it.
  __ Mov(x0, csp);

  ExternalReference check_stack_guard_state =
      ExternalReference::re_check_stack_guard_state(isolate());
  __ Mov(scratch, check_stack_guard_state);
  DirectCEntryStub stub(isolate());
  stub.GenerateCall(masm_, scratch);

  // The input string may have been moved in memory, we need to reload it.
  __ Peek(input_start(), kPointerSize);
  __ Peek(input_end(), 2 * kPointerSize);

  // Release the stack area claimed above.
  DCHECK(csp.Is(__ StackPointer()));
  __ Drop(xreg_to_claim);

  // Reload the Code pointer.
  __ Mov(code_pointer(), Operand(masm_->CodeObject()));
}
1420
BranchOrBacktrack(Condition condition,Label * to)1421 void RegExpMacroAssemblerARM64::BranchOrBacktrack(Condition condition,
1422 Label* to) {
1423 if (condition == al) { // Unconditional.
1424 if (to == NULL) {
1425 Backtrack();
1426 return;
1427 }
1428 __ B(to);
1429 return;
1430 }
1431 if (to == NULL) {
1432 to = &backtrack_label_;
1433 }
1434 __ B(condition, to);
1435 }
1436
CompareAndBranchOrBacktrack(Register reg,int immediate,Condition condition,Label * to)1437 void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg,
1438 int immediate,
1439 Condition condition,
1440 Label* to) {
1441 if ((immediate == 0) && ((condition == eq) || (condition == ne))) {
1442 if (to == NULL) {
1443 to = &backtrack_label_;
1444 }
1445 if (condition == eq) {
1446 __ Cbz(reg, to);
1447 } else {
1448 __ Cbnz(reg, to);
1449 }
1450 } else {
1451 __ Cmp(reg, immediate);
1452 BranchOrBacktrack(condition, to);
1453 }
1454 }
1455
1456
CheckPreemption()1457 void RegExpMacroAssemblerARM64::CheckPreemption() {
1458 // Check for preemption.
1459 ExternalReference stack_limit =
1460 ExternalReference::address_of_stack_limit(isolate());
1461 __ Mov(x10, stack_limit);
1462 __ Ldr(x10, MemOperand(x10));
1463 DCHECK(csp.Is(__ StackPointer()));
1464 __ Cmp(csp, x10);
1465 CallIf(&check_preempt_label_, ls);
1466 }
1467
1468
CheckStackLimit()1469 void RegExpMacroAssemblerARM64::CheckStackLimit() {
1470 ExternalReference stack_limit =
1471 ExternalReference::address_of_regexp_stack_limit(isolate());
1472 __ Mov(x10, stack_limit);
1473 __ Ldr(x10, MemOperand(x10));
1474 __ Cmp(backtrack_stackpointer(), x10);
1475 CallIf(&stack_overflow_label_, ls);
1476 }
1477
1478
Push(Register source)1479 void RegExpMacroAssemblerARM64::Push(Register source) {
1480 DCHECK(source.Is32Bits());
1481 DCHECK(!source.is(backtrack_stackpointer()));
1482 __ Str(source,
1483 MemOperand(backtrack_stackpointer(),
1484 -static_cast<int>(kWRegSize),
1485 PreIndex));
1486 }
1487
1488
Pop(Register target)1489 void RegExpMacroAssemblerARM64::Pop(Register target) {
1490 DCHECK(target.Is32Bits());
1491 DCHECK(!target.is(backtrack_stackpointer()));
1492 __ Ldr(target,
1493 MemOperand(backtrack_stackpointer(), kWRegSize, PostIndex));
1494 }
1495
1496
GetCachedRegister(int register_index)1497 Register RegExpMacroAssemblerARM64::GetCachedRegister(int register_index) {
1498 DCHECK(register_index < kNumCachedRegisters);
1499 return Register::Create(register_index / 2, kXRegSizeInBits);
1500 }
1501
1502
GetRegister(int register_index,Register maybe_result)1503 Register RegExpMacroAssemblerARM64::GetRegister(int register_index,
1504 Register maybe_result) {
1505 DCHECK(maybe_result.Is32Bits());
1506 DCHECK(register_index >= 0);
1507 if (num_registers_ <= register_index) {
1508 num_registers_ = register_index + 1;
1509 }
1510 Register result;
1511 RegisterState register_state = GetRegisterState(register_index);
1512 switch (register_state) {
1513 case STACKED:
1514 __ Ldr(maybe_result, register_location(register_index));
1515 result = maybe_result;
1516 break;
1517 case CACHED_LSW:
1518 result = GetCachedRegister(register_index).W();
1519 break;
1520 case CACHED_MSW:
1521 __ Lsr(maybe_result.X(), GetCachedRegister(register_index),
1522 kWRegSizeInBits);
1523 result = maybe_result;
1524 break;
1525 default:
1526 UNREACHABLE();
1527 break;
1528 }
1529 DCHECK(result.Is32Bits());
1530 return result;
1531 }
1532
1533
StoreRegister(int register_index,Register source)1534 void RegExpMacroAssemblerARM64::StoreRegister(int register_index,
1535 Register source) {
1536 DCHECK(source.Is32Bits());
1537 DCHECK(register_index >= 0);
1538 if (num_registers_ <= register_index) {
1539 num_registers_ = register_index + 1;
1540 }
1541
1542 Register cached_register;
1543 RegisterState register_state = GetRegisterState(register_index);
1544 switch (register_state) {
1545 case STACKED:
1546 __ Str(source, register_location(register_index));
1547 break;
1548 case CACHED_LSW:
1549 cached_register = GetCachedRegister(register_index);
1550 if (!source.Is(cached_register.W())) {
1551 __ Bfi(cached_register, source.X(), 0, kWRegSizeInBits);
1552 }
1553 break;
1554 case CACHED_MSW:
1555 cached_register = GetCachedRegister(register_index);
1556 __ Bfi(cached_register, source.X(), kWRegSizeInBits, kWRegSizeInBits);
1557 break;
1558 default:
1559 UNREACHABLE();
1560 break;
1561 }
1562 }
1563
1564
CallIf(Label * to,Condition condition)1565 void RegExpMacroAssemblerARM64::CallIf(Label* to, Condition condition) {
1566 Label skip_call;
1567 if (condition != al) __ B(&skip_call, NegateCondition(condition));
1568 __ Bl(to);
1569 __ Bind(&skip_call);
1570 }
1571
1572
RestoreLinkRegister()1573 void RegExpMacroAssemblerARM64::RestoreLinkRegister() {
1574 DCHECK(csp.Is(__ StackPointer()));
1575 __ Pop(lr, xzr);
1576 __ Add(lr, lr, Operand(masm_->CodeObject()));
1577 }
1578
1579
SaveLinkRegister()1580 void RegExpMacroAssemblerARM64::SaveLinkRegister() {
1581 DCHECK(csp.Is(__ StackPointer()));
1582 __ Sub(lr, lr, Operand(masm_->CodeObject()));
1583 __ Push(xzr, lr);
1584 }
1585
1586
register_location(int register_index)1587 MemOperand RegExpMacroAssemblerARM64::register_location(int register_index) {
1588 DCHECK(register_index < (1<<30));
1589 DCHECK(register_index >= kNumCachedRegisters);
1590 if (num_registers_ <= register_index) {
1591 num_registers_ = register_index + 1;
1592 }
1593 register_index -= kNumCachedRegisters;
1594 int offset = kFirstRegisterOnStack - register_index * kWRegSize;
1595 return MemOperand(frame_pointer(), offset);
1596 }
1597
capture_location(int register_index,Register scratch)1598 MemOperand RegExpMacroAssemblerARM64::capture_location(int register_index,
1599 Register scratch) {
1600 DCHECK(register_index < (1<<30));
1601 DCHECK(register_index < num_saved_registers_);
1602 DCHECK(register_index >= kNumCachedRegisters);
1603 DCHECK_EQ(register_index % 2, 0);
1604 register_index -= kNumCachedRegisters;
1605 int offset = kFirstCaptureOnStack - register_index * kWRegSize;
1606 // capture_location is used with Stp instructions to load/store 2 registers.
1607 // The immediate field in the encoding is limited to 7 bits (signed).
1608 if (is_int7(offset)) {
1609 return MemOperand(frame_pointer(), offset);
1610 } else {
1611 __ Add(scratch, frame_pointer(), offset);
1612 return MemOperand(scratch);
1613 }
1614 }
1615
// Emits a load of |characters| characters, starting |cp_offset| characters
// from the current position, into current_character(). No bounds check is
// emitted. The address is input_end() plus a sign-extended 32-bit byte
// offset; w10/x10 is used as scratch when |cp_offset| is non-zero.
void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset,
                                                              int characters) {
  Register offset = current_input_offset();

  // The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU
  // and the operating system running on the target allow it.
  // If unaligned load/stores are not supported then this function must only
  // be used to load a single character at a time.

  // ARMv8 supports unaligned accesses but V8 or the kernel can decide to
  // disable it.
  // TODO(pielan): See whether or not we should disable unaligned accesses.
  if (!CanReadUnaligned()) {
    DCHECK(characters == 1);
  }

  if (cp_offset != 0) {
    if (masm_->emit_debug_code()) {
      // Debug path: do the addition in 64 bits and verify that the result
      // survives truncation to 32 bits and sign-extension back.
      __ Mov(x10, cp_offset * char_size());
      __ Add(x10, x10, Operand(current_input_offset(), SXTW));
      __ Cmp(x10, Operand(w10, SXTW));
      // The offset needs to fit in a W register.
      __ Check(eq, kOffsetOutOfRange);
    } else {
      __ Add(w10, current_input_offset(), cp_offset * char_size());
    }
    offset = w10;
  }

  // Pick the load width from the character mode and the number of characters
  // requested.
  if (mode_ == LATIN1) {
    if (characters == 4) {
      __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
    } else if (characters == 2) {
      __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
    } else {
      DCHECK(characters == 1);
      __ Ldrb(current_character(), MemOperand(input_end(), offset, SXTW));
    }
  } else {
    DCHECK(mode_ == UC16);
    if (characters == 2) {
      __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
    } else {
      DCHECK(characters == 1);
      __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
    }
  }
}
1664
1665 #endif // V8_INTERPRETED_REGEXP
1666
1667 } // namespace internal
1668 } // namespace v8
1669
1670 #endif // V8_TARGET_ARCH_ARM64
1671