1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #if V8_TARGET_ARCH_ARM64
6
7 #include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
8
9 #include "src/code-stubs.h"
10 #include "src/log.h"
11 #include "src/macro-assembler.h"
12 #include "src/profiler/cpu-profiler.h"
13 #include "src/regexp/regexp-macro-assembler.h"
14 #include "src/regexp/regexp-stack.h"
15 #include "src/unicode.h"
16
17 namespace v8 {
18 namespace internal {
19
20 #ifndef V8_INTERPRETED_REGEXP
21 /*
22 * This assembler uses the following register assignment convention:
23 * - w19 : Used to temporarily store a value before a call to C code.
24 * See CheckNotBackReferenceIgnoreCase.
25 * - x20 : Pointer to the current code object (Code*),
26 * it includes the heap object tag.
27 * - w21 : Current position in input, as negative offset from
28 * the end of the string. Please notice that this is
29 * the byte offset, not the character offset!
30 * - w22 : Currently loaded character. Must be loaded using
31 * LoadCurrentCharacter before using any of the dispatch methods.
32 * - x23 : Points to tip of backtrack stack.
33 * - w24 : Position of the first character minus one: non_position_value.
34 * Used to initialize capture registers.
35 * - x25 : Address at the end of the input string: input_end.
36 * Points to byte after last character in input.
37 * - x26 : Address at the start of the input string: input_start.
38 * - w27 : Where to start in the input string.
39 * - x28 : Output array pointer.
40 * - x29/fp : Frame pointer. Used to access arguments, local variables and
41 * RegExp registers.
42 * - x16/x17 : IP registers, used by assembler. Very volatile.
43 * - csp : Points to tip of C stack.
44 *
45 * - x0-x7 : Used as a cache to store 32 bit capture registers. These
46 * registers need to be retained every time a call to C code
47 * is done.
48 *
49 * The remaining registers are free for computations.
50 * Each call to a public method should retain this convention.
51 *
52 * The stack will have the following structure:
53 *
54 * Location Name Description
55 * (as referred to in
56 * the code)
57 *
58 * - fp[104] isolate Address of the current isolate.
59 * - fp[96] return_address Secondary link/return address
60 * used by an exit frame if this is a
61 * native call.
62 * ^^^ csp when called ^^^
63 * - fp[88] lr Return from the RegExp code.
64 * - fp[80] x29 Old frame pointer (CalleeSaved).
65 * - fp[0..72] x19-x28 Backup of CalleeSaved registers.
66 * - fp[-8] direct_call 1 => Direct call from JavaScript code.
67 * 0 => Call through the runtime system.
68 * - fp[-16] stack_base High end of the memory area to use as
69 * the backtracking stack.
70 * - fp[-24] output_size Output may fit multiple sets of matches.
71 * - fp[-32] input Handle containing the input string.
72 * - fp[-40] success_counter
73 * ^^^^^^^^^^^^^ From here and downwards we store 32 bit values ^^^^^^^^^^^^^
74 * - fp[-44] register N Capture registers initialized with
75 * - fp[-48] register N + 1 non_position_value.
76 * ... The first kNumCachedRegisters (N) registers
77 * ... are cached in x0 to x7.
78 * ... Only positions must be stored in the first
79 * - ... num_saved_registers_ registers.
80 * - ...
81 * - register N + num_registers - 1
82 * ^^^^^^^^^ csp ^^^^^^^^^
83 *
84 * The first num_saved_registers_ registers are initialized to point to
85 * "character -1" in the string (i.e., char_size() bytes before the first
86 * character of the string). The remaining registers start out as garbage.
87 *
88 * The data up to the return address must be placed there by the calling
89 * code and the remaining arguments are passed in registers, e.g. by calling the
90 * code entry as cast to a function with the signature:
91 * int (*match)(String* input,
92 * int start_offset,
93 * Address input_start,
94 * Address input_end,
95 * int* output,
96 * int output_size,
97 * Address stack_base,
98 * bool direct_call = false,
99 * Address secondary_return_address, // Only used by native call.
100 * Isolate* isolate)
101 * The call is performed by NativeRegExpMacroAssembler::Execute()
102 * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro
103 * in arm64/simulator-arm64.h.
104 * When calling as a non-direct call (i.e., from C++ code), the return address
105 * area is overwritten with the LR register by the RegExp code. When doing a
106 * direct call from generated code, the return address is placed there by
107 * the calling code, as in a normal exit frame.
108 */
109
110 #define __ ACCESS_MASM(masm_)
111
RegExpMacroAssemblerARM64(Isolate * isolate,Zone * zone,Mode mode,int registers_to_save)112 RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(Isolate* isolate,
113 Zone* zone, Mode mode,
114 int registers_to_save)
115 : NativeRegExpMacroAssembler(isolate, zone),
116 masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
117 CodeObjectRequired::kYes)),
118 mode_(mode),
119 num_registers_(registers_to_save),
120 num_saved_registers_(registers_to_save),
121 entry_label_(),
122 start_label_(),
123 success_label_(),
124 backtrack_label_(),
125 exit_label_() {
126 __ SetStackPointer(csp);
127 DCHECK_EQ(0, registers_to_save % 2);
128 // We can cache at most 16 W registers in x0-x7.
129 STATIC_ASSERT(kNumCachedRegisters <= 16);
130 STATIC_ASSERT((kNumCachedRegisters % 2) == 0);
131 __ B(&entry_label_); // We'll write the entry code later.
132 __ Bind(&start_label_); // And then continue from here.
133 }
134
135
~RegExpMacroAssemblerARM64()136 RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() {
137 delete masm_;
138 // Unuse labels in case we throw away the assembler without calling GetCode.
139 entry_label_.Unuse();
140 start_label_.Unuse();
141 success_label_.Unuse();
142 backtrack_label_.Unuse();
143 exit_label_.Unuse();
144 check_preempt_label_.Unuse();
145 stack_overflow_label_.Unuse();
146 }
147
stack_limit_slack()148 int RegExpMacroAssemblerARM64::stack_limit_slack() {
149 return RegExpStack::kStackLimitSlack;
150 }
151
152
AdvanceCurrentPosition(int by)153 void RegExpMacroAssemblerARM64::AdvanceCurrentPosition(int by) {
154 if (by != 0) {
155 __ Add(current_input_offset(),
156 current_input_offset(), by * char_size());
157 }
158 }
159
160
AdvanceRegister(int reg,int by)161 void RegExpMacroAssemblerARM64::AdvanceRegister(int reg, int by) {
162 DCHECK((reg >= 0) && (reg < num_registers_));
163 if (by != 0) {
164 Register to_advance;
165 RegisterState register_state = GetRegisterState(reg);
166 switch (register_state) {
167 case STACKED:
168 __ Ldr(w10, register_location(reg));
169 __ Add(w10, w10, by);
170 __ Str(w10, register_location(reg));
171 break;
172 case CACHED_LSW:
173 to_advance = GetCachedRegister(reg);
174 __ Add(to_advance, to_advance, by);
175 break;
176 case CACHED_MSW:
177 to_advance = GetCachedRegister(reg);
178 __ Add(to_advance, to_advance,
179 static_cast<int64_t>(by) << kWRegSizeInBits);
180 break;
181 default:
182 UNREACHABLE();
183 break;
184 }
185 }
186 }
187
188
Backtrack()189 void RegExpMacroAssemblerARM64::Backtrack() {
190 CheckPreemption();
191 Pop(w10);
192 __ Add(x10, code_pointer(), Operand(w10, UXTW));
193 __ Br(x10);
194 }
195
196
Bind(Label * label)197 void RegExpMacroAssemblerARM64::Bind(Label* label) {
198 __ Bind(label);
199 }
200
201
CheckCharacter(uint32_t c,Label * on_equal)202 void RegExpMacroAssemblerARM64::CheckCharacter(uint32_t c, Label* on_equal) {
203 CompareAndBranchOrBacktrack(current_character(), c, eq, on_equal);
204 }
205
206
CheckCharacterGT(uc16 limit,Label * on_greater)207 void RegExpMacroAssemblerARM64::CheckCharacterGT(uc16 limit,
208 Label* on_greater) {
209 CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater);
210 }
211
212
CheckAtStart(Label * on_at_start)213 void RegExpMacroAssemblerARM64::CheckAtStart(Label* on_at_start) {
214 __ Add(w10, current_input_offset(), Operand(-char_size()));
215 __ Cmp(w10, string_start_minus_one());
216 BranchOrBacktrack(eq, on_at_start);
217 }
218
219
CheckNotAtStart(int cp_offset,Label * on_not_at_start)220 void RegExpMacroAssemblerARM64::CheckNotAtStart(int cp_offset,
221 Label* on_not_at_start) {
222 __ Add(w10, current_input_offset(),
223 Operand(-char_size() + cp_offset * char_size()));
224 __ Cmp(w10, string_start_minus_one());
225 BranchOrBacktrack(ne, on_not_at_start);
226 }
227
228
CheckCharacterLT(uc16 limit,Label * on_less)229 void RegExpMacroAssemblerARM64::CheckCharacterLT(uc16 limit, Label* on_less) {
230 CompareAndBranchOrBacktrack(current_character(), limit, lo, on_less);
231 }
232
233
CheckCharacters(Vector<const uc16> str,int cp_offset,Label * on_failure,bool check_end_of_string)234 void RegExpMacroAssemblerARM64::CheckCharacters(Vector<const uc16> str,
235 int cp_offset,
236 Label* on_failure,
237 bool check_end_of_string) {
238 // This method is only ever called from the cctests.
239
240 if (check_end_of_string) {
241 // Is last character of required match inside string.
242 CheckPosition(cp_offset + str.length() - 1, on_failure);
243 }
244
245 Register characters_address = x11;
246
247 __ Add(characters_address,
248 input_end(),
249 Operand(current_input_offset(), SXTW));
250 if (cp_offset != 0) {
251 __ Add(characters_address, characters_address, cp_offset * char_size());
252 }
253
254 for (int i = 0; i < str.length(); i++) {
255 if (mode_ == LATIN1) {
256 __ Ldrb(w10, MemOperand(characters_address, 1, PostIndex));
257 DCHECK(str[i] <= String::kMaxOneByteCharCode);
258 } else {
259 __ Ldrh(w10, MemOperand(characters_address, 2, PostIndex));
260 }
261 CompareAndBranchOrBacktrack(w10, str[i], ne, on_failure);
262 }
263 }
264
265
CheckGreedyLoop(Label * on_equal)266 void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
267 __ Ldr(w10, MemOperand(backtrack_stackpointer()));
268 __ Cmp(current_input_offset(), w10);
269 __ Cset(x11, eq);
270 __ Add(backtrack_stackpointer(),
271 backtrack_stackpointer(), Operand(x11, LSL, kWRegSizeLog2));
272 BranchOrBacktrack(eq, on_equal);
273 }
274
275
CheckNotBackReferenceIgnoreCase(int start_reg,bool read_backward,Label * on_no_match)276 void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
277 int start_reg, bool read_backward, Label* on_no_match) {
278 Label fallthrough;
279
280 Register capture_start_offset = w10;
281 // Save the capture length in a callee-saved register so it will
282 // be preserved if we call a C helper.
283 Register capture_length = w19;
284 DCHECK(kCalleeSaved.IncludesAliasOf(capture_length));
285
286 // Find length of back-referenced capture.
287 DCHECK((start_reg % 2) == 0);
288 if (start_reg < kNumCachedRegisters) {
289 __ Mov(capture_start_offset.X(), GetCachedRegister(start_reg));
290 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
291 } else {
292 __ Ldp(w11, capture_start_offset, capture_location(start_reg, x10));
293 }
294 __ Sub(capture_length, w11, capture_start_offset); // Length to check.
295
296 // At this point, the capture registers are either both set or both cleared.
297 // If the capture length is zero, then the capture is either empty or cleared.
298 // Fall through in both cases.
299 __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
300
301 // Check that there are enough characters left in the input.
302 if (read_backward) {
303 __ Add(w12, string_start_minus_one(), capture_length);
304 __ Cmp(current_input_offset(), w12);
305 BranchOrBacktrack(le, on_no_match);
306 } else {
307 __ Cmn(capture_length, current_input_offset());
308 BranchOrBacktrack(gt, on_no_match);
309 }
310
311 if (mode_ == LATIN1) {
312 Label success;
313 Label fail;
314 Label loop_check;
315
316 Register capture_start_address = x12;
317 Register capture_end_addresss = x13;
318 Register current_position_address = x14;
319
320 __ Add(capture_start_address,
321 input_end(),
322 Operand(capture_start_offset, SXTW));
323 __ Add(capture_end_addresss,
324 capture_start_address,
325 Operand(capture_length, SXTW));
326 __ Add(current_position_address,
327 input_end(),
328 Operand(current_input_offset(), SXTW));
329 if (read_backward) {
330 // Offset by length when matching backwards.
331 __ Sub(current_position_address, current_position_address,
332 Operand(capture_length, SXTW));
333 }
334
335 Label loop;
336 __ Bind(&loop);
337 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
338 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
339 __ Cmp(w10, w11);
340 __ B(eq, &loop_check);
341
342 // Mismatch, try case-insensitive match (converting letters to lower-case).
343 __ Orr(w10, w10, 0x20); // Convert capture character to lower-case.
344 __ Orr(w11, w11, 0x20); // Also convert input character.
345 __ Cmp(w11, w10);
346 __ B(ne, &fail);
347 __ Sub(w10, w10, 'a');
348 __ Cmp(w10, 'z' - 'a'); // Is w10 a lowercase letter?
349 __ B(ls, &loop_check); // In range 'a'-'z'.
350 // Latin-1: Check for values in range [224,254] but not 247.
351 __ Sub(w10, w10, 224 - 'a');
352 __ Cmp(w10, 254 - 224);
353 __ Ccmp(w10, 247 - 224, ZFlag, ls); // Check for 247.
354 __ B(eq, &fail); // Weren't Latin-1 letters.
355
356 __ Bind(&loop_check);
357 __ Cmp(capture_start_address, capture_end_addresss);
358 __ B(lt, &loop);
359 __ B(&success);
360
361 __ Bind(&fail);
362 BranchOrBacktrack(al, on_no_match);
363
364 __ Bind(&success);
365 // Compute new value of character position after the matched part.
366 __ Sub(current_input_offset().X(), current_position_address, input_end());
367 if (read_backward) {
368 __ Sub(current_input_offset().X(), current_input_offset().X(),
369 Operand(capture_length, SXTW));
370 }
371 if (masm_->emit_debug_code()) {
372 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
373 __ Ccmp(current_input_offset(), 0, NoFlag, eq);
374 // The current input offset should be <= 0, and fit in a W register.
375 __ Check(le, kOffsetOutOfRange);
376 }
377 } else {
378 DCHECK(mode_ == UC16);
379 int argument_count = 4;
380
381 // The cached registers need to be retained.
382 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
383 DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters);
384 __ PushCPURegList(cached_registers);
385
386 // Put arguments into arguments registers.
387 // Parameters are
388 // x0: Address byte_offset1 - Address captured substring's start.
389 // x1: Address byte_offset2 - Address of current character position.
390 // w2: size_t byte_length - length of capture in bytes(!)
391 // x3: Isolate* isolate
392
393 // Address of start of capture.
394 __ Add(x0, input_end(), Operand(capture_start_offset, SXTW));
395 // Length of capture.
396 __ Mov(w2, capture_length);
397 // Address of current input position.
398 __ Add(x1, input_end(), Operand(current_input_offset(), SXTW));
399 if (read_backward) {
400 __ Sub(x1, x1, Operand(capture_length, SXTW));
401 }
402 // Isolate.
403 __ Mov(x3, ExternalReference::isolate_address(isolate()));
404
405 {
406 AllowExternalCallThatCantCauseGC scope(masm_);
407 ExternalReference function =
408 ExternalReference::re_case_insensitive_compare_uc16(isolate());
409 __ CallCFunction(function, argument_count);
410 }
411
412 // Check if function returned non-zero for success or zero for failure.
413 // x0 is one of the registers used as a cache so it must be tested before
414 // the cache is restored.
415 __ Cmp(x0, 0);
416 __ PopCPURegList(cached_registers);
417 BranchOrBacktrack(eq, on_no_match);
418
419 // On success, advance position by length of capture.
420 if (read_backward) {
421 __ Sub(current_input_offset(), current_input_offset(), capture_length);
422 } else {
423 __ Add(current_input_offset(), current_input_offset(), capture_length);
424 }
425 }
426
427 __ Bind(&fallthrough);
428 }
429
CheckNotBackReference(int start_reg,bool read_backward,Label * on_no_match)430 void RegExpMacroAssemblerARM64::CheckNotBackReference(int start_reg,
431 bool read_backward,
432 Label* on_no_match) {
433 Label fallthrough;
434
435 Register capture_start_address = x12;
436 Register capture_end_address = x13;
437 Register current_position_address = x14;
438 Register capture_length = w15;
439
440 // Find length of back-referenced capture.
441 DCHECK((start_reg % 2) == 0);
442 if (start_reg < kNumCachedRegisters) {
443 __ Mov(x10, GetCachedRegister(start_reg));
444 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
445 } else {
446 __ Ldp(w11, w10, capture_location(start_reg, x10));
447 }
448 __ Sub(capture_length, w11, w10); // Length to check.
449
450 // At this point, the capture registers are either both set or both cleared.
451 // If the capture length is zero, then the capture is either empty or cleared.
452 // Fall through in both cases.
453 __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
454
455 // Check that there are enough characters left in the input.
456 if (read_backward) {
457 __ Add(w12, string_start_minus_one(), capture_length);
458 __ Cmp(current_input_offset(), w12);
459 BranchOrBacktrack(le, on_no_match);
460 } else {
461 __ Cmn(capture_length, current_input_offset());
462 BranchOrBacktrack(gt, on_no_match);
463 }
464
465 // Compute pointers to match string and capture string
466 __ Add(capture_start_address, input_end(), Operand(w10, SXTW));
467 __ Add(capture_end_address,
468 capture_start_address,
469 Operand(capture_length, SXTW));
470 __ Add(current_position_address,
471 input_end(),
472 Operand(current_input_offset(), SXTW));
473 if (read_backward) {
474 // Offset by length when matching backwards.
475 __ Sub(current_position_address, current_position_address,
476 Operand(capture_length, SXTW));
477 }
478
479 Label loop;
480 __ Bind(&loop);
481 if (mode_ == LATIN1) {
482 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
483 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
484 } else {
485 DCHECK(mode_ == UC16);
486 __ Ldrh(w10, MemOperand(capture_start_address, 2, PostIndex));
487 __ Ldrh(w11, MemOperand(current_position_address, 2, PostIndex));
488 }
489 __ Cmp(w10, w11);
490 BranchOrBacktrack(ne, on_no_match);
491 __ Cmp(capture_start_address, capture_end_address);
492 __ B(lt, &loop);
493
494 // Move current character position to position after match.
495 __ Sub(current_input_offset().X(), current_position_address, input_end());
496 if (read_backward) {
497 __ Sub(current_input_offset().X(), current_input_offset().X(),
498 Operand(capture_length, SXTW));
499 }
500
501 if (masm_->emit_debug_code()) {
502 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
503 __ Ccmp(current_input_offset(), 0, NoFlag, eq);
504 // The current input offset should be <= 0, and fit in a W register.
505 __ Check(le, kOffsetOutOfRange);
506 }
507 __ Bind(&fallthrough);
508 }
509
510
CheckNotCharacter(unsigned c,Label * on_not_equal)511 void RegExpMacroAssemblerARM64::CheckNotCharacter(unsigned c,
512 Label* on_not_equal) {
513 CompareAndBranchOrBacktrack(current_character(), c, ne, on_not_equal);
514 }
515
516
CheckCharacterAfterAnd(uint32_t c,uint32_t mask,Label * on_equal)517 void RegExpMacroAssemblerARM64::CheckCharacterAfterAnd(uint32_t c,
518 uint32_t mask,
519 Label* on_equal) {
520 __ And(w10, current_character(), mask);
521 CompareAndBranchOrBacktrack(w10, c, eq, on_equal);
522 }
523
524
CheckNotCharacterAfterAnd(unsigned c,unsigned mask,Label * on_not_equal)525 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterAnd(unsigned c,
526 unsigned mask,
527 Label* on_not_equal) {
528 __ And(w10, current_character(), mask);
529 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
530 }
531
532
CheckNotCharacterAfterMinusAnd(uc16 c,uc16 minus,uc16 mask,Label * on_not_equal)533 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterMinusAnd(
534 uc16 c,
535 uc16 minus,
536 uc16 mask,
537 Label* on_not_equal) {
538 DCHECK(minus < String::kMaxUtf16CodeUnit);
539 __ Sub(w10, current_character(), minus);
540 __ And(w10, w10, mask);
541 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
542 }
543
544
CheckCharacterInRange(uc16 from,uc16 to,Label * on_in_range)545 void RegExpMacroAssemblerARM64::CheckCharacterInRange(
546 uc16 from,
547 uc16 to,
548 Label* on_in_range) {
549 __ Sub(w10, current_character(), from);
550 // Unsigned lower-or-same condition.
551 CompareAndBranchOrBacktrack(w10, to - from, ls, on_in_range);
552 }
553
554
CheckCharacterNotInRange(uc16 from,uc16 to,Label * on_not_in_range)555 void RegExpMacroAssemblerARM64::CheckCharacterNotInRange(
556 uc16 from,
557 uc16 to,
558 Label* on_not_in_range) {
559 __ Sub(w10, current_character(), from);
560 // Unsigned higher condition.
561 CompareAndBranchOrBacktrack(w10, to - from, hi, on_not_in_range);
562 }
563
564
CheckBitInTable(Handle<ByteArray> table,Label * on_bit_set)565 void RegExpMacroAssemblerARM64::CheckBitInTable(
566 Handle<ByteArray> table,
567 Label* on_bit_set) {
568 __ Mov(x11, Operand(table));
569 if ((mode_ != LATIN1) || (kTableMask != String::kMaxOneByteCharCode)) {
570 __ And(w10, current_character(), kTableMask);
571 __ Add(w10, w10, ByteArray::kHeaderSize - kHeapObjectTag);
572 } else {
573 __ Add(w10, current_character(), ByteArray::kHeaderSize - kHeapObjectTag);
574 }
575 __ Ldrb(w11, MemOperand(x11, w10, UXTW));
576 CompareAndBranchOrBacktrack(w11, 0, ne, on_bit_set);
577 }
578
579
CheckSpecialCharacterClass(uc16 type,Label * on_no_match)580 bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type,
581 Label* on_no_match) {
582 // Range checks (c in min..max) are generally implemented by an unsigned
583 // (c - min) <= (max - min) check
584 switch (type) {
585 case 's':
586 // Match space-characters
587 if (mode_ == LATIN1) {
588 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
589 Label success;
590 // Check for ' ' or 0x00a0.
591 __ Cmp(current_character(), ' ');
592 __ Ccmp(current_character(), 0x00a0, ZFlag, ne);
593 __ B(eq, &success);
594 // Check range 0x09..0x0d.
595 __ Sub(w10, current_character(), '\t');
596 CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match);
597 __ Bind(&success);
598 return true;
599 }
600 return false;
601 case 'S':
602 // The emitted code for generic character classes is good enough.
603 return false;
604 case 'd':
605 // Match ASCII digits ('0'..'9').
606 __ Sub(w10, current_character(), '0');
607 CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match);
608 return true;
609 case 'D':
610 // Match ASCII non-digits.
611 __ Sub(w10, current_character(), '0');
612 CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match);
613 return true;
614 case '.': {
615 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
616 // Here we emit the conditional branch only once at the end to make branch
617 // prediction more efficient, even though we could branch out of here
618 // as soon as a character matches.
619 __ Cmp(current_character(), 0x0a);
620 __ Ccmp(current_character(), 0x0d, ZFlag, ne);
621 if (mode_ == UC16) {
622 __ Sub(w10, current_character(), 0x2028);
623 // If the Z flag was set we clear the flags to force a branch.
624 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
625 // ls -> !((C==1) && (Z==0))
626 BranchOrBacktrack(ls, on_no_match);
627 } else {
628 BranchOrBacktrack(eq, on_no_match);
629 }
630 return true;
631 }
632 case 'n': {
633 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
634 // We have to check all 4 newline characters before emitting
635 // the conditional branch.
636 __ Cmp(current_character(), 0x0a);
637 __ Ccmp(current_character(), 0x0d, ZFlag, ne);
638 if (mode_ == UC16) {
639 __ Sub(w10, current_character(), 0x2028);
640 // If the Z flag was set we clear the flags to force a fall-through.
641 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
642 // hi -> (C==1) && (Z==0)
643 BranchOrBacktrack(hi, on_no_match);
644 } else {
645 BranchOrBacktrack(ne, on_no_match);
646 }
647 return true;
648 }
649 case 'w': {
650 if (mode_ != LATIN1) {
651 // Table is 256 entries, so all Latin1 characters can be tested.
652 CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match);
653 }
654 ExternalReference map = ExternalReference::re_word_character_map();
655 __ Mov(x10, map);
656 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
657 CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match);
658 return true;
659 }
660 case 'W': {
661 Label done;
662 if (mode_ != LATIN1) {
663 // Table is 256 entries, so all Latin1 characters can be tested.
664 __ Cmp(current_character(), 'z');
665 __ B(hi, &done);
666 }
667 ExternalReference map = ExternalReference::re_word_character_map();
668 __ Mov(x10, map);
669 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
670 CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match);
671 __ Bind(&done);
672 return true;
673 }
674 case '*':
675 // Match any character.
676 return true;
677 // No custom implementation (yet): s(UC16), S(UC16).
678 default:
679 return false;
680 }
681 }
682
683
Fail()684 void RegExpMacroAssemblerARM64::Fail() {
685 __ Mov(w0, FAILURE);
686 __ B(&exit_label_);
687 }
688
689
GetCode(Handle<String> source)690 Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
691 Label return_w0;
692 // Finalize code - write the entry point code now we know how many
693 // registers we need.
694
695 // Entry code:
696 __ Bind(&entry_label_);
697
698 // Arguments on entry:
699 // x0: String* input
700 // x1: int start_offset
701 // x2: byte* input_start
702 // x3: byte* input_end
703 // x4: int* output array
704 // x5: int output array size
705 // x6: Address stack_base
706 // x7: int direct_call
707
708 // The stack pointer should be csp on entry.
709 // csp[8]: address of the current isolate
710 // csp[0]: secondary link/return address used by native call
711
712 // Tell the system that we have a stack frame. Because the type is MANUAL, no
713 // code is generated.
714 FrameScope scope(masm_, StackFrame::MANUAL);
715
716 // Push registers on the stack, only push the argument registers that we need.
717 CPURegList argument_registers(x0, x5, x6, x7);
718
719 CPURegList registers_to_retain = kCalleeSaved;
720 DCHECK(kCalleeSaved.Count() == 11);
721 registers_to_retain.Combine(lr);
722
723 DCHECK(csp.Is(__ StackPointer()));
724 __ PushCPURegList(registers_to_retain);
725 __ PushCPURegList(argument_registers);
726
727 // Set frame pointer in place.
728 __ Add(frame_pointer(), csp, argument_registers.Count() * kPointerSize);
729
730 // Initialize callee-saved registers.
731 __ Mov(start_offset(), w1);
732 __ Mov(input_start(), x2);
733 __ Mov(input_end(), x3);
734 __ Mov(output_array(), x4);
735
736 // Set the number of registers we will need to allocate, that is:
737 // - success_counter (X register)
738 // - (num_registers_ - kNumCachedRegisters) (W registers)
739 int num_wreg_to_allocate = num_registers_ - kNumCachedRegisters;
740 // Do not allocate registers on the stack if they can all be cached.
741 if (num_wreg_to_allocate < 0) { num_wreg_to_allocate = 0; }
742 // Make room for the success_counter.
743 num_wreg_to_allocate += 2;
744
745 // Make sure the stack alignment will be respected.
746 int alignment = masm_->ActivationFrameAlignment();
747 DCHECK_EQ(alignment % 16, 0);
748 int align_mask = (alignment / kWRegSize) - 1;
749 num_wreg_to_allocate = (num_wreg_to_allocate + align_mask) & ~align_mask;
750
751 // Check if we have space on the stack.
752 Label stack_limit_hit;
753 Label stack_ok;
754
755 ExternalReference stack_limit =
756 ExternalReference::address_of_stack_limit(isolate());
757 __ Mov(x10, stack_limit);
758 __ Ldr(x10, MemOperand(x10));
759 __ Subs(x10, csp, x10);
760
761 // Handle it if the stack pointer is already below the stack limit.
762 __ B(ls, &stack_limit_hit);
763
764 // Check if there is room for the variable number of registers above
765 // the stack limit.
766 __ Cmp(x10, num_wreg_to_allocate * kWRegSize);
767 __ B(hs, &stack_ok);
768
769 // Exit with OutOfMemory exception. There is not enough space on the stack
770 // for our working registers.
771 __ Mov(w0, EXCEPTION);
772 __ B(&return_w0);
773
774 __ Bind(&stack_limit_hit);
775 CallCheckStackGuardState(x10);
776 // If returned value is non-zero, we exit with the returned value as result.
777 __ Cbnz(w0, &return_w0);
778
779 __ Bind(&stack_ok);
780
781 // Allocate space on stack.
782 __ Claim(num_wreg_to_allocate, kWRegSize);
783
784 // Initialize success_counter with 0.
785 __ Str(wzr, MemOperand(frame_pointer(), kSuccessCounter));
786
787 // Find negative length (offset of start relative to end).
788 __ Sub(x10, input_start(), input_end());
789 if (masm_->emit_debug_code()) {
790 // Check that the input string length is < 2^30.
791 __ Neg(x11, x10);
792 __ Cmp(x11, (1<<30) - 1);
793 __ Check(ls, kInputStringTooLong);
794 }
795 __ Mov(current_input_offset(), w10);
796
797 // The non-position value is used as a clearing value for the
798 // capture registers, it corresponds to the position of the first character
799 // minus one.
800 __ Sub(string_start_minus_one(), current_input_offset(), char_size());
801 __ Sub(string_start_minus_one(), string_start_minus_one(),
802 Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0));
803 // We can store this value twice in an X register for initializing
804 // on-stack registers later.
805 __ Orr(twice_non_position_value(), string_start_minus_one().X(),
806 Operand(string_start_minus_one().X(), LSL, kWRegSizeInBits));
807
808 // Initialize code pointer register.
809 __ Mov(code_pointer(), Operand(masm_->CodeObject()));
810
811 Label load_char_start_regexp, start_regexp;
812 // Load newline if index is at start, previous character otherwise.
813 __ Cbnz(start_offset(), &load_char_start_regexp);
814 __ Mov(current_character(), '\n');
815 __ B(&start_regexp);
816
817 // Global regexp restarts matching here.
818 __ Bind(&load_char_start_regexp);
819 // Load previous char as initial value of current character register.
820 LoadCurrentCharacterUnchecked(-1, 1);
821 __ Bind(&start_regexp);
822 // Initialize on-stack registers.
823 if (num_saved_registers_ > 0) {
824 ClearRegisters(0, num_saved_registers_ - 1);
825 }
826
827 // Initialize backtrack stack pointer.
828 __ Ldr(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackBase));
829
830 // Execute
831 __ B(&start_label_);
832
833 if (backtrack_label_.is_linked()) {
834 __ Bind(&backtrack_label_);
835 Backtrack();
836 }
837
838 if (success_label_.is_linked()) {
839 Register first_capture_start = w15;
840
841 // Save captures when successful.
842 __ Bind(&success_label_);
843
844 if (num_saved_registers_ > 0) {
845 // V8 expects the output to be an int32_t array.
846 Register capture_start = w12;
847 Register capture_end = w13;
848 Register input_length = w14;
849
850 // Copy captures to output.
851
852 // Get string length.
853 __ Sub(x10, input_end(), input_start());
854 if (masm_->emit_debug_code()) {
855 // Check that the input string length is < 2^30.
856 __ Cmp(x10, (1<<30) - 1);
857 __ Check(ls, kInputStringTooLong);
858 }
859 // input_start has a start_offset offset on entry. We need to include
860 // it when computing the length of the whole string.
861 if (mode_ == UC16) {
862 __ Add(input_length, start_offset(), Operand(w10, LSR, 1));
863 } else {
864 __ Add(input_length, start_offset(), w10);
865 }
866
867 // Copy the results to the output array from the cached registers first.
868 for (int i = 0;
869 (i < num_saved_registers_) && (i < kNumCachedRegisters);
870 i += 2) {
871 __ Mov(capture_start.X(), GetCachedRegister(i));
872 __ Lsr(capture_end.X(), capture_start.X(), kWRegSizeInBits);
873 if ((i == 0) && global_with_zero_length_check()) {
874 // Keep capture start for the zero-length check later.
875 __ Mov(first_capture_start, capture_start);
876 }
877 // Offsets need to be relative to the start of the string.
878 if (mode_ == UC16) {
879 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
880 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
881 } else {
882 __ Add(capture_start, input_length, capture_start);
883 __ Add(capture_end, input_length, capture_end);
884 }
885 // The output pointer advances for a possible global match.
886 __ Stp(capture_start,
887 capture_end,
888 MemOperand(output_array(), kPointerSize, PostIndex));
889 }
890
891 // Only carry on if there are more than kNumCachedRegisters capture
892 // registers.
893 int num_registers_left_on_stack =
894 num_saved_registers_ - kNumCachedRegisters;
895 if (num_registers_left_on_stack > 0) {
896 Register base = x10;
897 // There are always an even number of capture registers. A couple of
898 // registers determine one match with two offsets.
899 DCHECK_EQ(0, num_registers_left_on_stack % 2);
900 __ Add(base, frame_pointer(), kFirstCaptureOnStack);
901
902 // We can unroll the loop here, we should not unroll for less than 2
903 // registers.
904 STATIC_ASSERT(kNumRegistersToUnroll > 2);
905 if (num_registers_left_on_stack <= kNumRegistersToUnroll) {
906 for (int i = 0; i < num_registers_left_on_stack / 2; i++) {
907 __ Ldp(capture_end,
908 capture_start,
909 MemOperand(base, -kPointerSize, PostIndex));
910 if ((i == 0) && global_with_zero_length_check()) {
911 // Keep capture start for the zero-length check later.
912 __ Mov(first_capture_start, capture_start);
913 }
914 // Offsets need to be relative to the start of the string.
915 if (mode_ == UC16) {
916 __ Add(capture_start,
917 input_length,
918 Operand(capture_start, ASR, 1));
919 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
920 } else {
921 __ Add(capture_start, input_length, capture_start);
922 __ Add(capture_end, input_length, capture_end);
923 }
924 // The output pointer advances for a possible global match.
925 __ Stp(capture_start,
926 capture_end,
927 MemOperand(output_array(), kPointerSize, PostIndex));
928 }
929 } else {
930 Label loop, start;
931 __ Mov(x11, num_registers_left_on_stack);
932
933 __ Ldp(capture_end,
934 capture_start,
935 MemOperand(base, -kPointerSize, PostIndex));
936 if (global_with_zero_length_check()) {
937 __ Mov(first_capture_start, capture_start);
938 }
939 __ B(&start);
940
941 __ Bind(&loop);
942 __ Ldp(capture_end,
943 capture_start,
944 MemOperand(base, -kPointerSize, PostIndex));
945 __ Bind(&start);
946 if (mode_ == UC16) {
947 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
948 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
949 } else {
950 __ Add(capture_start, input_length, capture_start);
951 __ Add(capture_end, input_length, capture_end);
952 }
953 // The output pointer advances for a possible global match.
954 __ Stp(capture_start,
955 capture_end,
956 MemOperand(output_array(), kPointerSize, PostIndex));
957 __ Sub(x11, x11, 2);
958 __ Cbnz(x11, &loop);
959 }
960 }
961 }
962
963 if (global()) {
964 Register success_counter = w0;
965 Register output_size = x10;
966 // Restart matching if the regular expression is flagged as global.
967
968 // Increment success counter.
969 __ Ldr(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
970 __ Add(success_counter, success_counter, 1);
971 __ Str(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
972
973 // Capture results have been stored, so the number of remaining global
974 // output registers is reduced by the number of stored captures.
975 __ Ldr(output_size, MemOperand(frame_pointer(), kOutputSize));
976 __ Sub(output_size, output_size, num_saved_registers_);
977 // Check whether we have enough room for another set of capture results.
978 __ Cmp(output_size, num_saved_registers_);
979 __ B(lt, &return_w0);
980
981 // The output pointer is already set to the next field in the output
982 // array.
983 // Update output size on the frame before we restart matching.
984 __ Str(output_size, MemOperand(frame_pointer(), kOutputSize));
985
986 if (global_with_zero_length_check()) {
987 // Special case for zero-length matches.
988 __ Cmp(current_input_offset(), first_capture_start);
989 // Not a zero-length match, restart.
990 __ B(ne, &load_char_start_regexp);
991 // Offset from the end is zero if we already reached the end.
992 __ Cbz(current_input_offset(), &return_w0);
993 // Advance current position after a zero-length match.
994 __ Add(current_input_offset(),
995 current_input_offset(),
996 Operand((mode_ == UC16) ? 2 : 1));
997 }
998
999 __ B(&load_char_start_regexp);
1000 } else {
1001 __ Mov(w0, SUCCESS);
1002 }
1003 }
1004
1005 if (exit_label_.is_linked()) {
1006 // Exit and return w0
1007 __ Bind(&exit_label_);
1008 if (global()) {
1009 __ Ldr(w0, MemOperand(frame_pointer(), kSuccessCounter));
1010 }
1011 }
1012
1013 __ Bind(&return_w0);
1014
1015 // Set stack pointer back to first register to retain
1016 DCHECK(csp.Is(__ StackPointer()));
1017 __ Mov(csp, fp);
1018 __ AssertStackConsistency();
1019
1020 // Restore registers.
1021 __ PopCPURegList(registers_to_retain);
1022
1023 __ Ret();
1024
1025 Label exit_with_exception;
1026 // Registers x0 to x7 are used to store the first captures, they need to be
1027 // retained over calls to C++ code.
1028 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
1029 DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters);
1030
1031 if (check_preempt_label_.is_linked()) {
1032 __ Bind(&check_preempt_label_);
1033 SaveLinkRegister();
1034 // The cached registers need to be retained.
1035 __ PushCPURegList(cached_registers);
1036 CallCheckStackGuardState(x10);
1037 // Returning from the regexp code restores the stack (csp <- fp)
1038 // so we don't need to drop the link register from it before exiting.
1039 __ Cbnz(w0, &return_w0);
1040 // Reset the cached registers.
1041 __ PopCPURegList(cached_registers);
1042 RestoreLinkRegister();
1043 __ Ret();
1044 }
1045
1046 if (stack_overflow_label_.is_linked()) {
1047 __ Bind(&stack_overflow_label_);
1048 SaveLinkRegister();
1049 // The cached registers need to be retained.
1050 __ PushCPURegList(cached_registers);
1051 // Call GrowStack(backtrack_stackpointer(), &stack_base)
1052 __ Mov(x2, ExternalReference::isolate_address(isolate()));
1053 __ Add(x1, frame_pointer(), kStackBase);
1054 __ Mov(x0, backtrack_stackpointer());
1055 ExternalReference grow_stack =
1056 ExternalReference::re_grow_stack(isolate());
1057 __ CallCFunction(grow_stack, 3);
1058 // If return NULL, we have failed to grow the stack, and
1059 // must exit with a stack-overflow exception.
1060 // Returning from the regexp code restores the stack (csp <- fp)
1061 // so we don't need to drop the link register from it before exiting.
1062 __ Cbz(w0, &exit_with_exception);
1063 // Otherwise use return value as new stack pointer.
1064 __ Mov(backtrack_stackpointer(), x0);
1065 // Reset the cached registers.
1066 __ PopCPURegList(cached_registers);
1067 RestoreLinkRegister();
1068 __ Ret();
1069 }
1070
1071 if (exit_with_exception.is_linked()) {
1072 __ Bind(&exit_with_exception);
1073 __ Mov(w0, EXCEPTION);
1074 __ B(&return_w0);
1075 }
1076
1077 CodeDesc code_desc;
1078 masm_->GetCode(&code_desc);
1079 Handle<Code> code = isolate()->factory()->NewCode(
1080 code_desc, Code::ComputeFlags(Code::REGEXP), masm_->CodeObject());
1081 PROFILE(masm_->isolate(), RegExpCodeCreateEvent(*code, *source));
1082 return Handle<HeapObject>::cast(code);
1083 }
1084
1085
GoTo(Label * to)1086 void RegExpMacroAssemblerARM64::GoTo(Label* to) {
1087 BranchOrBacktrack(al, to);
1088 }
1089
IfRegisterGE(int reg,int comparand,Label * if_ge)1090 void RegExpMacroAssemblerARM64::IfRegisterGE(int reg, int comparand,
1091 Label* if_ge) {
1092 Register to_compare = GetRegister(reg, w10);
1093 CompareAndBranchOrBacktrack(to_compare, comparand, ge, if_ge);
1094 }
1095
1096
IfRegisterLT(int reg,int comparand,Label * if_lt)1097 void RegExpMacroAssemblerARM64::IfRegisterLT(int reg, int comparand,
1098 Label* if_lt) {
1099 Register to_compare = GetRegister(reg, w10);
1100 CompareAndBranchOrBacktrack(to_compare, comparand, lt, if_lt);
1101 }
1102
1103
IfRegisterEqPos(int reg,Label * if_eq)1104 void RegExpMacroAssemblerARM64::IfRegisterEqPos(int reg, Label* if_eq) {
1105 Register to_compare = GetRegister(reg, w10);
1106 __ Cmp(to_compare, current_input_offset());
1107 BranchOrBacktrack(eq, if_eq);
1108 }
1109
1110 RegExpMacroAssembler::IrregexpImplementation
Implementation()1111 RegExpMacroAssemblerARM64::Implementation() {
1112 return kARM64Implementation;
1113 }
1114
1115
LoadCurrentCharacter(int cp_offset,Label * on_end_of_input,bool check_bounds,int characters)1116 void RegExpMacroAssemblerARM64::LoadCurrentCharacter(int cp_offset,
1117 Label* on_end_of_input,
1118 bool check_bounds,
1119 int characters) {
1120 // TODO(pielan): Make sure long strings are caught before this, and not
1121 // just asserted in debug mode.
1122 // Be sane! (And ensure that an int32_t can be used to index the string)
1123 DCHECK(cp_offset < (1<<30));
1124 if (check_bounds) {
1125 if (cp_offset >= 0) {
1126 CheckPosition(cp_offset + characters - 1, on_end_of_input);
1127 } else {
1128 CheckPosition(cp_offset, on_end_of_input);
1129 }
1130 }
1131 LoadCurrentCharacterUnchecked(cp_offset, characters);
1132 }
1133
1134
PopCurrentPosition()1135 void RegExpMacroAssemblerARM64::PopCurrentPosition() {
1136 Pop(current_input_offset());
1137 }
1138
1139
PopRegister(int register_index)1140 void RegExpMacroAssemblerARM64::PopRegister(int register_index) {
1141 Pop(w10);
1142 StoreRegister(register_index, w10);
1143 }
1144
1145
PushBacktrack(Label * label)1146 void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) {
1147 if (label->is_bound()) {
1148 int target = label->pos();
1149 __ Mov(w10, target + Code::kHeaderSize - kHeapObjectTag);
1150 } else {
1151 __ Adr(x10, label, MacroAssembler::kAdrFar);
1152 __ Sub(x10, x10, code_pointer());
1153 if (masm_->emit_debug_code()) {
1154 __ Cmp(x10, kWRegMask);
1155 // The code offset has to fit in a W register.
1156 __ Check(ls, kOffsetOutOfRange);
1157 }
1158 }
1159 Push(w10);
1160 CheckStackLimit();
1161 }
1162
1163
PushCurrentPosition()1164 void RegExpMacroAssemblerARM64::PushCurrentPosition() {
1165 Push(current_input_offset());
1166 }
1167
1168
PushRegister(int register_index,StackCheckFlag check_stack_limit)1169 void RegExpMacroAssemblerARM64::PushRegister(int register_index,
1170 StackCheckFlag check_stack_limit) {
1171 Register to_push = GetRegister(register_index, w10);
1172 Push(to_push);
1173 if (check_stack_limit) CheckStackLimit();
1174 }
1175
1176
ReadCurrentPositionFromRegister(int reg)1177 void RegExpMacroAssemblerARM64::ReadCurrentPositionFromRegister(int reg) {
1178 Register cached_register;
1179 RegisterState register_state = GetRegisterState(reg);
1180 switch (register_state) {
1181 case STACKED:
1182 __ Ldr(current_input_offset(), register_location(reg));
1183 break;
1184 case CACHED_LSW:
1185 cached_register = GetCachedRegister(reg);
1186 __ Mov(current_input_offset(), cached_register.W());
1187 break;
1188 case CACHED_MSW:
1189 cached_register = GetCachedRegister(reg);
1190 __ Lsr(current_input_offset().X(), cached_register, kWRegSizeInBits);
1191 break;
1192 default:
1193 UNREACHABLE();
1194 break;
1195 }
1196 }
1197
1198
ReadStackPointerFromRegister(int reg)1199 void RegExpMacroAssemblerARM64::ReadStackPointerFromRegister(int reg) {
1200 Register read_from = GetRegister(reg, w10);
1201 __ Ldr(x11, MemOperand(frame_pointer(), kStackBase));
1202 __ Add(backtrack_stackpointer(), x11, Operand(read_from, SXTW));
1203 }
1204
1205
SetCurrentPositionFromEnd(int by)1206 void RegExpMacroAssemblerARM64::SetCurrentPositionFromEnd(int by) {
1207 Label after_position;
1208 __ Cmp(current_input_offset(), -by * char_size());
1209 __ B(ge, &after_position);
1210 __ Mov(current_input_offset(), -by * char_size());
1211 // On RegExp code entry (where this operation is used), the character before
1212 // the current position is expected to be already loaded.
1213 // We have advanced the position, so it's safe to read backwards.
1214 LoadCurrentCharacterUnchecked(-1, 1);
1215 __ Bind(&after_position);
1216 }
1217
1218
SetRegister(int register_index,int to)1219 void RegExpMacroAssemblerARM64::SetRegister(int register_index, int to) {
1220 DCHECK(register_index >= num_saved_registers_); // Reserved for positions!
1221 Register set_to = wzr;
1222 if (to != 0) {
1223 set_to = w10;
1224 __ Mov(set_to, to);
1225 }
1226 StoreRegister(register_index, set_to);
1227 }
1228
1229
Succeed()1230 bool RegExpMacroAssemblerARM64::Succeed() {
1231 __ B(&success_label_);
1232 return global();
1233 }
1234
1235
WriteCurrentPositionToRegister(int reg,int cp_offset)1236 void RegExpMacroAssemblerARM64::WriteCurrentPositionToRegister(int reg,
1237 int cp_offset) {
1238 Register position = current_input_offset();
1239 if (cp_offset != 0) {
1240 position = w10;
1241 __ Add(position, current_input_offset(), cp_offset * char_size());
1242 }
1243 StoreRegister(reg, position);
1244 }
1245
1246
ClearRegisters(int reg_from,int reg_to)1247 void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) {
1248 DCHECK(reg_from <= reg_to);
1249 int num_registers = reg_to - reg_from + 1;
1250
1251 // If the first capture register is cached in a hardware register but not
1252 // aligned on a 64-bit one, we need to clear the first one specifically.
1253 if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) {
1254 StoreRegister(reg_from, string_start_minus_one());
1255 num_registers--;
1256 reg_from++;
1257 }
1258
1259 // Clear cached registers in pairs as far as possible.
1260 while ((num_registers >= 2) && (reg_from < kNumCachedRegisters)) {
1261 DCHECK(GetRegisterState(reg_from) == CACHED_LSW);
1262 __ Mov(GetCachedRegister(reg_from), twice_non_position_value());
1263 reg_from += 2;
1264 num_registers -= 2;
1265 }
1266
1267 if ((num_registers % 2) == 1) {
1268 StoreRegister(reg_from, string_start_minus_one());
1269 num_registers--;
1270 reg_from++;
1271 }
1272
1273 if (num_registers > 0) {
1274 // If there are some remaining registers, they are stored on the stack.
1275 DCHECK(reg_from >= kNumCachedRegisters);
1276
1277 // Move down the indexes of the registers on stack to get the correct offset
1278 // in memory.
1279 reg_from -= kNumCachedRegisters;
1280 reg_to -= kNumCachedRegisters;
1281 // We should not unroll the loop for less than 2 registers.
1282 STATIC_ASSERT(kNumRegistersToUnroll > 2);
1283 // We position the base pointer to (reg_from + 1).
1284 int base_offset = kFirstRegisterOnStack -
1285 kWRegSize - (kWRegSize * reg_from);
1286 if (num_registers > kNumRegistersToUnroll) {
1287 Register base = x10;
1288 __ Add(base, frame_pointer(), base_offset);
1289
1290 Label loop;
1291 __ Mov(x11, num_registers);
1292 __ Bind(&loop);
1293 __ Str(twice_non_position_value(),
1294 MemOperand(base, -kPointerSize, PostIndex));
1295 __ Sub(x11, x11, 2);
1296 __ Cbnz(x11, &loop);
1297 } else {
1298 for (int i = reg_from; i <= reg_to; i += 2) {
1299 __ Str(twice_non_position_value(),
1300 MemOperand(frame_pointer(), base_offset));
1301 base_offset -= kWRegSize * 2;
1302 }
1303 }
1304 }
1305 }
1306
1307
WriteStackPointerToRegister(int reg)1308 void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) {
1309 __ Ldr(x10, MemOperand(frame_pointer(), kStackBase));
1310 __ Sub(x10, backtrack_stackpointer(), x10);
1311 if (masm_->emit_debug_code()) {
1312 __ Cmp(x10, Operand(w10, SXTW));
1313 // The stack offset needs to fit in a W register.
1314 __ Check(eq, kOffsetOutOfRange);
1315 }
1316 StoreRegister(reg, w10);
1317 }
1318
1319
1320 // Helper function for reading a value out of a stack frame.
1321 template <typename T>
frame_entry(Address re_frame,int frame_offset)1322 static T& frame_entry(Address re_frame, int frame_offset) {
1323 return *reinterpret_cast<T*>(re_frame + frame_offset);
1324 }
1325
1326
1327 template <typename T>
frame_entry_address(Address re_frame,int frame_offset)1328 static T* frame_entry_address(Address re_frame, int frame_offset) {
1329 return reinterpret_cast<T*>(re_frame + frame_offset);
1330 }
1331
1332
CheckStackGuardState(Address * return_address,Code * re_code,Address re_frame,int start_index,const byte ** input_start,const byte ** input_end)1333 int RegExpMacroAssemblerARM64::CheckStackGuardState(
1334 Address* return_address, Code* re_code, Address re_frame, int start_index,
1335 const byte** input_start, const byte** input_end) {
1336 return NativeRegExpMacroAssembler::CheckStackGuardState(
1337 frame_entry<Isolate*>(re_frame, kIsolate), start_index,
1338 frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
1339 frame_entry_address<String*>(re_frame, kInput), input_start, input_end);
1340 }
1341
1342
CheckPosition(int cp_offset,Label * on_outside_input)1343 void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset,
1344 Label* on_outside_input) {
1345 if (cp_offset >= 0) {
1346 CompareAndBranchOrBacktrack(current_input_offset(),
1347 -cp_offset * char_size(), ge, on_outside_input);
1348 } else {
1349 __ Add(w12, current_input_offset(), Operand(cp_offset * char_size()));
1350 __ Cmp(w12, string_start_minus_one());
1351 BranchOrBacktrack(le, on_outside_input);
1352 }
1353 }
1354
1355
CanReadUnaligned()1356 bool RegExpMacroAssemblerARM64::CanReadUnaligned() {
1357 // TODO(pielan): See whether or not we should disable unaligned accesses.
1358 return !slow_safe();
1359 }
1360
1361
1362 // Private methods:
1363
CallCheckStackGuardState(Register scratch)1364 void RegExpMacroAssemblerARM64::CallCheckStackGuardState(Register scratch) {
1365 // Allocate space on the stack to store the return address. The
1366 // CheckStackGuardState C++ function will override it if the code
1367 // moved. Allocate extra space for 2 arguments passed by pointers.
1368 // AAPCS64 requires the stack to be 16 byte aligned.
1369 int alignment = masm_->ActivationFrameAlignment();
1370 DCHECK_EQ(alignment % 16, 0);
1371 int align_mask = (alignment / kXRegSize) - 1;
1372 int xreg_to_claim = (3 + align_mask) & ~align_mask;
1373
1374 DCHECK(csp.Is(__ StackPointer()));
1375 __ Claim(xreg_to_claim);
1376
1377 // CheckStackGuardState needs the end and start addresses of the input string.
1378 __ Poke(input_end(), 2 * kPointerSize);
1379 __ Add(x5, csp, 2 * kPointerSize);
1380 __ Poke(input_start(), kPointerSize);
1381 __ Add(x4, csp, kPointerSize);
1382
1383 __ Mov(w3, start_offset());
1384 // RegExp code frame pointer.
1385 __ Mov(x2, frame_pointer());
1386 // Code* of self.
1387 __ Mov(x1, Operand(masm_->CodeObject()));
1388
1389 // We need to pass a pointer to the return address as first argument.
1390 // The DirectCEntry stub will place the return address on the stack before
1391 // calling so the stack pointer will point to it.
1392 __ Mov(x0, csp);
1393
1394 ExternalReference check_stack_guard_state =
1395 ExternalReference::re_check_stack_guard_state(isolate());
1396 __ Mov(scratch, check_stack_guard_state);
1397 DirectCEntryStub stub(isolate());
1398 stub.GenerateCall(masm_, scratch);
1399
1400 // The input string may have been moved in memory, we need to reload it.
1401 __ Peek(input_start(), kPointerSize);
1402 __ Peek(input_end(), 2 * kPointerSize);
1403
1404 DCHECK(csp.Is(__ StackPointer()));
1405 __ Drop(xreg_to_claim);
1406
1407 // Reload the Code pointer.
1408 __ Mov(code_pointer(), Operand(masm_->CodeObject()));
1409 }
1410
BranchOrBacktrack(Condition condition,Label * to)1411 void RegExpMacroAssemblerARM64::BranchOrBacktrack(Condition condition,
1412 Label* to) {
1413 if (condition == al) { // Unconditional.
1414 if (to == NULL) {
1415 Backtrack();
1416 return;
1417 }
1418 __ B(to);
1419 return;
1420 }
1421 if (to == NULL) {
1422 to = &backtrack_label_;
1423 }
1424 __ B(condition, to);
1425 }
1426
CompareAndBranchOrBacktrack(Register reg,int immediate,Condition condition,Label * to)1427 void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg,
1428 int immediate,
1429 Condition condition,
1430 Label* to) {
1431 if ((immediate == 0) && ((condition == eq) || (condition == ne))) {
1432 if (to == NULL) {
1433 to = &backtrack_label_;
1434 }
1435 if (condition == eq) {
1436 __ Cbz(reg, to);
1437 } else {
1438 __ Cbnz(reg, to);
1439 }
1440 } else {
1441 __ Cmp(reg, immediate);
1442 BranchOrBacktrack(condition, to);
1443 }
1444 }
1445
1446
CheckPreemption()1447 void RegExpMacroAssemblerARM64::CheckPreemption() {
1448 // Check for preemption.
1449 ExternalReference stack_limit =
1450 ExternalReference::address_of_stack_limit(isolate());
1451 __ Mov(x10, stack_limit);
1452 __ Ldr(x10, MemOperand(x10));
1453 DCHECK(csp.Is(__ StackPointer()));
1454 __ Cmp(csp, x10);
1455 CallIf(&check_preempt_label_, ls);
1456 }
1457
1458
CheckStackLimit()1459 void RegExpMacroAssemblerARM64::CheckStackLimit() {
1460 ExternalReference stack_limit =
1461 ExternalReference::address_of_regexp_stack_limit(isolate());
1462 __ Mov(x10, stack_limit);
1463 __ Ldr(x10, MemOperand(x10));
1464 __ Cmp(backtrack_stackpointer(), x10);
1465 CallIf(&stack_overflow_label_, ls);
1466 }
1467
1468
Push(Register source)1469 void RegExpMacroAssemblerARM64::Push(Register source) {
1470 DCHECK(source.Is32Bits());
1471 DCHECK(!source.is(backtrack_stackpointer()));
1472 __ Str(source,
1473 MemOperand(backtrack_stackpointer(),
1474 -static_cast<int>(kWRegSize),
1475 PreIndex));
1476 }
1477
1478
Pop(Register target)1479 void RegExpMacroAssemblerARM64::Pop(Register target) {
1480 DCHECK(target.Is32Bits());
1481 DCHECK(!target.is(backtrack_stackpointer()));
1482 __ Ldr(target,
1483 MemOperand(backtrack_stackpointer(), kWRegSize, PostIndex));
1484 }
1485
1486
GetCachedRegister(int register_index)1487 Register RegExpMacroAssemblerARM64::GetCachedRegister(int register_index) {
1488 DCHECK(register_index < kNumCachedRegisters);
1489 return Register::Create(register_index / 2, kXRegSizeInBits);
1490 }
1491
1492
GetRegister(int register_index,Register maybe_result)1493 Register RegExpMacroAssemblerARM64::GetRegister(int register_index,
1494 Register maybe_result) {
1495 DCHECK(maybe_result.Is32Bits());
1496 DCHECK(register_index >= 0);
1497 if (num_registers_ <= register_index) {
1498 num_registers_ = register_index + 1;
1499 }
1500 Register result;
1501 RegisterState register_state = GetRegisterState(register_index);
1502 switch (register_state) {
1503 case STACKED:
1504 __ Ldr(maybe_result, register_location(register_index));
1505 result = maybe_result;
1506 break;
1507 case CACHED_LSW:
1508 result = GetCachedRegister(register_index).W();
1509 break;
1510 case CACHED_MSW:
1511 __ Lsr(maybe_result.X(), GetCachedRegister(register_index),
1512 kWRegSizeInBits);
1513 result = maybe_result;
1514 break;
1515 default:
1516 UNREACHABLE();
1517 break;
1518 }
1519 DCHECK(result.Is32Bits());
1520 return result;
1521 }
1522
1523
StoreRegister(int register_index,Register source)1524 void RegExpMacroAssemblerARM64::StoreRegister(int register_index,
1525 Register source) {
1526 DCHECK(source.Is32Bits());
1527 DCHECK(register_index >= 0);
1528 if (num_registers_ <= register_index) {
1529 num_registers_ = register_index + 1;
1530 }
1531
1532 Register cached_register;
1533 RegisterState register_state = GetRegisterState(register_index);
1534 switch (register_state) {
1535 case STACKED:
1536 __ Str(source, register_location(register_index));
1537 break;
1538 case CACHED_LSW:
1539 cached_register = GetCachedRegister(register_index);
1540 if (!source.Is(cached_register.W())) {
1541 __ Bfi(cached_register, source.X(), 0, kWRegSizeInBits);
1542 }
1543 break;
1544 case CACHED_MSW:
1545 cached_register = GetCachedRegister(register_index);
1546 __ Bfi(cached_register, source.X(), kWRegSizeInBits, kWRegSizeInBits);
1547 break;
1548 default:
1549 UNREACHABLE();
1550 break;
1551 }
1552 }
1553
1554
CallIf(Label * to,Condition condition)1555 void RegExpMacroAssemblerARM64::CallIf(Label* to, Condition condition) {
1556 Label skip_call;
1557 if (condition != al) __ B(&skip_call, NegateCondition(condition));
1558 __ Bl(to);
1559 __ Bind(&skip_call);
1560 }
1561
1562
RestoreLinkRegister()1563 void RegExpMacroAssemblerARM64::RestoreLinkRegister() {
1564 DCHECK(csp.Is(__ StackPointer()));
1565 __ Pop(lr, xzr);
1566 __ Add(lr, lr, Operand(masm_->CodeObject()));
1567 }
1568
1569
SaveLinkRegister()1570 void RegExpMacroAssemblerARM64::SaveLinkRegister() {
1571 DCHECK(csp.Is(__ StackPointer()));
1572 __ Sub(lr, lr, Operand(masm_->CodeObject()));
1573 __ Push(xzr, lr);
1574 }
1575
1576
register_location(int register_index)1577 MemOperand RegExpMacroAssemblerARM64::register_location(int register_index) {
1578 DCHECK(register_index < (1<<30));
1579 DCHECK(register_index >= kNumCachedRegisters);
1580 if (num_registers_ <= register_index) {
1581 num_registers_ = register_index + 1;
1582 }
1583 register_index -= kNumCachedRegisters;
1584 int offset = kFirstRegisterOnStack - register_index * kWRegSize;
1585 return MemOperand(frame_pointer(), offset);
1586 }
1587
capture_location(int register_index,Register scratch)1588 MemOperand RegExpMacroAssemblerARM64::capture_location(int register_index,
1589 Register scratch) {
1590 DCHECK(register_index < (1<<30));
1591 DCHECK(register_index < num_saved_registers_);
1592 DCHECK(register_index >= kNumCachedRegisters);
1593 DCHECK_EQ(register_index % 2, 0);
1594 register_index -= kNumCachedRegisters;
1595 int offset = kFirstCaptureOnStack - register_index * kWRegSize;
1596 // capture_location is used with Stp instructions to load/store 2 registers.
1597 // The immediate field in the encoding is limited to 7 bits (signed).
1598 if (is_int7(offset)) {
1599 return MemOperand(frame_pointer(), offset);
1600 } else {
1601 __ Add(scratch, frame_pointer(), offset);
1602 return MemOperand(scratch);
1603 }
1604 }
1605
LoadCurrentCharacterUnchecked(int cp_offset,int characters)1606 void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset,
1607 int characters) {
1608 Register offset = current_input_offset();
1609
1610 // The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU
1611 // and the operating system running on the target allow it.
1612 // If unaligned load/stores are not supported then this function must only
1613 // be used to load a single character at a time.
1614
1615 // ARMv8 supports unaligned accesses but V8 or the kernel can decide to
1616 // disable it.
1617 // TODO(pielan): See whether or not we should disable unaligned accesses.
1618 if (!CanReadUnaligned()) {
1619 DCHECK(characters == 1);
1620 }
1621
1622 if (cp_offset != 0) {
1623 if (masm_->emit_debug_code()) {
1624 __ Mov(x10, cp_offset * char_size());
1625 __ Add(x10, x10, Operand(current_input_offset(), SXTW));
1626 __ Cmp(x10, Operand(w10, SXTW));
1627 // The offset needs to fit in a W register.
1628 __ Check(eq, kOffsetOutOfRange);
1629 } else {
1630 __ Add(w10, current_input_offset(), cp_offset * char_size());
1631 }
1632 offset = w10;
1633 }
1634
1635 if (mode_ == LATIN1) {
1636 if (characters == 4) {
1637 __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1638 } else if (characters == 2) {
1639 __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1640 } else {
1641 DCHECK(characters == 1);
1642 __ Ldrb(current_character(), MemOperand(input_end(), offset, SXTW));
1643 }
1644 } else {
1645 DCHECK(mode_ == UC16);
1646 if (characters == 2) {
1647 __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1648 } else {
1649 DCHECK(characters == 1);
1650 __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1651 }
1652 }
1653 }
1654
1655 #endif // V8_INTERPRETED_REGEXP
1656
1657 } // namespace internal
1658 } // namespace v8
1659
1660 #endif // V8_TARGET_ARCH_ARM64
1661