1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #if V8_TARGET_ARCH_ARM64
6
7 #include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
8
9 #include "src/codegen/arm64/macro-assembler-arm64-inl.h"
10 #include "src/codegen/macro-assembler.h"
11 #include "src/logging/log.h"
12 #include "src/objects/objects-inl.h"
13 #include "src/regexp/regexp-macro-assembler.h"
14 #include "src/regexp/regexp-stack.h"
15 #include "src/snapshot/embedded/embedded-data.h"
16 #include "src/strings/unicode.h"
17
18 namespace v8 {
19 namespace internal {
20
21 /*
22 * This assembler uses the following register assignment convention:
23 * - w19 : Used to temporarily store a value before a call to C code.
24 * See CheckNotBackReferenceIgnoreCase.
25 * - x20 : Pointer to the current Code object,
26 * it includes the heap object tag.
27 * - w21 : Current position in input, as negative offset from
28 * the end of the string. Please notice that this is
29 * the byte offset, not the character offset!
30 * - w22 : Currently loaded character. Must be loaded using
31 * LoadCurrentCharacter before using any of the dispatch methods.
32 * - x23 : Points to tip of backtrack stack.
33 * - w24 : Position of the first character minus one: non_position_value.
34 * Used to initialize capture registers.
35 * - x25 : Address at the end of the input string: input_end.
36 * Points to byte after last character in input.
37 * - x26 : Address at the start of the input string: input_start.
38 * - w27 : Where to start in the input string.
39 * - x28 : Output array pointer.
40 * - x29/fp : Frame pointer. Used to access arguments, local variables and
41 * RegExp registers.
42 * - x16/x17 : IP registers, used by assembler. Very volatile.
43 * - sp : Points to tip of C stack.
44 *
45 * - x0-x7 : Used as a cache to store 32 bit capture registers. These
46 * registers need to be retained every time a call to C code
47 * is done.
48 *
49 * The remaining registers are free for computations.
50 * Each call to a public method should retain this convention.
51 *
52 * The stack will have the following structure:
53 *
54 * Location Name Description
55 * (as referred to in
56 * the code)
57 *
58 * - fp[104] Address regexp Address of the JSRegExp object. Unused in
59 * native code, passed to match signature of
60 * the interpreter.
61 * - fp[96] isolate Address of the current isolate.
62 * ^^^ sp when called ^^^
63 * - fp[88] lr Return from the RegExp code.
64 * - fp[80] r29 Old frame pointer (CalleeSaved).
65 * - fp[0..72] r19-r28 Backup of CalleeSaved registers.
66 * - fp[-8] direct_call 1 => Direct call from JavaScript code.
67 * 0 => Call through the runtime system.
68 * - fp[-16] stack_base High end of the memory area to use as
69 * the backtracking stack.
70 * - fp[-24] output_size Output may fit multiple sets of matches.
71 * - fp[-32] input Handle containing the input string.
72 * - fp[-40] success_counter
73 * ^^^^^^^^^^^^^ From here and downwards we store 32 bit values ^^^^^^^^^^^^^
74 * - fp[-44] register N Capture registers initialized with
75 * - fp[-48] register N + 1 non_position_value.
76 * ... The first kNumCachedRegisters (N) registers
77 * ... are cached in x0 to x7.
78 * ... Only positions must be stored in the first
79 * - ... num_saved_registers_ registers.
80 * - ...
81 * - register N + num_registers - 1
82 * ^^^^^^^^^ sp ^^^^^^^^^
83 *
84 * The first num_saved_registers_ registers are initialized to point to
85 * "character -1" in the string (i.e., char_size() bytes before the first
86 * character of the string). The remaining registers start out as garbage.
87 *
88 * The data up to the return address must be placed there by the calling
89 * code and the remaining arguments are passed in registers, e.g. by calling the
90 * code entry as cast to a function with the signature:
91 * int (*match)(String input_string,
92 * int start_index,
93 * Address start,
94 * Address end,
95 * int* capture_output_array,
96 * int num_capture_registers,
97 * byte* stack_area_base,
98 * bool direct_call = false,
99 * Isolate* isolate,
100 * Address regexp);
101 * The call is performed by NativeRegExpMacroAssembler::Execute()
102 * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
103 */
104
// Assembler shorthand used throughout this file: code-emitting statements are
// written as `__ Instr(...)` and are routed through ACCESS_MASM with this
// class's masm_ member.
// NOTE(review): ACCESS_MASM is defined outside this file - confirm its exact
// expansion there.
#define __ ACCESS_MASM(masm_)

// Namespace-scope definition of the static class member kRegExpCodeSize. No
// value is given here (presumably the in-class declaration carries the
// initializer - confirm in the header).
const int RegExpMacroAssemblerARM64::kRegExpCodeSize;
108
RegExpMacroAssemblerARM64(Isolate * isolate,Zone * zone,Mode mode,int registers_to_save)109 RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(Isolate* isolate,
110 Zone* zone, Mode mode,
111 int registers_to_save)
112 : NativeRegExpMacroAssembler(isolate, zone),
113 masm_(new MacroAssembler(isolate, CodeObjectRequired::kYes,
114 NewAssemblerBuffer(kRegExpCodeSize))),
115 mode_(mode),
116 num_registers_(registers_to_save),
117 num_saved_registers_(registers_to_save),
118 entry_label_(),
119 start_label_(),
120 success_label_(),
121 backtrack_label_(),
122 exit_label_() {
123 masm_->set_root_array_available(false);
124
125 DCHECK_EQ(0, registers_to_save % 2);
126 // We can cache at most 16 W registers in x0-x7.
127 STATIC_ASSERT(kNumCachedRegisters <= 16);
128 STATIC_ASSERT((kNumCachedRegisters % 2) == 0);
129 __ CallTarget();
130
131 __ B(&entry_label_); // We'll write the entry code later.
132 __ Bind(&start_label_); // And then continue from here.
133 }
134
~RegExpMacroAssemblerARM64()135 RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() {
136 delete masm_;
137 // Unuse labels in case we throw away the assembler without calling GetCode.
138 entry_label_.Unuse();
139 start_label_.Unuse();
140 success_label_.Unuse();
141 backtrack_label_.Unuse();
142 exit_label_.Unuse();
143 check_preempt_label_.Unuse();
144 stack_overflow_label_.Unuse();
145 fallback_label_.Unuse();
146 }
147
stack_limit_slack()148 int RegExpMacroAssemblerARM64::stack_limit_slack() {
149 return RegExpStack::kStackLimitSlack;
150 }
151
152
AdvanceCurrentPosition(int by)153 void RegExpMacroAssemblerARM64::AdvanceCurrentPosition(int by) {
154 if (by != 0) {
155 __ Add(current_input_offset(),
156 current_input_offset(), by * char_size());
157 }
158 }
159
160
AdvanceRegister(int reg,int by)161 void RegExpMacroAssemblerARM64::AdvanceRegister(int reg, int by) {
162 DCHECK((reg >= 0) && (reg < num_registers_));
163 if (by != 0) {
164 RegisterState register_state = GetRegisterState(reg);
165 switch (register_state) {
166 case STACKED:
167 __ Ldr(w10, register_location(reg));
168 __ Add(w10, w10, by);
169 __ Str(w10, register_location(reg));
170 break;
171 case CACHED_LSW: {
172 Register to_advance = GetCachedRegister(reg);
173 __ Add(to_advance, to_advance, by);
174 break;
175 }
176 case CACHED_MSW: {
177 Register to_advance = GetCachedRegister(reg);
178 // Sign-extend to int64, shift as uint64, cast back to int64.
179 __ Add(
180 to_advance, to_advance,
181 static_cast<int64_t>(static_cast<uint64_t>(static_cast<int64_t>(by))
182 << kWRegSizeInBits));
183 break;
184 }
185 default:
186 UNREACHABLE();
187 break;
188 }
189 }
190 }
191
192
Backtrack()193 void RegExpMacroAssemblerARM64::Backtrack() {
194 CheckPreemption();
195 if (has_backtrack_limit()) {
196 Label next;
197 UseScratchRegisterScope temps(masm_);
198 Register scratch = temps.AcquireW();
199 __ Ldr(scratch, MemOperand(frame_pointer(), kBacktrackCount));
200 __ Add(scratch, scratch, 1);
201 __ Str(scratch, MemOperand(frame_pointer(), kBacktrackCount));
202 __ Cmp(scratch, Operand(backtrack_limit()));
203 __ B(ne, &next);
204
205 // Backtrack limit exceeded.
206 if (can_fallback()) {
207 __ B(&fallback_label_);
208 } else {
209 // Can't fallback, so we treat it as a failed match.
210 Fail();
211 }
212
213 __ bind(&next);
214 }
215 Pop(w10);
216 __ Add(x10, code_pointer(), Operand(w10, UXTW));
217 __ Br(x10);
218 }
219
220
Bind(Label * label)221 void RegExpMacroAssemblerARM64::Bind(Label* label) {
222 __ Bind(label);
223 }
224
BindJumpTarget(Label * label)225 void RegExpMacroAssemblerARM64::BindJumpTarget(Label* label) {
226 __ BindJumpTarget(label);
227 }
228
CheckCharacter(uint32_t c,Label * on_equal)229 void RegExpMacroAssemblerARM64::CheckCharacter(uint32_t c, Label* on_equal) {
230 CompareAndBranchOrBacktrack(current_character(), c, eq, on_equal);
231 }
232
233
CheckCharacterGT(uc16 limit,Label * on_greater)234 void RegExpMacroAssemblerARM64::CheckCharacterGT(uc16 limit,
235 Label* on_greater) {
236 CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater);
237 }
238
CheckAtStart(int cp_offset,Label * on_at_start)239 void RegExpMacroAssemblerARM64::CheckAtStart(int cp_offset,
240 Label* on_at_start) {
241 __ Add(w10, current_input_offset(),
242 Operand(-char_size() + cp_offset * char_size()));
243 __ Cmp(w10, string_start_minus_one());
244 BranchOrBacktrack(eq, on_at_start);
245 }
246
CheckNotAtStart(int cp_offset,Label * on_not_at_start)247 void RegExpMacroAssemblerARM64::CheckNotAtStart(int cp_offset,
248 Label* on_not_at_start) {
249 __ Add(w10, current_input_offset(),
250 Operand(-char_size() + cp_offset * char_size()));
251 __ Cmp(w10, string_start_minus_one());
252 BranchOrBacktrack(ne, on_not_at_start);
253 }
254
255
CheckCharacterLT(uc16 limit,Label * on_less)256 void RegExpMacroAssemblerARM64::CheckCharacterLT(uc16 limit, Label* on_less) {
257 CompareAndBranchOrBacktrack(current_character(), limit, lo, on_less);
258 }
259
260
CheckCharacters(Vector<const uc16> str,int cp_offset,Label * on_failure,bool check_end_of_string)261 void RegExpMacroAssemblerARM64::CheckCharacters(Vector<const uc16> str,
262 int cp_offset,
263 Label* on_failure,
264 bool check_end_of_string) {
265 // This method is only ever called from the cctests.
266
267 if (check_end_of_string) {
268 // Is last character of required match inside string.
269 CheckPosition(cp_offset + str.length() - 1, on_failure);
270 }
271
272 Register characters_address = x11;
273
274 __ Add(characters_address,
275 input_end(),
276 Operand(current_input_offset(), SXTW));
277 if (cp_offset != 0) {
278 __ Add(characters_address, characters_address, cp_offset * char_size());
279 }
280
281 for (int i = 0; i < str.length(); i++) {
282 if (mode_ == LATIN1) {
283 __ Ldrb(w10, MemOperand(characters_address, 1, PostIndex));
284 DCHECK_GE(String::kMaxOneByteCharCode, str[i]);
285 } else {
286 __ Ldrh(w10, MemOperand(characters_address, 2, PostIndex));
287 }
288 CompareAndBranchOrBacktrack(w10, str[i], ne, on_failure);
289 }
290 }
291
292
CheckGreedyLoop(Label * on_equal)293 void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
294 __ Ldr(w10, MemOperand(backtrack_stackpointer()));
295 __ Cmp(current_input_offset(), w10);
296 __ Cset(x11, eq);
297 __ Add(backtrack_stackpointer(),
298 backtrack_stackpointer(), Operand(x11, LSL, kWRegSizeLog2));
299 BranchOrBacktrack(eq, on_equal);
300 }
301
CheckNotBackReferenceIgnoreCase(int start_reg,bool read_backward,bool unicode,Label * on_no_match)302 void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
303 int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
304 Label fallthrough;
305
306 Register capture_start_offset = w10;
307 // Save the capture length in a callee-saved register so it will
308 // be preserved if we call a C helper.
309 Register capture_length = w19;
310 DCHECK(kCalleeSaved.IncludesAliasOf(capture_length));
311
312 // Find length of back-referenced capture.
313 DCHECK_EQ(0, start_reg % 2);
314 if (start_reg < kNumCachedRegisters) {
315 __ Mov(capture_start_offset.X(), GetCachedRegister(start_reg));
316 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
317 } else {
318 __ Ldp(w11, capture_start_offset, capture_location(start_reg, x10));
319 }
320 __ Sub(capture_length, w11, capture_start_offset); // Length to check.
321
322 // At this point, the capture registers are either both set or both cleared.
323 // If the capture length is zero, then the capture is either empty or cleared.
324 // Fall through in both cases.
325 __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
326
327 // Check that there are enough characters left in the input.
328 if (read_backward) {
329 __ Add(w12, string_start_minus_one(), capture_length);
330 __ Cmp(current_input_offset(), w12);
331 BranchOrBacktrack(le, on_no_match);
332 } else {
333 __ Cmn(capture_length, current_input_offset());
334 BranchOrBacktrack(gt, on_no_match);
335 }
336
337 if (mode_ == LATIN1) {
338 Label success;
339 Label fail;
340 Label loop_check;
341
342 Register capture_start_address = x12;
343 Register capture_end_addresss = x13;
344 Register current_position_address = x14;
345
346 __ Add(capture_start_address,
347 input_end(),
348 Operand(capture_start_offset, SXTW));
349 __ Add(capture_end_addresss,
350 capture_start_address,
351 Operand(capture_length, SXTW));
352 __ Add(current_position_address,
353 input_end(),
354 Operand(current_input_offset(), SXTW));
355 if (read_backward) {
356 // Offset by length when matching backwards.
357 __ Sub(current_position_address, current_position_address,
358 Operand(capture_length, SXTW));
359 }
360
361 Label loop;
362 __ Bind(&loop);
363 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
364 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
365 __ Cmp(w10, w11);
366 __ B(eq, &loop_check);
367
368 // Mismatch, try case-insensitive match (converting letters to lower-case).
369 __ Orr(w10, w10, 0x20); // Convert capture character to lower-case.
370 __ Orr(w11, w11, 0x20); // Also convert input character.
371 __ Cmp(w11, w10);
372 __ B(ne, &fail);
373 __ Sub(w10, w10, 'a');
374 __ Cmp(w10, 'z' - 'a'); // Is w10 a lowercase letter?
375 __ B(ls, &loop_check); // In range 'a'-'z'.
376 // Latin-1: Check for values in range [224,254] but not 247.
377 __ Sub(w10, w10, 224 - 'a');
378 __ Cmp(w10, 254 - 224);
379 __ Ccmp(w10, 247 - 224, ZFlag, ls); // Check for 247.
380 __ B(eq, &fail); // Weren't Latin-1 letters.
381
382 __ Bind(&loop_check);
383 __ Cmp(capture_start_address, capture_end_addresss);
384 __ B(lt, &loop);
385 __ B(&success);
386
387 __ Bind(&fail);
388 BranchOrBacktrack(al, on_no_match);
389
390 __ Bind(&success);
391 // Compute new value of character position after the matched part.
392 __ Sub(current_input_offset().X(), current_position_address, input_end());
393 if (read_backward) {
394 __ Sub(current_input_offset().X(), current_input_offset().X(),
395 Operand(capture_length, SXTW));
396 }
397 if (masm_->emit_debug_code()) {
398 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
399 __ Ccmp(current_input_offset(), 0, NoFlag, eq);
400 // The current input offset should be <= 0, and fit in a W register.
401 __ Check(le, AbortReason::kOffsetOutOfRange);
402 }
403 } else {
404 DCHECK(mode_ == UC16);
405 int argument_count = 4;
406
407 // The cached registers need to be retained.
408 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
409 DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
410 __ PushCPURegList(cached_registers);
411
412 // Put arguments into arguments registers.
413 // Parameters are
414 // x0: Address byte_offset1 - Address captured substring's start.
415 // x1: Address byte_offset2 - Address of current character position.
416 // w2: size_t byte_length - length of capture in bytes(!)
417 // x3: Isolate* isolate.
418
419 // Address of start of capture.
420 __ Add(x0, input_end(), Operand(capture_start_offset, SXTW));
421 // Length of capture.
422 __ Mov(w2, capture_length);
423 // Address of current input position.
424 __ Add(x1, input_end(), Operand(current_input_offset(), SXTW));
425 if (read_backward) {
426 __ Sub(x1, x1, Operand(capture_length, SXTW));
427 }
428 // Isolate.
429 __ Mov(x3, ExternalReference::isolate_address(isolate()));
430
431 {
432 AllowExternalCallThatCantCauseGC scope(masm_);
433 ExternalReference function =
434 unicode ? ExternalReference::re_case_insensitive_compare_unicode(
435 isolate())
436 : ExternalReference::re_case_insensitive_compare_non_unicode(
437 isolate());
438 __ CallCFunction(function, argument_count);
439 }
440
441 // Check if function returned non-zero for success or zero for failure.
442 // x0 is one of the registers used as a cache so it must be tested before
443 // the cache is restored.
444 __ Cmp(x0, 0);
445 __ PopCPURegList(cached_registers);
446 BranchOrBacktrack(eq, on_no_match);
447
448 // On success, advance position by length of capture.
449 if (read_backward) {
450 __ Sub(current_input_offset(), current_input_offset(), capture_length);
451 } else {
452 __ Add(current_input_offset(), current_input_offset(), capture_length);
453 }
454 }
455
456 __ Bind(&fallthrough);
457 }
458
CheckNotBackReference(int start_reg,bool read_backward,Label * on_no_match)459 void RegExpMacroAssemblerARM64::CheckNotBackReference(int start_reg,
460 bool read_backward,
461 Label* on_no_match) {
462 Label fallthrough;
463
464 Register capture_start_address = x12;
465 Register capture_end_address = x13;
466 Register current_position_address = x14;
467 Register capture_length = w15;
468
469 // Find length of back-referenced capture.
470 DCHECK_EQ(0, start_reg % 2);
471 if (start_reg < kNumCachedRegisters) {
472 __ Mov(x10, GetCachedRegister(start_reg));
473 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
474 } else {
475 __ Ldp(w11, w10, capture_location(start_reg, x10));
476 }
477 __ Sub(capture_length, w11, w10); // Length to check.
478
479 // At this point, the capture registers are either both set or both cleared.
480 // If the capture length is zero, then the capture is either empty or cleared.
481 // Fall through in both cases.
482 __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
483
484 // Check that there are enough characters left in the input.
485 if (read_backward) {
486 __ Add(w12, string_start_minus_one(), capture_length);
487 __ Cmp(current_input_offset(), w12);
488 BranchOrBacktrack(le, on_no_match);
489 } else {
490 __ Cmn(capture_length, current_input_offset());
491 BranchOrBacktrack(gt, on_no_match);
492 }
493
494 // Compute pointers to match string and capture string
495 __ Add(capture_start_address, input_end(), Operand(w10, SXTW));
496 __ Add(capture_end_address,
497 capture_start_address,
498 Operand(capture_length, SXTW));
499 __ Add(current_position_address,
500 input_end(),
501 Operand(current_input_offset(), SXTW));
502 if (read_backward) {
503 // Offset by length when matching backwards.
504 __ Sub(current_position_address, current_position_address,
505 Operand(capture_length, SXTW));
506 }
507
508 Label loop;
509 __ Bind(&loop);
510 if (mode_ == LATIN1) {
511 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
512 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
513 } else {
514 DCHECK(mode_ == UC16);
515 __ Ldrh(w10, MemOperand(capture_start_address, 2, PostIndex));
516 __ Ldrh(w11, MemOperand(current_position_address, 2, PostIndex));
517 }
518 __ Cmp(w10, w11);
519 BranchOrBacktrack(ne, on_no_match);
520 __ Cmp(capture_start_address, capture_end_address);
521 __ B(lt, &loop);
522
523 // Move current character position to position after match.
524 __ Sub(current_input_offset().X(), current_position_address, input_end());
525 if (read_backward) {
526 __ Sub(current_input_offset().X(), current_input_offset().X(),
527 Operand(capture_length, SXTW));
528 }
529
530 if (masm_->emit_debug_code()) {
531 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
532 __ Ccmp(current_input_offset(), 0, NoFlag, eq);
533 // The current input offset should be <= 0, and fit in a W register.
534 __ Check(le, AbortReason::kOffsetOutOfRange);
535 }
536 __ Bind(&fallthrough);
537 }
538
539
CheckNotCharacter(unsigned c,Label * on_not_equal)540 void RegExpMacroAssemblerARM64::CheckNotCharacter(unsigned c,
541 Label* on_not_equal) {
542 CompareAndBranchOrBacktrack(current_character(), c, ne, on_not_equal);
543 }
544
545
CheckCharacterAfterAnd(uint32_t c,uint32_t mask,Label * on_equal)546 void RegExpMacroAssemblerARM64::CheckCharacterAfterAnd(uint32_t c,
547 uint32_t mask,
548 Label* on_equal) {
549 __ And(w10, current_character(), mask);
550 CompareAndBranchOrBacktrack(w10, c, eq, on_equal);
551 }
552
553
CheckNotCharacterAfterAnd(unsigned c,unsigned mask,Label * on_not_equal)554 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterAnd(unsigned c,
555 unsigned mask,
556 Label* on_not_equal) {
557 __ And(w10, current_character(), mask);
558 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
559 }
560
561
CheckNotCharacterAfterMinusAnd(uc16 c,uc16 minus,uc16 mask,Label * on_not_equal)562 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterMinusAnd(
563 uc16 c,
564 uc16 minus,
565 uc16 mask,
566 Label* on_not_equal) {
567 DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
568 __ Sub(w10, current_character(), minus);
569 __ And(w10, w10, mask);
570 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
571 }
572
573
CheckCharacterInRange(uc16 from,uc16 to,Label * on_in_range)574 void RegExpMacroAssemblerARM64::CheckCharacterInRange(
575 uc16 from,
576 uc16 to,
577 Label* on_in_range) {
578 __ Sub(w10, current_character(), from);
579 // Unsigned lower-or-same condition.
580 CompareAndBranchOrBacktrack(w10, to - from, ls, on_in_range);
581 }
582
583
CheckCharacterNotInRange(uc16 from,uc16 to,Label * on_not_in_range)584 void RegExpMacroAssemblerARM64::CheckCharacterNotInRange(
585 uc16 from,
586 uc16 to,
587 Label* on_not_in_range) {
588 __ Sub(w10, current_character(), from);
589 // Unsigned higher condition.
590 CompareAndBranchOrBacktrack(w10, to - from, hi, on_not_in_range);
591 }
592
593
CheckBitInTable(Handle<ByteArray> table,Label * on_bit_set)594 void RegExpMacroAssemblerARM64::CheckBitInTable(
595 Handle<ByteArray> table,
596 Label* on_bit_set) {
597 __ Mov(x11, Operand(table));
598 if ((mode_ != LATIN1) || (kTableMask != String::kMaxOneByteCharCode)) {
599 __ And(w10, current_character(), kTableMask);
600 __ Add(w10, w10, ByteArray::kHeaderSize - kHeapObjectTag);
601 } else {
602 __ Add(w10, current_character(), ByteArray::kHeaderSize - kHeapObjectTag);
603 }
604 __ Ldrb(w11, MemOperand(x11, w10, UXTW));
605 CompareAndBranchOrBacktrack(w11, 0, ne, on_bit_set);
606 }
607
608
CheckSpecialCharacterClass(uc16 type,Label * on_no_match)609 bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type,
610 Label* on_no_match) {
611 // Range checks (c in min..max) are generally implemented by an unsigned
612 // (c - min) <= (max - min) check
613 switch (type) {
614 case 's':
615 // Match space-characters
616 if (mode_ == LATIN1) {
617 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
618 Label success;
619 // Check for ' ' or 0x00A0.
620 __ Cmp(current_character(), ' ');
621 __ Ccmp(current_character(), 0x00A0, ZFlag, ne);
622 __ B(eq, &success);
623 // Check range 0x09..0x0D.
624 __ Sub(w10, current_character(), '\t');
625 CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match);
626 __ Bind(&success);
627 return true;
628 }
629 return false;
630 case 'S':
631 // The emitted code for generic character classes is good enough.
632 return false;
633 case 'd':
634 // Match ASCII digits ('0'..'9').
635 __ Sub(w10, current_character(), '0');
636 CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match);
637 return true;
638 case 'D':
639 // Match ASCII non-digits.
640 __ Sub(w10, current_character(), '0');
641 CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match);
642 return true;
643 case '.': {
644 // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
645 // Here we emit the conditional branch only once at the end to make branch
646 // prediction more efficient, even though we could branch out of here
647 // as soon as a character matches.
648 __ Cmp(current_character(), 0x0A);
649 __ Ccmp(current_character(), 0x0D, ZFlag, ne);
650 if (mode_ == UC16) {
651 __ Sub(w10, current_character(), 0x2028);
652 // If the Z flag was set we clear the flags to force a branch.
653 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
654 // ls -> !((C==1) && (Z==0))
655 BranchOrBacktrack(ls, on_no_match);
656 } else {
657 BranchOrBacktrack(eq, on_no_match);
658 }
659 return true;
660 }
661 case 'n': {
662 // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
663 // We have to check all 4 newline characters before emitting
664 // the conditional branch.
665 __ Cmp(current_character(), 0x0A);
666 __ Ccmp(current_character(), 0x0D, ZFlag, ne);
667 if (mode_ == UC16) {
668 __ Sub(w10, current_character(), 0x2028);
669 // If the Z flag was set we clear the flags to force a fall-through.
670 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
671 // hi -> (C==1) && (Z==0)
672 BranchOrBacktrack(hi, on_no_match);
673 } else {
674 BranchOrBacktrack(ne, on_no_match);
675 }
676 return true;
677 }
678 case 'w': {
679 if (mode_ != LATIN1) {
680 // Table is 256 entries, so all Latin1 characters can be tested.
681 CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match);
682 }
683 ExternalReference map = ExternalReference::re_word_character_map(isolate());
684 __ Mov(x10, map);
685 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
686 CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match);
687 return true;
688 }
689 case 'W': {
690 Label done;
691 if (mode_ != LATIN1) {
692 // Table is 256 entries, so all Latin1 characters can be tested.
693 __ Cmp(current_character(), 'z');
694 __ B(hi, &done);
695 }
696 ExternalReference map = ExternalReference::re_word_character_map(isolate());
697 __ Mov(x10, map);
698 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
699 CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match);
700 __ Bind(&done);
701 return true;
702 }
703 case '*':
704 // Match any character.
705 return true;
706 // No custom implementation (yet): s(UC16), S(UC16).
707 default:
708 return false;
709 }
710 }
711
712
Fail()713 void RegExpMacroAssemblerARM64::Fail() {
714 __ Mov(w0, FAILURE);
715 __ B(&exit_label_);
716 }
717
718
GetCode(Handle<String> source)719 Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
720 Label return_w0;
721 // Finalize code - write the entry point code now we know how many
722 // registers we need.
723
724 // Entry code:
725 __ Bind(&entry_label_);
726
727 // Arguments on entry:
728 // x0: String input
729 // x1: int start_offset
730 // x2: byte* input_start
731 // x3: byte* input_end
732 // x4: int* output array
733 // x5: int output array size
734 // x6: Address stack_base
735 // x7: int direct_call
736
737 // sp[8]: address of the current isolate
738 // sp[0]: secondary link/return address used by native call
739
740 // Tell the system that we have a stack frame. Because the type is MANUAL, no
741 // code is generated.
742 FrameScope scope(masm_, StackFrame::MANUAL);
743
744 // Push registers on the stack, only push the argument registers that we need.
745 CPURegList argument_registers(x0, x5, x6, x7);
746
747 CPURegList registers_to_retain = kCalleeSaved;
748 registers_to_retain.Combine(fp);
749 registers_to_retain.Combine(lr);
750
751 DCHECK(registers_to_retain.IncludesAliasOf(lr));
752 __ PushCPURegList<TurboAssembler::kSignLR>(registers_to_retain);
753 __ PushCPURegList(argument_registers);
754
755 // Set frame pointer in place.
756 __ Add(frame_pointer(), sp, argument_registers.Count() * kSystemPointerSize);
757
758 // Initialize callee-saved registers.
759 __ Mov(start_offset(), w1);
760 __ Mov(input_start(), x2);
761 __ Mov(input_end(), x3);
762 __ Mov(output_array(), x4);
763
764 // Set the number of registers we will need to allocate, that is:
765 // - kSuccessCounter / success_counter (X register)
766 // - kBacktrackCount (X register)
767 // - (num_registers_ - kNumCachedRegisters) (W registers)
768 int num_wreg_to_allocate = num_registers_ - kNumCachedRegisters;
769 // Do not allocate registers on the stack if they can all be cached.
770 if (num_wreg_to_allocate < 0) { num_wreg_to_allocate = 0; }
771 // Make room for the success_counter and kBacktrackCount. Each X (64-bit)
772 // register is equivalent to two W (32-bit) registers.
773 num_wreg_to_allocate += 2 + 2;
774
775 // Make sure the stack alignment will be respected.
776 int alignment = masm_->ActivationFrameAlignment();
777 DCHECK_EQ(alignment % 16, 0);
778 int align_mask = (alignment / kWRegSize) - 1;
779 num_wreg_to_allocate = (num_wreg_to_allocate + align_mask) & ~align_mask;
780
781 // Check if we have space on the stack.
782 Label stack_limit_hit;
783 Label stack_ok;
784
785 ExternalReference stack_limit =
786 ExternalReference::address_of_jslimit(isolate());
787 __ Mov(x10, stack_limit);
788 __ Ldr(x10, MemOperand(x10));
789 __ Subs(x10, sp, x10);
790
791 // Handle it if the stack pointer is already below the stack limit.
792 __ B(ls, &stack_limit_hit);
793
794 // Check if there is room for the variable number of registers above
795 // the stack limit.
796 __ Cmp(x10, num_wreg_to_allocate * kWRegSize);
797 __ B(hs, &stack_ok);
798
799 // Exit with OutOfMemory exception. There is not enough space on the stack
800 // for our working registers.
801 __ Mov(w0, EXCEPTION);
802 __ B(&return_w0);
803
804 __ Bind(&stack_limit_hit);
805 CallCheckStackGuardState(x10);
806 // If returned value is non-zero, we exit with the returned value as result.
807 __ Cbnz(w0, &return_w0);
808
809 __ Bind(&stack_ok);
810
811 // Allocate space on stack.
812 __ Claim(num_wreg_to_allocate, kWRegSize);
813
814 // Initialize success_counter and kBacktrackCount with 0.
815 __ Str(wzr, MemOperand(frame_pointer(), kSuccessCounter));
816 __ Str(wzr, MemOperand(frame_pointer(), kBacktrackCount));
817
818 // Find negative length (offset of start relative to end).
819 __ Sub(x10, input_start(), input_end());
820 if (masm_->emit_debug_code()) {
821 // Check that the size of the input string chars is in range.
822 __ Neg(x11, x10);
823 __ Cmp(x11, SeqTwoByteString::kMaxCharsSize);
824 __ Check(ls, AbortReason::kInputStringTooLong);
825 }
826 __ Mov(current_input_offset(), w10);
827
828 // The non-position value is used as a clearing value for the
829 // capture registers, it corresponds to the position of the first character
830 // minus one.
831 __ Sub(string_start_minus_one(), current_input_offset(), char_size());
832 __ Sub(string_start_minus_one(), string_start_minus_one(),
833 Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0));
834 // We can store this value twice in an X register for initializing
835 // on-stack registers later.
836 __ Orr(twice_non_position_value(), string_start_minus_one().X(),
837 Operand(string_start_minus_one().X(), LSL, kWRegSizeInBits));
838
839 // Initialize code pointer register.
840 __ Mov(code_pointer(), Operand(masm_->CodeObject()));
841
842 Label load_char_start_regexp, start_regexp;
843 // Load newline if index is at start, previous character otherwise.
844 __ Cbnz(start_offset(), &load_char_start_regexp);
845 __ Mov(current_character(), '\n');
846 __ B(&start_regexp);
847
848 // Global regexp restarts matching here.
849 __ Bind(&load_char_start_regexp);
850 // Load previous char as initial value of current character register.
851 LoadCurrentCharacterUnchecked(-1, 1);
852 __ Bind(&start_regexp);
853 // Initialize on-stack registers.
854 if (num_saved_registers_ > 0) {
855 ClearRegisters(0, num_saved_registers_ - 1);
856 }
857
858 // Initialize backtrack stack pointer.
859 __ Ldr(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackBase));
860
861 // Execute
862 __ B(&start_label_);
863
864 if (backtrack_label_.is_linked()) {
865 __ Bind(&backtrack_label_);
866 Backtrack();
867 }
868
869 if (success_label_.is_linked()) {
870 Register first_capture_start = w15;
871
872 // Save captures when successful.
873 __ Bind(&success_label_);
874
875 if (num_saved_registers_ > 0) {
876 // V8 expects the output to be an int32_t array.
877 Register capture_start = w12;
878 Register capture_end = w13;
879 Register input_length = w14;
880
881 // Copy captures to output.
882
883 // Get string length.
884 __ Sub(x10, input_end(), input_start());
885 if (masm_->emit_debug_code()) {
886 // Check that the size of the input string chars is in range.
887 __ Cmp(x10, SeqTwoByteString::kMaxCharsSize);
888 __ Check(ls, AbortReason::kInputStringTooLong);
889 }
890 // input_start has a start_offset offset on entry. We need to include
891 // it when computing the length of the whole string.
892 if (mode_ == UC16) {
893 __ Add(input_length, start_offset(), Operand(w10, LSR, 1));
894 } else {
895 __ Add(input_length, start_offset(), w10);
896 }
897
898 // Copy the results to the output array from the cached registers first.
899 for (int i = 0;
900 (i < num_saved_registers_) && (i < kNumCachedRegisters);
901 i += 2) {
902 __ Mov(capture_start.X(), GetCachedRegister(i));
903 __ Lsr(capture_end.X(), capture_start.X(), kWRegSizeInBits);
904 if ((i == 0) && global_with_zero_length_check()) {
905 // Keep capture start for the zero-length check later.
906 __ Mov(first_capture_start, capture_start);
907 }
908 // Offsets need to be relative to the start of the string.
909 if (mode_ == UC16) {
910 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
911 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
912 } else {
913 __ Add(capture_start, input_length, capture_start);
914 __ Add(capture_end, input_length, capture_end);
915 }
916 // The output pointer advances for a possible global match.
917 __ Stp(capture_start, capture_end,
918 MemOperand(output_array(), kSystemPointerSize, PostIndex));
919 }
920
921 // Only carry on if there are more than kNumCachedRegisters capture
922 // registers.
923 int num_registers_left_on_stack =
924 num_saved_registers_ - kNumCachedRegisters;
925 if (num_registers_left_on_stack > 0) {
926 Register base = x10;
927 // There are always an even number of capture registers. A couple of
928 // registers determine one match with two offsets.
929 DCHECK_EQ(0, num_registers_left_on_stack % 2);
930 __ Add(base, frame_pointer(), kFirstCaptureOnStack);
931
932 // We can unroll the loop here, we should not unroll for less than 2
933 // registers.
934 STATIC_ASSERT(kNumRegistersToUnroll > 2);
935 if (num_registers_left_on_stack <= kNumRegistersToUnroll) {
936 for (int i = 0; i < num_registers_left_on_stack / 2; i++) {
937 __ Ldp(capture_end, capture_start,
938 MemOperand(base, -kSystemPointerSize, PostIndex));
939 if ((i == 0) && global_with_zero_length_check()) {
940 // Keep capture start for the zero-length check later.
941 __ Mov(first_capture_start, capture_start);
942 }
943 // Offsets need to be relative to the start of the string.
944 if (mode_ == UC16) {
945 __ Add(capture_start,
946 input_length,
947 Operand(capture_start, ASR, 1));
948 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
949 } else {
950 __ Add(capture_start, input_length, capture_start);
951 __ Add(capture_end, input_length, capture_end);
952 }
953 // The output pointer advances for a possible global match.
954 __ Stp(capture_start, capture_end,
955 MemOperand(output_array(), kSystemPointerSize, PostIndex));
956 }
957 } else {
958 Label loop, start;
959 __ Mov(x11, num_registers_left_on_stack);
960
961 __ Ldp(capture_end, capture_start,
962 MemOperand(base, -kSystemPointerSize, PostIndex));
963 if (global_with_zero_length_check()) {
964 __ Mov(first_capture_start, capture_start);
965 }
966 __ B(&start);
967
968 __ Bind(&loop);
969 __ Ldp(capture_end, capture_start,
970 MemOperand(base, -kSystemPointerSize, PostIndex));
971 __ Bind(&start);
972 if (mode_ == UC16) {
973 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
974 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
975 } else {
976 __ Add(capture_start, input_length, capture_start);
977 __ Add(capture_end, input_length, capture_end);
978 }
979 // The output pointer advances for a possible global match.
980 __ Stp(capture_start, capture_end,
981 MemOperand(output_array(), kSystemPointerSize, PostIndex));
982 __ Sub(x11, x11, 2);
983 __ Cbnz(x11, &loop);
984 }
985 }
986 }
987
988 if (global()) {
989 Register success_counter = w0;
990 Register output_size = x10;
991 // Restart matching if the regular expression is flagged as global.
992
993 // Increment success counter.
994 __ Ldr(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
995 __ Add(success_counter, success_counter, 1);
996 __ Str(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
997
998 // Capture results have been stored, so the number of remaining global
999 // output registers is reduced by the number of stored captures.
1000 __ Ldr(output_size, MemOperand(frame_pointer(), kOutputSize));
1001 __ Sub(output_size, output_size, num_saved_registers_);
1002 // Check whether we have enough room for another set of capture results.
1003 __ Cmp(output_size, num_saved_registers_);
1004 __ B(lt, &return_w0);
1005
1006 // The output pointer is already set to the next field in the output
1007 // array.
1008 // Update output size on the frame before we restart matching.
1009 __ Str(output_size, MemOperand(frame_pointer(), kOutputSize));
1010
1011 if (global_with_zero_length_check()) {
1012 // Special case for zero-length matches.
1013 __ Cmp(current_input_offset(), first_capture_start);
1014 // Not a zero-length match, restart.
1015 __ B(ne, &load_char_start_regexp);
1016 // Offset from the end is zero if we already reached the end.
1017 __ Cbz(current_input_offset(), &return_w0);
1018 // Advance current position after a zero-length match.
1019 Label advance;
1020 __ bind(&advance);
1021 __ Add(current_input_offset(),
1022 current_input_offset(),
1023 Operand((mode_ == UC16) ? 2 : 1));
1024 if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
1025 }
1026
1027 __ B(&load_char_start_regexp);
1028 } else {
1029 __ Mov(w0, SUCCESS);
1030 }
1031 }
1032
1033 if (exit_label_.is_linked()) {
1034 // Exit and return w0
1035 __ Bind(&exit_label_);
1036 if (global()) {
1037 __ Ldr(w0, MemOperand(frame_pointer(), kSuccessCounter));
1038 }
1039 }
1040
1041 __ Bind(&return_w0);
1042
1043 // Set stack pointer back to first register to retain
1044 __ Mov(sp, fp);
1045
1046 // Restore registers.
1047 __ PopCPURegList<TurboAssembler::kAuthLR>(registers_to_retain);
1048
1049 __ Ret();
1050
1051 Label exit_with_exception;
1052 // Registers x0 to x7 are used to store the first captures, they need to be
1053 // retained over calls to C++ code.
1054 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
1055 DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
1056
1057 if (check_preempt_label_.is_linked()) {
1058 __ Bind(&check_preempt_label_);
1059 SaveLinkRegister();
1060 // The cached registers need to be retained.
1061 __ PushCPURegList(cached_registers);
1062 CallCheckStackGuardState(x10);
1063 // Returning from the regexp code restores the stack (sp <- fp)
1064 // so we don't need to drop the link register from it before exiting.
1065 __ Cbnz(w0, &return_w0);
1066 // Reset the cached registers.
1067 __ PopCPURegList(cached_registers);
1068 RestoreLinkRegister();
1069 __ Ret();
1070 }
1071
1072 if (stack_overflow_label_.is_linked()) {
1073 __ Bind(&stack_overflow_label_);
1074 SaveLinkRegister();
1075 // The cached registers need to be retained.
1076 __ PushCPURegList(cached_registers);
1077 // Call GrowStack(backtrack_stackpointer(), &stack_base)
1078 __ Mov(x2, ExternalReference::isolate_address(isolate()));
1079 __ Add(x1, frame_pointer(), kStackBase);
1080 __ Mov(x0, backtrack_stackpointer());
1081 ExternalReference grow_stack =
1082 ExternalReference::re_grow_stack(isolate());
1083 __ CallCFunction(grow_stack, 3);
1084 // If return nullptr, we have failed to grow the stack, and
1085 // must exit with a stack-overflow exception.
1086 // Returning from the regexp code restores the stack (sp <- fp)
1087 // so we don't need to drop the link register from it before exiting.
1088 __ Cbz(w0, &exit_with_exception);
1089 // Otherwise use return value as new stack pointer.
1090 __ Mov(backtrack_stackpointer(), x0);
1091 // Reset the cached registers.
1092 __ PopCPURegList(cached_registers);
1093 RestoreLinkRegister();
1094 __ Ret();
1095 }
1096
1097 if (exit_with_exception.is_linked()) {
1098 __ Bind(&exit_with_exception);
1099 __ Mov(w0, EXCEPTION);
1100 __ B(&return_w0);
1101 }
1102
1103 if (fallback_label_.is_linked()) {
1104 __ Bind(&fallback_label_);
1105 __ Mov(w0, FALLBACK_TO_EXPERIMENTAL);
1106 __ B(&return_w0);
1107 }
1108
1109 CodeDesc code_desc;
1110 masm_->GetCode(isolate(), &code_desc);
1111 Handle<Code> code =
1112 Factory::CodeBuilder(isolate(), code_desc, CodeKind::REGEXP)
1113 .set_self_reference(masm_->CodeObject())
1114 .Build();
1115 PROFILE(masm_->isolate(),
1116 RegExpCodeCreateEvent(Handle<AbstractCode>::cast(code), source));
1117 return Handle<HeapObject>::cast(code);
1118 }
1119
1120
GoTo(Label * to)1121 void RegExpMacroAssemblerARM64::GoTo(Label* to) {
1122 BranchOrBacktrack(al, to);
1123 }
1124
IfRegisterGE(int reg,int comparand,Label * if_ge)1125 void RegExpMacroAssemblerARM64::IfRegisterGE(int reg, int comparand,
1126 Label* if_ge) {
1127 Register to_compare = GetRegister(reg, w10);
1128 CompareAndBranchOrBacktrack(to_compare, comparand, ge, if_ge);
1129 }
1130
1131
IfRegisterLT(int reg,int comparand,Label * if_lt)1132 void RegExpMacroAssemblerARM64::IfRegisterLT(int reg, int comparand,
1133 Label* if_lt) {
1134 Register to_compare = GetRegister(reg, w10);
1135 CompareAndBranchOrBacktrack(to_compare, comparand, lt, if_lt);
1136 }
1137
1138
IfRegisterEqPos(int reg,Label * if_eq)1139 void RegExpMacroAssemblerARM64::IfRegisterEqPos(int reg, Label* if_eq) {
1140 Register to_compare = GetRegister(reg, w10);
1141 __ Cmp(to_compare, current_input_offset());
1142 BranchOrBacktrack(eq, if_eq);
1143 }
1144
1145 RegExpMacroAssembler::IrregexpImplementation
Implementation()1146 RegExpMacroAssemblerARM64::Implementation() {
1147 return kARM64Implementation;
1148 }
1149
1150
PopCurrentPosition()1151 void RegExpMacroAssemblerARM64::PopCurrentPosition() {
1152 Pop(current_input_offset());
1153 }
1154
1155
PopRegister(int register_index)1156 void RegExpMacroAssemblerARM64::PopRegister(int register_index) {
1157 Pop(w10);
1158 StoreRegister(register_index, w10);
1159 }
1160
1161
PushBacktrack(Label * label)1162 void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) {
1163 if (label->is_bound()) {
1164 int target = label->pos();
1165 __ Mov(w10, target + Code::kHeaderSize - kHeapObjectTag);
1166 } else {
1167 __ Adr(x10, label, MacroAssembler::kAdrFar);
1168 __ Sub(x10, x10, code_pointer());
1169 if (masm_->emit_debug_code()) {
1170 __ Cmp(x10, kWRegMask);
1171 // The code offset has to fit in a W register.
1172 __ Check(ls, AbortReason::kOffsetOutOfRange);
1173 }
1174 }
1175 Push(w10);
1176 CheckStackLimit();
1177 }
1178
1179
PushCurrentPosition()1180 void RegExpMacroAssemblerARM64::PushCurrentPosition() {
1181 Push(current_input_offset());
1182 }
1183
1184
PushRegister(int register_index,StackCheckFlag check_stack_limit)1185 void RegExpMacroAssemblerARM64::PushRegister(int register_index,
1186 StackCheckFlag check_stack_limit) {
1187 Register to_push = GetRegister(register_index, w10);
1188 Push(to_push);
1189 if (check_stack_limit) CheckStackLimit();
1190 }
1191
1192
ReadCurrentPositionFromRegister(int reg)1193 void RegExpMacroAssemblerARM64::ReadCurrentPositionFromRegister(int reg) {
1194 RegisterState register_state = GetRegisterState(reg);
1195 switch (register_state) {
1196 case STACKED:
1197 __ Ldr(current_input_offset(), register_location(reg));
1198 break;
1199 case CACHED_LSW:
1200 __ Mov(current_input_offset(), GetCachedRegister(reg).W());
1201 break;
1202 case CACHED_MSW:
1203 __ Lsr(current_input_offset().X(), GetCachedRegister(reg),
1204 kWRegSizeInBits);
1205 break;
1206 default:
1207 UNREACHABLE();
1208 }
1209 }
1210
1211
ReadStackPointerFromRegister(int reg)1212 void RegExpMacroAssemblerARM64::ReadStackPointerFromRegister(int reg) {
1213 Register read_from = GetRegister(reg, w10);
1214 __ Ldr(x11, MemOperand(frame_pointer(), kStackBase));
1215 __ Add(backtrack_stackpointer(), x11, Operand(read_from, SXTW));
1216 }
1217
1218
SetCurrentPositionFromEnd(int by)1219 void RegExpMacroAssemblerARM64::SetCurrentPositionFromEnd(int by) {
1220 Label after_position;
1221 __ Cmp(current_input_offset(), -by * char_size());
1222 __ B(ge, &after_position);
1223 __ Mov(current_input_offset(), -by * char_size());
1224 // On RegExp code entry (where this operation is used), the character before
1225 // the current position is expected to be already loaded.
1226 // We have advanced the position, so it's safe to read backwards.
1227 LoadCurrentCharacterUnchecked(-1, 1);
1228 __ Bind(&after_position);
1229 }
1230
1231
SetRegister(int register_index,int to)1232 void RegExpMacroAssemblerARM64::SetRegister(int register_index, int to) {
1233 DCHECK(register_index >= num_saved_registers_); // Reserved for positions!
1234 Register set_to = wzr;
1235 if (to != 0) {
1236 set_to = w10;
1237 __ Mov(set_to, to);
1238 }
1239 StoreRegister(register_index, set_to);
1240 }
1241
1242
Succeed()1243 bool RegExpMacroAssemblerARM64::Succeed() {
1244 __ B(&success_label_);
1245 return global();
1246 }
1247
1248
WriteCurrentPositionToRegister(int reg,int cp_offset)1249 void RegExpMacroAssemblerARM64::WriteCurrentPositionToRegister(int reg,
1250 int cp_offset) {
1251 Register position = current_input_offset();
1252 if (cp_offset != 0) {
1253 position = w10;
1254 __ Add(position, current_input_offset(), cp_offset * char_size());
1255 }
1256 StoreRegister(reg, position);
1257 }
1258
1259
ClearRegisters(int reg_from,int reg_to)1260 void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) {
1261 DCHECK(reg_from <= reg_to);
1262 int num_registers = reg_to - reg_from + 1;
1263
1264 // If the first capture register is cached in a hardware register but not
1265 // aligned on a 64-bit one, we need to clear the first one specifically.
1266 if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) {
1267 StoreRegister(reg_from, string_start_minus_one());
1268 num_registers--;
1269 reg_from++;
1270 }
1271
1272 // Clear cached registers in pairs as far as possible.
1273 while ((num_registers >= 2) && (reg_from < kNumCachedRegisters)) {
1274 DCHECK(GetRegisterState(reg_from) == CACHED_LSW);
1275 __ Mov(GetCachedRegister(reg_from), twice_non_position_value());
1276 reg_from += 2;
1277 num_registers -= 2;
1278 }
1279
1280 if ((num_registers % 2) == 1) {
1281 StoreRegister(reg_from, string_start_minus_one());
1282 num_registers--;
1283 reg_from++;
1284 }
1285
1286 if (num_registers > 0) {
1287 // If there are some remaining registers, they are stored on the stack.
1288 DCHECK_LE(kNumCachedRegisters, reg_from);
1289
1290 // Move down the indexes of the registers on stack to get the correct offset
1291 // in memory.
1292 reg_from -= kNumCachedRegisters;
1293 reg_to -= kNumCachedRegisters;
1294 // We should not unroll the loop for less than 2 registers.
1295 STATIC_ASSERT(kNumRegistersToUnroll > 2);
1296 // We position the base pointer to (reg_from + 1).
1297 int base_offset = kFirstRegisterOnStack -
1298 kWRegSize - (kWRegSize * reg_from);
1299 if (num_registers > kNumRegistersToUnroll) {
1300 Register base = x10;
1301 __ Add(base, frame_pointer(), base_offset);
1302
1303 Label loop;
1304 __ Mov(x11, num_registers);
1305 __ Bind(&loop);
1306 __ Str(twice_non_position_value(),
1307 MemOperand(base, -kSystemPointerSize, PostIndex));
1308 __ Sub(x11, x11, 2);
1309 __ Cbnz(x11, &loop);
1310 } else {
1311 for (int i = reg_from; i <= reg_to; i += 2) {
1312 __ Str(twice_non_position_value(),
1313 MemOperand(frame_pointer(), base_offset));
1314 base_offset -= kWRegSize * 2;
1315 }
1316 }
1317 }
1318 }
1319
1320
WriteStackPointerToRegister(int reg)1321 void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) {
1322 __ Ldr(x10, MemOperand(frame_pointer(), kStackBase));
1323 __ Sub(x10, backtrack_stackpointer(), x10);
1324 if (masm_->emit_debug_code()) {
1325 __ Cmp(x10, Operand(w10, SXTW));
1326 // The stack offset needs to fit in a W register.
1327 __ Check(eq, AbortReason::kOffsetOutOfRange);
1328 }
1329 StoreRegister(reg, w10);
1330 }
1331
1332
1333 // Helper function for reading a value out of a stack frame.
1334 template <typename T>
frame_entry(Address re_frame,int frame_offset)1335 static T& frame_entry(Address re_frame, int frame_offset) {
1336 return *reinterpret_cast<T*>(re_frame + frame_offset);
1337 }
1338
1339
1340 template <typename T>
frame_entry_address(Address re_frame,int frame_offset)1341 static T* frame_entry_address(Address re_frame, int frame_offset) {
1342 return reinterpret_cast<T*>(re_frame + frame_offset);
1343 }
1344
CheckStackGuardState(Address * return_address,Address raw_code,Address re_frame,int start_index,const byte ** input_start,const byte ** input_end)1345 int RegExpMacroAssemblerARM64::CheckStackGuardState(
1346 Address* return_address, Address raw_code, Address re_frame,
1347 int start_index, const byte** input_start, const byte** input_end) {
1348 Code re_code = Code::cast(Object(raw_code));
1349 return NativeRegExpMacroAssembler::CheckStackGuardState(
1350 frame_entry<Isolate*>(re_frame, kIsolate), start_index,
1351 static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
1352 return_address, re_code, frame_entry_address<Address>(re_frame, kInput),
1353 input_start, input_end);
1354 }
1355
1356
CheckPosition(int cp_offset,Label * on_outside_input)1357 void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset,
1358 Label* on_outside_input) {
1359 if (cp_offset >= 0) {
1360 CompareAndBranchOrBacktrack(current_input_offset(),
1361 -cp_offset * char_size(), ge, on_outside_input);
1362 } else {
1363 __ Add(w12, current_input_offset(), Operand(cp_offset * char_size()));
1364 __ Cmp(w12, string_start_minus_one());
1365 BranchOrBacktrack(le, on_outside_input);
1366 }
1367 }
1368
1369
1370 // Private methods:
1371
CallCheckStackGuardState(Register scratch)1372 void RegExpMacroAssemblerARM64::CallCheckStackGuardState(Register scratch) {
1373 DCHECK(!isolate()->IsGeneratingEmbeddedBuiltins());
1374 DCHECK(!masm_->options().isolate_independent_code);
1375
1376 // Allocate space on the stack to store the return address. The
1377 // CheckStackGuardState C++ function will override it if the code
1378 // moved. Allocate extra space for 2 arguments passed by pointers.
1379 // AAPCS64 requires the stack to be 16 byte aligned.
1380 int alignment = masm_->ActivationFrameAlignment();
1381 DCHECK_EQ(alignment % 16, 0);
1382 int align_mask = (alignment / kXRegSize) - 1;
1383 int xreg_to_claim = (3 + align_mask) & ~align_mask;
1384
1385 __ Claim(xreg_to_claim);
1386
1387 // CheckStackGuardState needs the end and start addresses of the input string.
1388 __ Poke(input_end(), 2 * kSystemPointerSize);
1389 __ Add(x5, sp, 2 * kSystemPointerSize);
1390 __ Poke(input_start(), kSystemPointerSize);
1391 __ Add(x4, sp, kSystemPointerSize);
1392
1393 __ Mov(w3, start_offset());
1394 // RegExp code frame pointer.
1395 __ Mov(x2, frame_pointer());
1396 // Code of self.
1397 __ Mov(x1, Operand(masm_->CodeObject()));
1398
1399 // We need to pass a pointer to the return address as first argument.
1400 // DirectCEntry will place the return address on the stack before calling so
1401 // the stack pointer will point to it.
1402 __ Mov(x0, sp);
1403
1404 DCHECK_EQ(scratch, x10);
1405 ExternalReference check_stack_guard_state =
1406 ExternalReference::re_check_stack_guard_state(isolate());
1407 __ Mov(scratch, check_stack_guard_state);
1408
1409 {
1410 UseScratchRegisterScope temps(masm_);
1411 Register scratch = temps.AcquireX();
1412
1413 EmbeddedData d = EmbeddedData::FromBlob();
1414 Address entry = d.InstructionStartOfBuiltin(Builtins::kDirectCEntry);
1415
1416 __ Ldr(scratch, Operand(entry, RelocInfo::OFF_HEAP_TARGET));
1417 __ Call(scratch);
1418 }
1419
1420 // The input string may have been moved in memory, we need to reload it.
1421 __ Peek(input_start(), kSystemPointerSize);
1422 __ Peek(input_end(), 2 * kSystemPointerSize);
1423
1424 __ Drop(xreg_to_claim);
1425
1426 // Reload the Code pointer.
1427 __ Mov(code_pointer(), Operand(masm_->CodeObject()));
1428 }
1429
BranchOrBacktrack(Condition condition,Label * to)1430 void RegExpMacroAssemblerARM64::BranchOrBacktrack(Condition condition,
1431 Label* to) {
1432 if (condition == al) { // Unconditional.
1433 if (to == nullptr) {
1434 Backtrack();
1435 return;
1436 }
1437 __ B(to);
1438 return;
1439 }
1440 if (to == nullptr) {
1441 to = &backtrack_label_;
1442 }
1443 __ B(condition, to);
1444 }
1445
CompareAndBranchOrBacktrack(Register reg,int immediate,Condition condition,Label * to)1446 void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg,
1447 int immediate,
1448 Condition condition,
1449 Label* to) {
1450 if ((immediate == 0) && ((condition == eq) || (condition == ne))) {
1451 if (to == nullptr) {
1452 to = &backtrack_label_;
1453 }
1454 if (condition == eq) {
1455 __ Cbz(reg, to);
1456 } else {
1457 __ Cbnz(reg, to);
1458 }
1459 } else {
1460 __ Cmp(reg, immediate);
1461 BranchOrBacktrack(condition, to);
1462 }
1463 }
1464
1465
CheckPreemption()1466 void RegExpMacroAssemblerARM64::CheckPreemption() {
1467 // Check for preemption.
1468 ExternalReference stack_limit =
1469 ExternalReference::address_of_jslimit(isolate());
1470 __ Mov(x10, stack_limit);
1471 __ Ldr(x10, MemOperand(x10));
1472 __ Cmp(sp, x10);
1473 CallIf(&check_preempt_label_, ls);
1474 }
1475
1476
CheckStackLimit()1477 void RegExpMacroAssemblerARM64::CheckStackLimit() {
1478 ExternalReference stack_limit =
1479 ExternalReference::address_of_regexp_stack_limit_address(isolate());
1480 __ Mov(x10, stack_limit);
1481 __ Ldr(x10, MemOperand(x10));
1482 __ Cmp(backtrack_stackpointer(), x10);
1483 CallIf(&stack_overflow_label_, ls);
1484 }
1485
1486
Push(Register source)1487 void RegExpMacroAssemblerARM64::Push(Register source) {
1488 DCHECK(source.Is32Bits());
1489 DCHECK_NE(source, backtrack_stackpointer());
1490 __ Str(source,
1491 MemOperand(backtrack_stackpointer(),
1492 -static_cast<int>(kWRegSize),
1493 PreIndex));
1494 }
1495
1496
Pop(Register target)1497 void RegExpMacroAssemblerARM64::Pop(Register target) {
1498 DCHECK(target.Is32Bits());
1499 DCHECK_NE(target, backtrack_stackpointer());
1500 __ Ldr(target,
1501 MemOperand(backtrack_stackpointer(), kWRegSize, PostIndex));
1502 }
1503
1504
GetCachedRegister(int register_index)1505 Register RegExpMacroAssemblerARM64::GetCachedRegister(int register_index) {
1506 DCHECK_GT(kNumCachedRegisters, register_index);
1507 return Register::Create(register_index / 2, kXRegSizeInBits);
1508 }
1509
1510
GetRegister(int register_index,Register maybe_result)1511 Register RegExpMacroAssemblerARM64::GetRegister(int register_index,
1512 Register maybe_result) {
1513 DCHECK(maybe_result.Is32Bits());
1514 DCHECK_LE(0, register_index);
1515 if (num_registers_ <= register_index) {
1516 num_registers_ = register_index + 1;
1517 }
1518 Register result = NoReg;
1519 RegisterState register_state = GetRegisterState(register_index);
1520 switch (register_state) {
1521 case STACKED:
1522 __ Ldr(maybe_result, register_location(register_index));
1523 result = maybe_result;
1524 break;
1525 case CACHED_LSW:
1526 result = GetCachedRegister(register_index).W();
1527 break;
1528 case CACHED_MSW:
1529 __ Lsr(maybe_result.X(), GetCachedRegister(register_index),
1530 kWRegSizeInBits);
1531 result = maybe_result;
1532 break;
1533 default:
1534 UNREACHABLE();
1535 }
1536 DCHECK(result.Is32Bits());
1537 return result;
1538 }
1539
1540
StoreRegister(int register_index,Register source)1541 void RegExpMacroAssemblerARM64::StoreRegister(int register_index,
1542 Register source) {
1543 DCHECK(source.Is32Bits());
1544 DCHECK_LE(0, register_index);
1545 if (num_registers_ <= register_index) {
1546 num_registers_ = register_index + 1;
1547 }
1548
1549 RegisterState register_state = GetRegisterState(register_index);
1550 switch (register_state) {
1551 case STACKED:
1552 __ Str(source, register_location(register_index));
1553 break;
1554 case CACHED_LSW: {
1555 Register cached_register = GetCachedRegister(register_index);
1556 if (source != cached_register.W()) {
1557 __ Bfi(cached_register, source.X(), 0, kWRegSizeInBits);
1558 }
1559 break;
1560 }
1561 case CACHED_MSW: {
1562 Register cached_register = GetCachedRegister(register_index);
1563 __ Bfi(cached_register, source.X(), kWRegSizeInBits, kWRegSizeInBits);
1564 break;
1565 }
1566 default:
1567 UNREACHABLE();
1568 }
1569 }
1570
1571
CallIf(Label * to,Condition condition)1572 void RegExpMacroAssemblerARM64::CallIf(Label* to, Condition condition) {
1573 Label skip_call;
1574 if (condition != al) __ B(&skip_call, NegateCondition(condition));
1575 __ Bl(to);
1576 __ Bind(&skip_call);
1577 }
1578
1579
RestoreLinkRegister()1580 void RegExpMacroAssemblerARM64::RestoreLinkRegister() {
1581 __ Pop<TurboAssembler::kAuthLR>(padreg, lr);
1582 __ Add(lr, lr, Operand(masm_->CodeObject()));
1583 }
1584
1585
SaveLinkRegister()1586 void RegExpMacroAssemblerARM64::SaveLinkRegister() {
1587 __ Sub(lr, lr, Operand(masm_->CodeObject()));
1588 __ Push<TurboAssembler::kSignLR>(lr, padreg);
1589 }
1590
1591
register_location(int register_index)1592 MemOperand RegExpMacroAssemblerARM64::register_location(int register_index) {
1593 DCHECK(register_index < (1<<30));
1594 DCHECK_LE(kNumCachedRegisters, register_index);
1595 if (num_registers_ <= register_index) {
1596 num_registers_ = register_index + 1;
1597 }
1598 register_index -= kNumCachedRegisters;
1599 int offset = kFirstRegisterOnStack - register_index * kWRegSize;
1600 return MemOperand(frame_pointer(), offset);
1601 }
1602
capture_location(int register_index,Register scratch)1603 MemOperand RegExpMacroAssemblerARM64::capture_location(int register_index,
1604 Register scratch) {
1605 DCHECK(register_index < (1<<30));
1606 DCHECK(register_index < num_saved_registers_);
1607 DCHECK_LE(kNumCachedRegisters, register_index);
1608 DCHECK_EQ(register_index % 2, 0);
1609 register_index -= kNumCachedRegisters;
1610 int offset = kFirstCaptureOnStack - register_index * kWRegSize;
1611 // capture_location is used with Stp instructions to load/store 2 registers.
1612 // The immediate field in the encoding is limited to 7 bits (signed).
1613 if (is_int7(offset)) {
1614 return MemOperand(frame_pointer(), offset);
1615 } else {
1616 __ Add(scratch, frame_pointer(), offset);
1617 return MemOperand(scratch);
1618 }
1619 }
1620
LoadCurrentCharacterUnchecked(int cp_offset,int characters)1621 void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset,
1622 int characters) {
1623 Register offset = current_input_offset();
1624
1625 // The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU
1626 // and the operating system running on the target allow it.
1627 // If unaligned load/stores are not supported then this function must only
1628 // be used to load a single character at a time.
1629
1630 // ARMv8 supports unaligned accesses but V8 or the kernel can decide to
1631 // disable it.
1632 // TODO(pielan): See whether or not we should disable unaligned accesses.
1633 if (!CanReadUnaligned()) {
1634 DCHECK_EQ(1, characters);
1635 }
1636
1637 if (cp_offset != 0) {
1638 if (masm_->emit_debug_code()) {
1639 __ Mov(x10, cp_offset * char_size());
1640 __ Add(x10, x10, Operand(current_input_offset(), SXTW));
1641 __ Cmp(x10, Operand(w10, SXTW));
1642 // The offset needs to fit in a W register.
1643 __ Check(eq, AbortReason::kOffsetOutOfRange);
1644 } else {
1645 __ Add(w10, current_input_offset(), cp_offset * char_size());
1646 }
1647 offset = w10;
1648 }
1649
1650 if (mode_ == LATIN1) {
1651 if (characters == 4) {
1652 __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1653 } else if (characters == 2) {
1654 __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1655 } else {
1656 DCHECK_EQ(1, characters);
1657 __ Ldrb(current_character(), MemOperand(input_end(), offset, SXTW));
1658 }
1659 } else {
1660 DCHECK(mode_ == UC16);
1661 if (characters == 2) {
1662 __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1663 } else {
1664 DCHECK_EQ(1, characters);
1665 __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1666 }
1667 }
1668 }
1669
1670 } // namespace internal
1671 } // namespace v8
1672
1673 #undef __
1674
1675 #endif // V8_TARGET_ARCH_ARM64
1676