1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #if V8_TARGET_ARCH_ARM64
6
7 #include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
8
9 #include "src/arm64/macro-assembler-arm64-inl.h"
10 #include "src/code-stubs.h"
11 #include "src/log.h"
12 #include "src/macro-assembler.h"
13 #include "src/objects-inl.h"
14 #include "src/regexp/regexp-macro-assembler.h"
15 #include "src/regexp/regexp-stack.h"
16 #include "src/unicode.h"
17
18 namespace v8 {
19 namespace internal {
20
21 #ifndef V8_INTERPRETED_REGEXP
22 /*
23 * This assembler uses the following register assignment convention:
 * - w19 : Used to temporarily store a value before a call to C code.
25 * See CheckNotBackReferenceIgnoreCase.
26 * - x20 : Pointer to the current code object (Code*),
27 * it includes the heap object tag.
28 * - w21 : Current position in input, as negative offset from
29 * the end of the string. Please notice that this is
30 * the byte offset, not the character offset!
31 * - w22 : Currently loaded character. Must be loaded using
32 * LoadCurrentCharacter before using any of the dispatch methods.
33 * - x23 : Points to tip of backtrack stack.
34 * - w24 : Position of the first character minus one: non_position_value.
35 * Used to initialize capture registers.
36 * - x25 : Address at the end of the input string: input_end.
37 * Points to byte after last character in input.
38 * - x26 : Address at the start of the input string: input_start.
39 * - w27 : Where to start in the input string.
40 * - x28 : Output array pointer.
41 * - x29/fp : Frame pointer. Used to access arguments, local variables and
42 * RegExp registers.
43 * - x16/x17 : IP registers, used by assembler. Very volatile.
44 * - sp : Points to tip of C stack.
45 *
46 * - x0-x7 : Used as a cache to store 32 bit capture registers. These
47 * registers need to be retained every time a call to C code
48 * is done.
49 *
50 * The remaining registers are free for computations.
51 * Each call to a public method should retain this convention.
52 *
53 * The stack will have the following structure:
54 *
55 * Location Name Description
56 * (as referred to in
57 * the code)
58 *
59 * - fp[96] isolate Address of the current isolate.
60 * ^^^ sp when called ^^^
61 * - fp[88] lr Return from the RegExp code.
62 * - fp[80] r29 Old frame pointer (CalleeSaved).
63 * - fp[0..72] r19-r28 Backup of CalleeSaved registers.
64 * - fp[-8] direct_call 1 => Direct call from JavaScript code.
65 * 0 => Call through the runtime system.
66 * - fp[-16] stack_base High end of the memory area to use as
67 * the backtracking stack.
68 * - fp[-24] output_size Output may fit multiple sets of matches.
69 * - fp[-32] input Handle containing the input string.
70 * - fp[-40] success_counter
71 * ^^^^^^^^^^^^^ From here and downwards we store 32 bit values ^^^^^^^^^^^^^
72 * - fp[-44] register N Capture registers initialized with
73 * - fp[-48] register N + 1 non_position_value.
74 * ... The first kNumCachedRegisters (N) registers
75 * ... are cached in x0 to x7.
76 * ... Only positions must be stored in the first
77 * - ... num_saved_registers_ registers.
78 * - ...
79 * - register N + num_registers - 1
80 * ^^^^^^^^^ sp ^^^^^^^^^
81 *
82 * The first num_saved_registers_ registers are initialized to point to
83 * "character -1" in the string (i.e., char_size() bytes before the first
84 * character of the string). The remaining registers start out as garbage.
85 *
86 * The data up to the return address must be placed there by the calling
87 * code and the remaining arguments are passed in registers, e.g. by calling the
88 * code entry as cast to a function with the signature:
89 * int (*match)(String* input_string,
90 * int start_index,
91 * Address start,
92 * Address end,
93 * int* capture_output_array,
94 * int num_capture_registers,
95 * byte* stack_area_base,
96 * bool direct_call = false,
97 * Isolate* isolate);
98 * The call is performed by NativeRegExpMacroAssembler::Execute()
99 * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
100 */
101
102 #define __ ACCESS_MASM(masm_)
103
RegExpMacroAssemblerARM64(Isolate * isolate,Zone * zone,Mode mode,int registers_to_save)104 RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(Isolate* isolate,
105 Zone* zone, Mode mode,
106 int registers_to_save)
107 : NativeRegExpMacroAssembler(isolate, zone),
108 masm_(new MacroAssembler(isolate, nullptr, kRegExpCodeSize,
109 CodeObjectRequired::kYes)),
110 mode_(mode),
111 num_registers_(registers_to_save),
112 num_saved_registers_(registers_to_save),
113 entry_label_(),
114 start_label_(),
115 success_label_(),
116 backtrack_label_(),
117 exit_label_() {
118 DCHECK_EQ(0, registers_to_save % 2);
119 // We can cache at most 16 W registers in x0-x7.
120 STATIC_ASSERT(kNumCachedRegisters <= 16);
121 STATIC_ASSERT((kNumCachedRegisters % 2) == 0);
122 __ B(&entry_label_); // We'll write the entry code later.
123 __ Bind(&start_label_); // And then continue from here.
124 }
125
126
~RegExpMacroAssemblerARM64()127 RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() {
128 delete masm_;
129 // Unuse labels in case we throw away the assembler without calling GetCode.
130 entry_label_.Unuse();
131 start_label_.Unuse();
132 success_label_.Unuse();
133 backtrack_label_.Unuse();
134 exit_label_.Unuse();
135 check_preempt_label_.Unuse();
136 stack_overflow_label_.Unuse();
137 }
138
stack_limit_slack()139 int RegExpMacroAssemblerARM64::stack_limit_slack() {
140 return RegExpStack::kStackLimitSlack;
141 }
142
143
AdvanceCurrentPosition(int by)144 void RegExpMacroAssemblerARM64::AdvanceCurrentPosition(int by) {
145 if (by != 0) {
146 __ Add(current_input_offset(),
147 current_input_offset(), by * char_size());
148 }
149 }
150
151
AdvanceRegister(int reg,int by)152 void RegExpMacroAssemblerARM64::AdvanceRegister(int reg, int by) {
153 DCHECK((reg >= 0) && (reg < num_registers_));
154 if (by != 0) {
155 RegisterState register_state = GetRegisterState(reg);
156 switch (register_state) {
157 case STACKED:
158 __ Ldr(w10, register_location(reg));
159 __ Add(w10, w10, by);
160 __ Str(w10, register_location(reg));
161 break;
162 case CACHED_LSW: {
163 Register to_advance = GetCachedRegister(reg);
164 __ Add(to_advance, to_advance, by);
165 break;
166 }
167 case CACHED_MSW: {
168 Register to_advance = GetCachedRegister(reg);
169 __ Add(to_advance, to_advance,
170 static_cast<int64_t>(by) << kWRegSizeInBits);
171 break;
172 }
173 default:
174 UNREACHABLE();
175 break;
176 }
177 }
178 }
179
180
Backtrack()181 void RegExpMacroAssemblerARM64::Backtrack() {
182 CheckPreemption();
183 Pop(w10);
184 __ Add(x10, code_pointer(), Operand(w10, UXTW));
185 __ Br(x10);
186 }
187
188
Bind(Label * label)189 void RegExpMacroAssemblerARM64::Bind(Label* label) {
190 __ Bind(label);
191 }
192
193
CheckCharacter(uint32_t c,Label * on_equal)194 void RegExpMacroAssemblerARM64::CheckCharacter(uint32_t c, Label* on_equal) {
195 CompareAndBranchOrBacktrack(current_character(), c, eq, on_equal);
196 }
197
198
CheckCharacterGT(uc16 limit,Label * on_greater)199 void RegExpMacroAssemblerARM64::CheckCharacterGT(uc16 limit,
200 Label* on_greater) {
201 CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater);
202 }
203
204
CheckAtStart(Label * on_at_start)205 void RegExpMacroAssemblerARM64::CheckAtStart(Label* on_at_start) {
206 __ Add(w10, current_input_offset(), Operand(-char_size()));
207 __ Cmp(w10, string_start_minus_one());
208 BranchOrBacktrack(eq, on_at_start);
209 }
210
211
CheckNotAtStart(int cp_offset,Label * on_not_at_start)212 void RegExpMacroAssemblerARM64::CheckNotAtStart(int cp_offset,
213 Label* on_not_at_start) {
214 __ Add(w10, current_input_offset(),
215 Operand(-char_size() + cp_offset * char_size()));
216 __ Cmp(w10, string_start_minus_one());
217 BranchOrBacktrack(ne, on_not_at_start);
218 }
219
220
CheckCharacterLT(uc16 limit,Label * on_less)221 void RegExpMacroAssemblerARM64::CheckCharacterLT(uc16 limit, Label* on_less) {
222 CompareAndBranchOrBacktrack(current_character(), limit, lo, on_less);
223 }
224
225
CheckCharacters(Vector<const uc16> str,int cp_offset,Label * on_failure,bool check_end_of_string)226 void RegExpMacroAssemblerARM64::CheckCharacters(Vector<const uc16> str,
227 int cp_offset,
228 Label* on_failure,
229 bool check_end_of_string) {
230 // This method is only ever called from the cctests.
231
232 if (check_end_of_string) {
233 // Is last character of required match inside string.
234 CheckPosition(cp_offset + str.length() - 1, on_failure);
235 }
236
237 Register characters_address = x11;
238
239 __ Add(characters_address,
240 input_end(),
241 Operand(current_input_offset(), SXTW));
242 if (cp_offset != 0) {
243 __ Add(characters_address, characters_address, cp_offset * char_size());
244 }
245
246 for (int i = 0; i < str.length(); i++) {
247 if (mode_ == LATIN1) {
248 __ Ldrb(w10, MemOperand(characters_address, 1, PostIndex));
249 DCHECK_GE(String::kMaxOneByteCharCode, str[i]);
250 } else {
251 __ Ldrh(w10, MemOperand(characters_address, 2, PostIndex));
252 }
253 CompareAndBranchOrBacktrack(w10, str[i], ne, on_failure);
254 }
255 }
256
257
CheckGreedyLoop(Label * on_equal)258 void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
259 __ Ldr(w10, MemOperand(backtrack_stackpointer()));
260 __ Cmp(current_input_offset(), w10);
261 __ Cset(x11, eq);
262 __ Add(backtrack_stackpointer(),
263 backtrack_stackpointer(), Operand(x11, LSL, kWRegSizeLog2));
264 BranchOrBacktrack(eq, on_equal);
265 }
266
267
CheckNotBackReferenceIgnoreCase(int start_reg,bool read_backward,bool unicode,Label * on_no_match)268 void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
269 int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
270 Label fallthrough;
271
272 Register capture_start_offset = w10;
273 // Save the capture length in a callee-saved register so it will
274 // be preserved if we call a C helper.
275 Register capture_length = w19;
276 DCHECK(kCalleeSaved.IncludesAliasOf(capture_length));
277
278 // Find length of back-referenced capture.
279 DCHECK_EQ(0, start_reg % 2);
280 if (start_reg < kNumCachedRegisters) {
281 __ Mov(capture_start_offset.X(), GetCachedRegister(start_reg));
282 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
283 } else {
284 __ Ldp(w11, capture_start_offset, capture_location(start_reg, x10));
285 }
286 __ Sub(capture_length, w11, capture_start_offset); // Length to check.
287
288 // At this point, the capture registers are either both set or both cleared.
289 // If the capture length is zero, then the capture is either empty or cleared.
290 // Fall through in both cases.
291 __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
292
293 // Check that there are enough characters left in the input.
294 if (read_backward) {
295 __ Add(w12, string_start_minus_one(), capture_length);
296 __ Cmp(current_input_offset(), w12);
297 BranchOrBacktrack(le, on_no_match);
298 } else {
299 __ Cmn(capture_length, current_input_offset());
300 BranchOrBacktrack(gt, on_no_match);
301 }
302
303 if (mode_ == LATIN1) {
304 Label success;
305 Label fail;
306 Label loop_check;
307
308 Register capture_start_address = x12;
309 Register capture_end_addresss = x13;
310 Register current_position_address = x14;
311
312 __ Add(capture_start_address,
313 input_end(),
314 Operand(capture_start_offset, SXTW));
315 __ Add(capture_end_addresss,
316 capture_start_address,
317 Operand(capture_length, SXTW));
318 __ Add(current_position_address,
319 input_end(),
320 Operand(current_input_offset(), SXTW));
321 if (read_backward) {
322 // Offset by length when matching backwards.
323 __ Sub(current_position_address, current_position_address,
324 Operand(capture_length, SXTW));
325 }
326
327 Label loop;
328 __ Bind(&loop);
329 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
330 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
331 __ Cmp(w10, w11);
332 __ B(eq, &loop_check);
333
334 // Mismatch, try case-insensitive match (converting letters to lower-case).
335 __ Orr(w10, w10, 0x20); // Convert capture character to lower-case.
336 __ Orr(w11, w11, 0x20); // Also convert input character.
337 __ Cmp(w11, w10);
338 __ B(ne, &fail);
339 __ Sub(w10, w10, 'a');
340 __ Cmp(w10, 'z' - 'a'); // Is w10 a lowercase letter?
341 __ B(ls, &loop_check); // In range 'a'-'z'.
342 // Latin-1: Check for values in range [224,254] but not 247.
343 __ Sub(w10, w10, 224 - 'a');
344 __ Cmp(w10, 254 - 224);
345 __ Ccmp(w10, 247 - 224, ZFlag, ls); // Check for 247.
346 __ B(eq, &fail); // Weren't Latin-1 letters.
347
348 __ Bind(&loop_check);
349 __ Cmp(capture_start_address, capture_end_addresss);
350 __ B(lt, &loop);
351 __ B(&success);
352
353 __ Bind(&fail);
354 BranchOrBacktrack(al, on_no_match);
355
356 __ Bind(&success);
357 // Compute new value of character position after the matched part.
358 __ Sub(current_input_offset().X(), current_position_address, input_end());
359 if (read_backward) {
360 __ Sub(current_input_offset().X(), current_input_offset().X(),
361 Operand(capture_length, SXTW));
362 }
363 if (masm_->emit_debug_code()) {
364 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
365 __ Ccmp(current_input_offset(), 0, NoFlag, eq);
366 // The current input offset should be <= 0, and fit in a W register.
367 __ Check(le, AbortReason::kOffsetOutOfRange);
368 }
369 } else {
370 DCHECK(mode_ == UC16);
371 int argument_count = 4;
372
373 // The cached registers need to be retained.
374 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
375 DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
376 __ PushCPURegList(cached_registers);
377
378 // Put arguments into arguments registers.
379 // Parameters are
380 // x0: Address byte_offset1 - Address captured substring's start.
381 // x1: Address byte_offset2 - Address of current character position.
382 // w2: size_t byte_length - length of capture in bytes(!)
383 // x3: Isolate* isolate or 0 if unicode flag
384
385 // Address of start of capture.
386 __ Add(x0, input_end(), Operand(capture_start_offset, SXTW));
387 // Length of capture.
388 __ Mov(w2, capture_length);
389 // Address of current input position.
390 __ Add(x1, input_end(), Operand(current_input_offset(), SXTW));
391 if (read_backward) {
392 __ Sub(x1, x1, Operand(capture_length, SXTW));
393 }
394 // Isolate.
395 #ifdef V8_INTL_SUPPORT
396 if (unicode) {
397 __ Mov(x3, Operand(0));
398 } else // NOLINT
399 #endif // V8_INTL_SUPPORT
400 {
401 __ Mov(x3, ExternalReference::isolate_address(isolate()));
402 }
403
404 {
405 AllowExternalCallThatCantCauseGC scope(masm_);
406 ExternalReference function =
407 ExternalReference::re_case_insensitive_compare_uc16(isolate());
408 __ CallCFunction(function, argument_count);
409 }
410
411 // Check if function returned non-zero for success or zero for failure.
412 // x0 is one of the registers used as a cache so it must be tested before
413 // the cache is restored.
414 __ Cmp(x0, 0);
415 __ PopCPURegList(cached_registers);
416 BranchOrBacktrack(eq, on_no_match);
417
418 // On success, advance position by length of capture.
419 if (read_backward) {
420 __ Sub(current_input_offset(), current_input_offset(), capture_length);
421 } else {
422 __ Add(current_input_offset(), current_input_offset(), capture_length);
423 }
424 }
425
426 __ Bind(&fallthrough);
427 }
428
CheckNotBackReference(int start_reg,bool read_backward,Label * on_no_match)429 void RegExpMacroAssemblerARM64::CheckNotBackReference(int start_reg,
430 bool read_backward,
431 Label* on_no_match) {
432 Label fallthrough;
433
434 Register capture_start_address = x12;
435 Register capture_end_address = x13;
436 Register current_position_address = x14;
437 Register capture_length = w15;
438
439 // Find length of back-referenced capture.
440 DCHECK_EQ(0, start_reg % 2);
441 if (start_reg < kNumCachedRegisters) {
442 __ Mov(x10, GetCachedRegister(start_reg));
443 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
444 } else {
445 __ Ldp(w11, w10, capture_location(start_reg, x10));
446 }
447 __ Sub(capture_length, w11, w10); // Length to check.
448
449 // At this point, the capture registers are either both set or both cleared.
450 // If the capture length is zero, then the capture is either empty or cleared.
451 // Fall through in both cases.
452 __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
453
454 // Check that there are enough characters left in the input.
455 if (read_backward) {
456 __ Add(w12, string_start_minus_one(), capture_length);
457 __ Cmp(current_input_offset(), w12);
458 BranchOrBacktrack(le, on_no_match);
459 } else {
460 __ Cmn(capture_length, current_input_offset());
461 BranchOrBacktrack(gt, on_no_match);
462 }
463
464 // Compute pointers to match string and capture string
465 __ Add(capture_start_address, input_end(), Operand(w10, SXTW));
466 __ Add(capture_end_address,
467 capture_start_address,
468 Operand(capture_length, SXTW));
469 __ Add(current_position_address,
470 input_end(),
471 Operand(current_input_offset(), SXTW));
472 if (read_backward) {
473 // Offset by length when matching backwards.
474 __ Sub(current_position_address, current_position_address,
475 Operand(capture_length, SXTW));
476 }
477
478 Label loop;
479 __ Bind(&loop);
480 if (mode_ == LATIN1) {
481 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
482 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
483 } else {
484 DCHECK(mode_ == UC16);
485 __ Ldrh(w10, MemOperand(capture_start_address, 2, PostIndex));
486 __ Ldrh(w11, MemOperand(current_position_address, 2, PostIndex));
487 }
488 __ Cmp(w10, w11);
489 BranchOrBacktrack(ne, on_no_match);
490 __ Cmp(capture_start_address, capture_end_address);
491 __ B(lt, &loop);
492
493 // Move current character position to position after match.
494 __ Sub(current_input_offset().X(), current_position_address, input_end());
495 if (read_backward) {
496 __ Sub(current_input_offset().X(), current_input_offset().X(),
497 Operand(capture_length, SXTW));
498 }
499
500 if (masm_->emit_debug_code()) {
501 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
502 __ Ccmp(current_input_offset(), 0, NoFlag, eq);
503 // The current input offset should be <= 0, and fit in a W register.
504 __ Check(le, AbortReason::kOffsetOutOfRange);
505 }
506 __ Bind(&fallthrough);
507 }
508
509
CheckNotCharacter(unsigned c,Label * on_not_equal)510 void RegExpMacroAssemblerARM64::CheckNotCharacter(unsigned c,
511 Label* on_not_equal) {
512 CompareAndBranchOrBacktrack(current_character(), c, ne, on_not_equal);
513 }
514
515
CheckCharacterAfterAnd(uint32_t c,uint32_t mask,Label * on_equal)516 void RegExpMacroAssemblerARM64::CheckCharacterAfterAnd(uint32_t c,
517 uint32_t mask,
518 Label* on_equal) {
519 __ And(w10, current_character(), mask);
520 CompareAndBranchOrBacktrack(w10, c, eq, on_equal);
521 }
522
523
CheckNotCharacterAfterAnd(unsigned c,unsigned mask,Label * on_not_equal)524 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterAnd(unsigned c,
525 unsigned mask,
526 Label* on_not_equal) {
527 __ And(w10, current_character(), mask);
528 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
529 }
530
531
CheckNotCharacterAfterMinusAnd(uc16 c,uc16 minus,uc16 mask,Label * on_not_equal)532 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterMinusAnd(
533 uc16 c,
534 uc16 minus,
535 uc16 mask,
536 Label* on_not_equal) {
537 DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
538 __ Sub(w10, current_character(), minus);
539 __ And(w10, w10, mask);
540 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
541 }
542
543
CheckCharacterInRange(uc16 from,uc16 to,Label * on_in_range)544 void RegExpMacroAssemblerARM64::CheckCharacterInRange(
545 uc16 from,
546 uc16 to,
547 Label* on_in_range) {
548 __ Sub(w10, current_character(), from);
549 // Unsigned lower-or-same condition.
550 CompareAndBranchOrBacktrack(w10, to - from, ls, on_in_range);
551 }
552
553
CheckCharacterNotInRange(uc16 from,uc16 to,Label * on_not_in_range)554 void RegExpMacroAssemblerARM64::CheckCharacterNotInRange(
555 uc16 from,
556 uc16 to,
557 Label* on_not_in_range) {
558 __ Sub(w10, current_character(), from);
559 // Unsigned higher condition.
560 CompareAndBranchOrBacktrack(w10, to - from, hi, on_not_in_range);
561 }
562
563
CheckBitInTable(Handle<ByteArray> table,Label * on_bit_set)564 void RegExpMacroAssemblerARM64::CheckBitInTable(
565 Handle<ByteArray> table,
566 Label* on_bit_set) {
567 __ Mov(x11, Operand(table));
568 if ((mode_ != LATIN1) || (kTableMask != String::kMaxOneByteCharCode)) {
569 __ And(w10, current_character(), kTableMask);
570 __ Add(w10, w10, ByteArray::kHeaderSize - kHeapObjectTag);
571 } else {
572 __ Add(w10, current_character(), ByteArray::kHeaderSize - kHeapObjectTag);
573 }
574 __ Ldrb(w11, MemOperand(x11, w10, UXTW));
575 CompareAndBranchOrBacktrack(w11, 0, ne, on_bit_set);
576 }
577
578
CheckSpecialCharacterClass(uc16 type,Label * on_no_match)579 bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type,
580 Label* on_no_match) {
581 // Range checks (c in min..max) are generally implemented by an unsigned
582 // (c - min) <= (max - min) check
583 switch (type) {
584 case 's':
585 // Match space-characters
586 if (mode_ == LATIN1) {
587 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
588 Label success;
589 // Check for ' ' or 0x00A0.
590 __ Cmp(current_character(), ' ');
591 __ Ccmp(current_character(), 0x00A0, ZFlag, ne);
592 __ B(eq, &success);
593 // Check range 0x09..0x0D.
594 __ Sub(w10, current_character(), '\t');
595 CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match);
596 __ Bind(&success);
597 return true;
598 }
599 return false;
600 case 'S':
601 // The emitted code for generic character classes is good enough.
602 return false;
603 case 'd':
604 // Match ASCII digits ('0'..'9').
605 __ Sub(w10, current_character(), '0');
606 CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match);
607 return true;
608 case 'D':
609 // Match ASCII non-digits.
610 __ Sub(w10, current_character(), '0');
611 CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match);
612 return true;
613 case '.': {
614 // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
615 // Here we emit the conditional branch only once at the end to make branch
616 // prediction more efficient, even though we could branch out of here
617 // as soon as a character matches.
618 __ Cmp(current_character(), 0x0A);
619 __ Ccmp(current_character(), 0x0D, ZFlag, ne);
620 if (mode_ == UC16) {
621 __ Sub(w10, current_character(), 0x2028);
622 // If the Z flag was set we clear the flags to force a branch.
623 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
624 // ls -> !((C==1) && (Z==0))
625 BranchOrBacktrack(ls, on_no_match);
626 } else {
627 BranchOrBacktrack(eq, on_no_match);
628 }
629 return true;
630 }
631 case 'n': {
632 // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
633 // We have to check all 4 newline characters before emitting
634 // the conditional branch.
635 __ Cmp(current_character(), 0x0A);
636 __ Ccmp(current_character(), 0x0D, ZFlag, ne);
637 if (mode_ == UC16) {
638 __ Sub(w10, current_character(), 0x2028);
639 // If the Z flag was set we clear the flags to force a fall-through.
640 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
641 // hi -> (C==1) && (Z==0)
642 BranchOrBacktrack(hi, on_no_match);
643 } else {
644 BranchOrBacktrack(ne, on_no_match);
645 }
646 return true;
647 }
648 case 'w': {
649 if (mode_ != LATIN1) {
650 // Table is 256 entries, so all Latin1 characters can be tested.
651 CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match);
652 }
653 ExternalReference map = ExternalReference::re_word_character_map(isolate());
654 __ Mov(x10, map);
655 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
656 CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match);
657 return true;
658 }
659 case 'W': {
660 Label done;
661 if (mode_ != LATIN1) {
662 // Table is 256 entries, so all Latin1 characters can be tested.
663 __ Cmp(current_character(), 'z');
664 __ B(hi, &done);
665 }
666 ExternalReference map = ExternalReference::re_word_character_map(isolate());
667 __ Mov(x10, map);
668 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
669 CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match);
670 __ Bind(&done);
671 return true;
672 }
673 case '*':
674 // Match any character.
675 return true;
676 // No custom implementation (yet): s(UC16), S(UC16).
677 default:
678 return false;
679 }
680 }
681
682
Fail()683 void RegExpMacroAssemblerARM64::Fail() {
684 __ Mov(w0, FAILURE);
685 __ B(&exit_label_);
686 }
687
688
GetCode(Handle<String> source)689 Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
690 Label return_w0;
691 // Finalize code - write the entry point code now we know how many
692 // registers we need.
693
694 // Entry code:
695 __ Bind(&entry_label_);
696
697 // Arguments on entry:
698 // x0: String* input
699 // x1: int start_offset
700 // x2: byte* input_start
701 // x3: byte* input_end
702 // x4: int* output array
703 // x5: int output array size
704 // x6: Address stack_base
705 // x7: int direct_call
706
707 // sp[8]: address of the current isolate
708 // sp[0]: secondary link/return address used by native call
709
710 // Tell the system that we have a stack frame. Because the type is MANUAL, no
711 // code is generated.
712 FrameScope scope(masm_, StackFrame::MANUAL);
713
714 // Push registers on the stack, only push the argument registers that we need.
715 CPURegList argument_registers(x0, x5, x6, x7);
716
717 CPURegList registers_to_retain = kCalleeSaved;
718 DCHECK_EQ(11, kCalleeSaved.Count());
719 registers_to_retain.Combine(lr);
720
721 __ PushCPURegList(registers_to_retain);
722 __ PushCPURegList(argument_registers);
723
724 // Set frame pointer in place.
725 __ Add(frame_pointer(), sp, argument_registers.Count() * kPointerSize);
726
727 // Initialize callee-saved registers.
728 __ Mov(start_offset(), w1);
729 __ Mov(input_start(), x2);
730 __ Mov(input_end(), x3);
731 __ Mov(output_array(), x4);
732
733 // Set the number of registers we will need to allocate, that is:
734 // - success_counter (X register)
735 // - (num_registers_ - kNumCachedRegisters) (W registers)
736 int num_wreg_to_allocate = num_registers_ - kNumCachedRegisters;
737 // Do not allocate registers on the stack if they can all be cached.
738 if (num_wreg_to_allocate < 0) { num_wreg_to_allocate = 0; }
739 // Make room for the success_counter.
740 num_wreg_to_allocate += 2;
741
742 // Make sure the stack alignment will be respected.
743 int alignment = masm_->ActivationFrameAlignment();
744 DCHECK_EQ(alignment % 16, 0);
745 int align_mask = (alignment / kWRegSize) - 1;
746 num_wreg_to_allocate = (num_wreg_to_allocate + align_mask) & ~align_mask;
747
748 // Check if we have space on the stack.
749 Label stack_limit_hit;
750 Label stack_ok;
751
752 ExternalReference stack_limit =
753 ExternalReference::address_of_stack_limit(isolate());
754 __ Mov(x10, stack_limit);
755 __ Ldr(x10, MemOperand(x10));
756 __ Subs(x10, sp, x10);
757
758 // Handle it if the stack pointer is already below the stack limit.
759 __ B(ls, &stack_limit_hit);
760
761 // Check if there is room for the variable number of registers above
762 // the stack limit.
763 __ Cmp(x10, num_wreg_to_allocate * kWRegSize);
764 __ B(hs, &stack_ok);
765
766 // Exit with OutOfMemory exception. There is not enough space on the stack
767 // for our working registers.
768 __ Mov(w0, EXCEPTION);
769 __ B(&return_w0);
770
771 __ Bind(&stack_limit_hit);
772 CallCheckStackGuardState(x10);
773 // If returned value is non-zero, we exit with the returned value as result.
774 __ Cbnz(w0, &return_w0);
775
776 __ Bind(&stack_ok);
777
778 // Allocate space on stack.
779 __ Claim(num_wreg_to_allocate, kWRegSize);
780
781 // Initialize success_counter with 0.
782 __ Str(wzr, MemOperand(frame_pointer(), kSuccessCounter));
783
784 // Find negative length (offset of start relative to end).
785 __ Sub(x10, input_start(), input_end());
786 if (masm_->emit_debug_code()) {
787 // Check that the size of the input string chars is in range.
788 __ Neg(x11, x10);
789 __ Cmp(x11, SeqTwoByteString::kMaxCharsSize);
790 __ Check(ls, AbortReason::kInputStringTooLong);
791 }
792 __ Mov(current_input_offset(), w10);
793
794 // The non-position value is used as a clearing value for the
795 // capture registers, it corresponds to the position of the first character
796 // minus one.
797 __ Sub(string_start_minus_one(), current_input_offset(), char_size());
798 __ Sub(string_start_minus_one(), string_start_minus_one(),
799 Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0));
800 // We can store this value twice in an X register for initializing
801 // on-stack registers later.
802 __ Orr(twice_non_position_value(), string_start_minus_one().X(),
803 Operand(string_start_minus_one().X(), LSL, kWRegSizeInBits));
804
805 // Initialize code pointer register.
806 __ Mov(code_pointer(), Operand(masm_->CodeObject()));
807
808 Label load_char_start_regexp, start_regexp;
809 // Load newline if index is at start, previous character otherwise.
810 __ Cbnz(start_offset(), &load_char_start_regexp);
811 __ Mov(current_character(), '\n');
812 __ B(&start_regexp);
813
814 // Global regexp restarts matching here.
815 __ Bind(&load_char_start_regexp);
816 // Load previous char as initial value of current character register.
817 LoadCurrentCharacterUnchecked(-1, 1);
818 __ Bind(&start_regexp);
819 // Initialize on-stack registers.
820 if (num_saved_registers_ > 0) {
821 ClearRegisters(0, num_saved_registers_ - 1);
822 }
823
824 // Initialize backtrack stack pointer.
825 __ Ldr(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackBase));
826
827 // Execute
828 __ B(&start_label_);
829
830 if (backtrack_label_.is_linked()) {
831 __ Bind(&backtrack_label_);
832 Backtrack();
833 }
834
835 if (success_label_.is_linked()) {
836 Register first_capture_start = w15;
837
838 // Save captures when successful.
839 __ Bind(&success_label_);
840
841 if (num_saved_registers_ > 0) {
842 // V8 expects the output to be an int32_t array.
843 Register capture_start = w12;
844 Register capture_end = w13;
845 Register input_length = w14;
846
847 // Copy captures to output.
848
849 // Get string length.
850 __ Sub(x10, input_end(), input_start());
851 if (masm_->emit_debug_code()) {
852 // Check that the size of the input string chars is in range.
853 __ Cmp(x10, SeqTwoByteString::kMaxCharsSize);
854 __ Check(ls, AbortReason::kInputStringTooLong);
855 }
856 // input_start has a start_offset offset on entry. We need to include
857 // it when computing the length of the whole string.
858 if (mode_ == UC16) {
859 __ Add(input_length, start_offset(), Operand(w10, LSR, 1));
860 } else {
861 __ Add(input_length, start_offset(), w10);
862 }
863
864 // Copy the results to the output array from the cached registers first.
865 for (int i = 0;
866 (i < num_saved_registers_) && (i < kNumCachedRegisters);
867 i += 2) {
868 __ Mov(capture_start.X(), GetCachedRegister(i));
869 __ Lsr(capture_end.X(), capture_start.X(), kWRegSizeInBits);
870 if ((i == 0) && global_with_zero_length_check()) {
871 // Keep capture start for the zero-length check later.
872 __ Mov(first_capture_start, capture_start);
873 }
874 // Offsets need to be relative to the start of the string.
875 if (mode_ == UC16) {
876 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
877 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
878 } else {
879 __ Add(capture_start, input_length, capture_start);
880 __ Add(capture_end, input_length, capture_end);
881 }
882 // The output pointer advances for a possible global match.
883 __ Stp(capture_start,
884 capture_end,
885 MemOperand(output_array(), kPointerSize, PostIndex));
886 }
887
888 // Only carry on if there are more than kNumCachedRegisters capture
889 // registers.
890 int num_registers_left_on_stack =
891 num_saved_registers_ - kNumCachedRegisters;
892 if (num_registers_left_on_stack > 0) {
893 Register base = x10;
894 // There are always an even number of capture registers. A couple of
895 // registers determine one match with two offsets.
896 DCHECK_EQ(0, num_registers_left_on_stack % 2);
897 __ Add(base, frame_pointer(), kFirstCaptureOnStack);
898
899 // We can unroll the loop here, we should not unroll for less than 2
900 // registers.
901 STATIC_ASSERT(kNumRegistersToUnroll > 2);
902 if (num_registers_left_on_stack <= kNumRegistersToUnroll) {
903 for (int i = 0; i < num_registers_left_on_stack / 2; i++) {
904 __ Ldp(capture_end,
905 capture_start,
906 MemOperand(base, -kPointerSize, PostIndex));
907 if ((i == 0) && global_with_zero_length_check()) {
908 // Keep capture start for the zero-length check later.
909 __ Mov(first_capture_start, capture_start);
910 }
911 // Offsets need to be relative to the start of the string.
912 if (mode_ == UC16) {
913 __ Add(capture_start,
914 input_length,
915 Operand(capture_start, ASR, 1));
916 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
917 } else {
918 __ Add(capture_start, input_length, capture_start);
919 __ Add(capture_end, input_length, capture_end);
920 }
921 // The output pointer advances for a possible global match.
922 __ Stp(capture_start,
923 capture_end,
924 MemOperand(output_array(), kPointerSize, PostIndex));
925 }
926 } else {
927 Label loop, start;
928 __ Mov(x11, num_registers_left_on_stack);
929
930 __ Ldp(capture_end,
931 capture_start,
932 MemOperand(base, -kPointerSize, PostIndex));
933 if (global_with_zero_length_check()) {
934 __ Mov(first_capture_start, capture_start);
935 }
936 __ B(&start);
937
938 __ Bind(&loop);
939 __ Ldp(capture_end,
940 capture_start,
941 MemOperand(base, -kPointerSize, PostIndex));
942 __ Bind(&start);
943 if (mode_ == UC16) {
944 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
945 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
946 } else {
947 __ Add(capture_start, input_length, capture_start);
948 __ Add(capture_end, input_length, capture_end);
949 }
950 // The output pointer advances for a possible global match.
951 __ Stp(capture_start,
952 capture_end,
953 MemOperand(output_array(), kPointerSize, PostIndex));
954 __ Sub(x11, x11, 2);
955 __ Cbnz(x11, &loop);
956 }
957 }
958 }
959
960 if (global()) {
961 Register success_counter = w0;
962 Register output_size = x10;
963 // Restart matching if the regular expression is flagged as global.
964
965 // Increment success counter.
966 __ Ldr(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
967 __ Add(success_counter, success_counter, 1);
968 __ Str(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
969
970 // Capture results have been stored, so the number of remaining global
971 // output registers is reduced by the number of stored captures.
972 __ Ldr(output_size, MemOperand(frame_pointer(), kOutputSize));
973 __ Sub(output_size, output_size, num_saved_registers_);
974 // Check whether we have enough room for another set of capture results.
975 __ Cmp(output_size, num_saved_registers_);
976 __ B(lt, &return_w0);
977
978 // The output pointer is already set to the next field in the output
979 // array.
980 // Update output size on the frame before we restart matching.
981 __ Str(output_size, MemOperand(frame_pointer(), kOutputSize));
982
983 if (global_with_zero_length_check()) {
984 // Special case for zero-length matches.
985 __ Cmp(current_input_offset(), first_capture_start);
986 // Not a zero-length match, restart.
987 __ B(ne, &load_char_start_regexp);
988 // Offset from the end is zero if we already reached the end.
989 __ Cbz(current_input_offset(), &return_w0);
990 // Advance current position after a zero-length match.
991 Label advance;
992 __ bind(&advance);
993 __ Add(current_input_offset(),
994 current_input_offset(),
995 Operand((mode_ == UC16) ? 2 : 1));
996 if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
997 }
998
999 __ B(&load_char_start_regexp);
1000 } else {
1001 __ Mov(w0, SUCCESS);
1002 }
1003 }
1004
1005 if (exit_label_.is_linked()) {
1006 // Exit and return w0
1007 __ Bind(&exit_label_);
1008 if (global()) {
1009 __ Ldr(w0, MemOperand(frame_pointer(), kSuccessCounter));
1010 }
1011 }
1012
1013 __ Bind(&return_w0);
1014
1015 // Set stack pointer back to first register to retain
1016 __ Mov(sp, fp);
1017
1018 // Restore registers.
1019 __ PopCPURegList(registers_to_retain);
1020
1021 __ Ret();
1022
1023 Label exit_with_exception;
1024 // Registers x0 to x7 are used to store the first captures, they need to be
1025 // retained over calls to C++ code.
1026 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
1027 DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
1028
1029 if (check_preempt_label_.is_linked()) {
1030 __ Bind(&check_preempt_label_);
1031 SaveLinkRegister();
1032 // The cached registers need to be retained.
1033 __ PushCPURegList(cached_registers);
1034 CallCheckStackGuardState(x10);
1035 // Returning from the regexp code restores the stack (sp <- fp)
1036 // so we don't need to drop the link register from it before exiting.
1037 __ Cbnz(w0, &return_w0);
1038 // Reset the cached registers.
1039 __ PopCPURegList(cached_registers);
1040 RestoreLinkRegister();
1041 __ Ret();
1042 }
1043
1044 if (stack_overflow_label_.is_linked()) {
1045 __ Bind(&stack_overflow_label_);
1046 SaveLinkRegister();
1047 // The cached registers need to be retained.
1048 __ PushCPURegList(cached_registers);
1049 // Call GrowStack(backtrack_stackpointer(), &stack_base)
1050 __ Mov(x2, ExternalReference::isolate_address(isolate()));
1051 __ Add(x1, frame_pointer(), kStackBase);
1052 __ Mov(x0, backtrack_stackpointer());
1053 ExternalReference grow_stack =
1054 ExternalReference::re_grow_stack(isolate());
1055 __ CallCFunction(grow_stack, 3);
1056 // If return nullptr, we have failed to grow the stack, and
1057 // must exit with a stack-overflow exception.
1058 // Returning from the regexp code restores the stack (sp <- fp)
1059 // so we don't need to drop the link register from it before exiting.
1060 __ Cbz(w0, &exit_with_exception);
1061 // Otherwise use return value as new stack pointer.
1062 __ Mov(backtrack_stackpointer(), x0);
1063 // Reset the cached registers.
1064 __ PopCPURegList(cached_registers);
1065 RestoreLinkRegister();
1066 __ Ret();
1067 }
1068
1069 if (exit_with_exception.is_linked()) {
1070 __ Bind(&exit_with_exception);
1071 __ Mov(w0, EXCEPTION);
1072 __ B(&return_w0);
1073 }
1074
1075 CodeDesc code_desc;
1076 masm_->GetCode(isolate(), &code_desc);
1077 Handle<Code> code = isolate()->factory()->NewCode(code_desc, Code::REGEXP,
1078 masm_->CodeObject());
1079 PROFILE(masm_->isolate(),
1080 RegExpCodeCreateEvent(AbstractCode::cast(*code), *source));
1081 return Handle<HeapObject>::cast(code);
1082 }
1083
1084
GoTo(Label * to)1085 void RegExpMacroAssemblerARM64::GoTo(Label* to) {
1086 BranchOrBacktrack(al, to);
1087 }
1088
IfRegisterGE(int reg,int comparand,Label * if_ge)1089 void RegExpMacroAssemblerARM64::IfRegisterGE(int reg, int comparand,
1090 Label* if_ge) {
1091 Register to_compare = GetRegister(reg, w10);
1092 CompareAndBranchOrBacktrack(to_compare, comparand, ge, if_ge);
1093 }
1094
1095
IfRegisterLT(int reg,int comparand,Label * if_lt)1096 void RegExpMacroAssemblerARM64::IfRegisterLT(int reg, int comparand,
1097 Label* if_lt) {
1098 Register to_compare = GetRegister(reg, w10);
1099 CompareAndBranchOrBacktrack(to_compare, comparand, lt, if_lt);
1100 }
1101
1102
IfRegisterEqPos(int reg,Label * if_eq)1103 void RegExpMacroAssemblerARM64::IfRegisterEqPos(int reg, Label* if_eq) {
1104 Register to_compare = GetRegister(reg, w10);
1105 __ Cmp(to_compare, current_input_offset());
1106 BranchOrBacktrack(eq, if_eq);
1107 }
1108
1109 RegExpMacroAssembler::IrregexpImplementation
Implementation()1110 RegExpMacroAssemblerARM64::Implementation() {
1111 return kARM64Implementation;
1112 }
1113
1114
LoadCurrentCharacter(int cp_offset,Label * on_end_of_input,bool check_bounds,int characters)1115 void RegExpMacroAssemblerARM64::LoadCurrentCharacter(int cp_offset,
1116 Label* on_end_of_input,
1117 bool check_bounds,
1118 int characters) {
1119 // TODO(pielan): Make sure long strings are caught before this, and not
1120 // just asserted in debug mode.
1121 // Be sane! (And ensure that an int32_t can be used to index the string)
1122 DCHECK(cp_offset < (1<<30));
1123 if (check_bounds) {
1124 if (cp_offset >= 0) {
1125 CheckPosition(cp_offset + characters - 1, on_end_of_input);
1126 } else {
1127 CheckPosition(cp_offset, on_end_of_input);
1128 }
1129 }
1130 LoadCurrentCharacterUnchecked(cp_offset, characters);
1131 }
1132
1133
PopCurrentPosition()1134 void RegExpMacroAssemblerARM64::PopCurrentPosition() {
1135 Pop(current_input_offset());
1136 }
1137
1138
PopRegister(int register_index)1139 void RegExpMacroAssemblerARM64::PopRegister(int register_index) {
1140 Pop(w10);
1141 StoreRegister(register_index, w10);
1142 }
1143
1144
PushBacktrack(Label * label)1145 void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) {
1146 if (label->is_bound()) {
1147 int target = label->pos();
1148 __ Mov(w10, target + Code::kHeaderSize - kHeapObjectTag);
1149 } else {
1150 __ Adr(x10, label, MacroAssembler::kAdrFar);
1151 __ Sub(x10, x10, code_pointer());
1152 if (masm_->emit_debug_code()) {
1153 __ Cmp(x10, kWRegMask);
1154 // The code offset has to fit in a W register.
1155 __ Check(ls, AbortReason::kOffsetOutOfRange);
1156 }
1157 }
1158 Push(w10);
1159 CheckStackLimit();
1160 }
1161
1162
PushCurrentPosition()1163 void RegExpMacroAssemblerARM64::PushCurrentPosition() {
1164 Push(current_input_offset());
1165 }
1166
1167
PushRegister(int register_index,StackCheckFlag check_stack_limit)1168 void RegExpMacroAssemblerARM64::PushRegister(int register_index,
1169 StackCheckFlag check_stack_limit) {
1170 Register to_push = GetRegister(register_index, w10);
1171 Push(to_push);
1172 if (check_stack_limit) CheckStackLimit();
1173 }
1174
1175
ReadCurrentPositionFromRegister(int reg)1176 void RegExpMacroAssemblerARM64::ReadCurrentPositionFromRegister(int reg) {
1177 RegisterState register_state = GetRegisterState(reg);
1178 switch (register_state) {
1179 case STACKED:
1180 __ Ldr(current_input_offset(), register_location(reg));
1181 break;
1182 case CACHED_LSW:
1183 __ Mov(current_input_offset(), GetCachedRegister(reg).W());
1184 break;
1185 case CACHED_MSW:
1186 __ Lsr(current_input_offset().X(), GetCachedRegister(reg),
1187 kWRegSizeInBits);
1188 break;
1189 default:
1190 UNREACHABLE();
1191 break;
1192 }
1193 }
1194
1195
ReadStackPointerFromRegister(int reg)1196 void RegExpMacroAssemblerARM64::ReadStackPointerFromRegister(int reg) {
1197 Register read_from = GetRegister(reg, w10);
1198 __ Ldr(x11, MemOperand(frame_pointer(), kStackBase));
1199 __ Add(backtrack_stackpointer(), x11, Operand(read_from, SXTW));
1200 }
1201
1202
SetCurrentPositionFromEnd(int by)1203 void RegExpMacroAssemblerARM64::SetCurrentPositionFromEnd(int by) {
1204 Label after_position;
1205 __ Cmp(current_input_offset(), -by * char_size());
1206 __ B(ge, &after_position);
1207 __ Mov(current_input_offset(), -by * char_size());
1208 // On RegExp code entry (where this operation is used), the character before
1209 // the current position is expected to be already loaded.
1210 // We have advanced the position, so it's safe to read backwards.
1211 LoadCurrentCharacterUnchecked(-1, 1);
1212 __ Bind(&after_position);
1213 }
1214
1215
SetRegister(int register_index,int to)1216 void RegExpMacroAssemblerARM64::SetRegister(int register_index, int to) {
1217 DCHECK(register_index >= num_saved_registers_); // Reserved for positions!
1218 Register set_to = wzr;
1219 if (to != 0) {
1220 set_to = w10;
1221 __ Mov(set_to, to);
1222 }
1223 StoreRegister(register_index, set_to);
1224 }
1225
1226
Succeed()1227 bool RegExpMacroAssemblerARM64::Succeed() {
1228 __ B(&success_label_);
1229 return global();
1230 }
1231
1232
WriteCurrentPositionToRegister(int reg,int cp_offset)1233 void RegExpMacroAssemblerARM64::WriteCurrentPositionToRegister(int reg,
1234 int cp_offset) {
1235 Register position = current_input_offset();
1236 if (cp_offset != 0) {
1237 position = w10;
1238 __ Add(position, current_input_offset(), cp_offset * char_size());
1239 }
1240 StoreRegister(reg, position);
1241 }
1242
1243
ClearRegisters(int reg_from,int reg_to)1244 void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) {
1245 DCHECK(reg_from <= reg_to);
1246 int num_registers = reg_to - reg_from + 1;
1247
1248 // If the first capture register is cached in a hardware register but not
1249 // aligned on a 64-bit one, we need to clear the first one specifically.
1250 if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) {
1251 StoreRegister(reg_from, string_start_minus_one());
1252 num_registers--;
1253 reg_from++;
1254 }
1255
1256 // Clear cached registers in pairs as far as possible.
1257 while ((num_registers >= 2) && (reg_from < kNumCachedRegisters)) {
1258 DCHECK(GetRegisterState(reg_from) == CACHED_LSW);
1259 __ Mov(GetCachedRegister(reg_from), twice_non_position_value());
1260 reg_from += 2;
1261 num_registers -= 2;
1262 }
1263
1264 if ((num_registers % 2) == 1) {
1265 StoreRegister(reg_from, string_start_minus_one());
1266 num_registers--;
1267 reg_from++;
1268 }
1269
1270 if (num_registers > 0) {
1271 // If there are some remaining registers, they are stored on the stack.
1272 DCHECK_LE(kNumCachedRegisters, reg_from);
1273
1274 // Move down the indexes of the registers on stack to get the correct offset
1275 // in memory.
1276 reg_from -= kNumCachedRegisters;
1277 reg_to -= kNumCachedRegisters;
1278 // We should not unroll the loop for less than 2 registers.
1279 STATIC_ASSERT(kNumRegistersToUnroll > 2);
1280 // We position the base pointer to (reg_from + 1).
1281 int base_offset = kFirstRegisterOnStack -
1282 kWRegSize - (kWRegSize * reg_from);
1283 if (num_registers > kNumRegistersToUnroll) {
1284 Register base = x10;
1285 __ Add(base, frame_pointer(), base_offset);
1286
1287 Label loop;
1288 __ Mov(x11, num_registers);
1289 __ Bind(&loop);
1290 __ Str(twice_non_position_value(),
1291 MemOperand(base, -kPointerSize, PostIndex));
1292 __ Sub(x11, x11, 2);
1293 __ Cbnz(x11, &loop);
1294 } else {
1295 for (int i = reg_from; i <= reg_to; i += 2) {
1296 __ Str(twice_non_position_value(),
1297 MemOperand(frame_pointer(), base_offset));
1298 base_offset -= kWRegSize * 2;
1299 }
1300 }
1301 }
1302 }
1303
1304
WriteStackPointerToRegister(int reg)1305 void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) {
1306 __ Ldr(x10, MemOperand(frame_pointer(), kStackBase));
1307 __ Sub(x10, backtrack_stackpointer(), x10);
1308 if (masm_->emit_debug_code()) {
1309 __ Cmp(x10, Operand(w10, SXTW));
1310 // The stack offset needs to fit in a W register.
1311 __ Check(eq, AbortReason::kOffsetOutOfRange);
1312 }
1313 StoreRegister(reg, w10);
1314 }
1315
1316
1317 // Helper function for reading a value out of a stack frame.
1318 template <typename T>
frame_entry(Address re_frame,int frame_offset)1319 static T& frame_entry(Address re_frame, int frame_offset) {
1320 return *reinterpret_cast<T*>(re_frame + frame_offset);
1321 }
1322
1323
1324 template <typename T>
frame_entry_address(Address re_frame,int frame_offset)1325 static T* frame_entry_address(Address re_frame, int frame_offset) {
1326 return reinterpret_cast<T*>(re_frame + frame_offset);
1327 }
1328
1329
CheckStackGuardState(Address * return_address,Code * re_code,Address re_frame,int start_index,const byte ** input_start,const byte ** input_end)1330 int RegExpMacroAssemblerARM64::CheckStackGuardState(
1331 Address* return_address, Code* re_code, Address re_frame, int start_index,
1332 const byte** input_start, const byte** input_end) {
1333 return NativeRegExpMacroAssembler::CheckStackGuardState(
1334 frame_entry<Isolate*>(re_frame, kIsolate), start_index,
1335 frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
1336 frame_entry_address<String*>(re_frame, kInput), input_start, input_end);
1337 }
1338
1339
CheckPosition(int cp_offset,Label * on_outside_input)1340 void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset,
1341 Label* on_outside_input) {
1342 if (cp_offset >= 0) {
1343 CompareAndBranchOrBacktrack(current_input_offset(),
1344 -cp_offset * char_size(), ge, on_outside_input);
1345 } else {
1346 __ Add(w12, current_input_offset(), Operand(cp_offset * char_size()));
1347 __ Cmp(w12, string_start_minus_one());
1348 BranchOrBacktrack(le, on_outside_input);
1349 }
1350 }
1351
1352
1353 // Private methods:
1354
CallCheckStackGuardState(Register scratch)1355 void RegExpMacroAssemblerARM64::CallCheckStackGuardState(Register scratch) {
1356 // Allocate space on the stack to store the return address. The
1357 // CheckStackGuardState C++ function will override it if the code
1358 // moved. Allocate extra space for 2 arguments passed by pointers.
1359 // AAPCS64 requires the stack to be 16 byte aligned.
1360 int alignment = masm_->ActivationFrameAlignment();
1361 DCHECK_EQ(alignment % 16, 0);
1362 int align_mask = (alignment / kXRegSize) - 1;
1363 int xreg_to_claim = (3 + align_mask) & ~align_mask;
1364
1365 __ Claim(xreg_to_claim);
1366
1367 // CheckStackGuardState needs the end and start addresses of the input string.
1368 __ Poke(input_end(), 2 * kPointerSize);
1369 __ Add(x5, sp, 2 * kPointerSize);
1370 __ Poke(input_start(), kPointerSize);
1371 __ Add(x4, sp, kPointerSize);
1372
1373 __ Mov(w3, start_offset());
1374 // RegExp code frame pointer.
1375 __ Mov(x2, frame_pointer());
1376 // Code* of self.
1377 __ Mov(x1, Operand(masm_->CodeObject()));
1378
1379 // We need to pass a pointer to the return address as first argument.
1380 // The DirectCEntry stub will place the return address on the stack before
1381 // calling so the stack pointer will point to it.
1382 __ Mov(x0, sp);
1383
1384 ExternalReference check_stack_guard_state =
1385 ExternalReference::re_check_stack_guard_state(isolate());
1386 __ Mov(scratch, check_stack_guard_state);
1387 DirectCEntryStub stub(isolate());
1388 stub.GenerateCall(masm_, scratch);
1389
1390 // The input string may have been moved in memory, we need to reload it.
1391 __ Peek(input_start(), kPointerSize);
1392 __ Peek(input_end(), 2 * kPointerSize);
1393
1394 __ Drop(xreg_to_claim);
1395
1396 // Reload the Code pointer.
1397 __ Mov(code_pointer(), Operand(masm_->CodeObject()));
1398 }
1399
BranchOrBacktrack(Condition condition,Label * to)1400 void RegExpMacroAssemblerARM64::BranchOrBacktrack(Condition condition,
1401 Label* to) {
1402 if (condition == al) { // Unconditional.
1403 if (to == nullptr) {
1404 Backtrack();
1405 return;
1406 }
1407 __ B(to);
1408 return;
1409 }
1410 if (to == nullptr) {
1411 to = &backtrack_label_;
1412 }
1413 __ B(condition, to);
1414 }
1415
CompareAndBranchOrBacktrack(Register reg,int immediate,Condition condition,Label * to)1416 void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg,
1417 int immediate,
1418 Condition condition,
1419 Label* to) {
1420 if ((immediate == 0) && ((condition == eq) || (condition == ne))) {
1421 if (to == nullptr) {
1422 to = &backtrack_label_;
1423 }
1424 if (condition == eq) {
1425 __ Cbz(reg, to);
1426 } else {
1427 __ Cbnz(reg, to);
1428 }
1429 } else {
1430 __ Cmp(reg, immediate);
1431 BranchOrBacktrack(condition, to);
1432 }
1433 }
1434
1435
CheckPreemption()1436 void RegExpMacroAssemblerARM64::CheckPreemption() {
1437 // Check for preemption.
1438 ExternalReference stack_limit =
1439 ExternalReference::address_of_stack_limit(isolate());
1440 __ Mov(x10, stack_limit);
1441 __ Ldr(x10, MemOperand(x10));
1442 __ Cmp(sp, x10);
1443 CallIf(&check_preempt_label_, ls);
1444 }
1445
1446
CheckStackLimit()1447 void RegExpMacroAssemblerARM64::CheckStackLimit() {
1448 ExternalReference stack_limit =
1449 ExternalReference::address_of_regexp_stack_limit(isolate());
1450 __ Mov(x10, stack_limit);
1451 __ Ldr(x10, MemOperand(x10));
1452 __ Cmp(backtrack_stackpointer(), x10);
1453 CallIf(&stack_overflow_label_, ls);
1454 }
1455
1456
Push(Register source)1457 void RegExpMacroAssemblerARM64::Push(Register source) {
1458 DCHECK(source.Is32Bits());
1459 DCHECK(!source.is(backtrack_stackpointer()));
1460 __ Str(source,
1461 MemOperand(backtrack_stackpointer(),
1462 -static_cast<int>(kWRegSize),
1463 PreIndex));
1464 }
1465
1466
Pop(Register target)1467 void RegExpMacroAssemblerARM64::Pop(Register target) {
1468 DCHECK(target.Is32Bits());
1469 DCHECK(!target.is(backtrack_stackpointer()));
1470 __ Ldr(target,
1471 MemOperand(backtrack_stackpointer(), kWRegSize, PostIndex));
1472 }
1473
1474
GetCachedRegister(int register_index)1475 Register RegExpMacroAssemblerARM64::GetCachedRegister(int register_index) {
1476 DCHECK_GT(kNumCachedRegisters, register_index);
1477 return Register::Create(register_index / 2, kXRegSizeInBits);
1478 }
1479
1480
GetRegister(int register_index,Register maybe_result)1481 Register RegExpMacroAssemblerARM64::GetRegister(int register_index,
1482 Register maybe_result) {
1483 DCHECK(maybe_result.Is32Bits());
1484 DCHECK_LE(0, register_index);
1485 if (num_registers_ <= register_index) {
1486 num_registers_ = register_index + 1;
1487 }
1488 Register result = NoReg;
1489 RegisterState register_state = GetRegisterState(register_index);
1490 switch (register_state) {
1491 case STACKED:
1492 __ Ldr(maybe_result, register_location(register_index));
1493 result = maybe_result;
1494 break;
1495 case CACHED_LSW:
1496 result = GetCachedRegister(register_index).W();
1497 break;
1498 case CACHED_MSW:
1499 __ Lsr(maybe_result.X(), GetCachedRegister(register_index),
1500 kWRegSizeInBits);
1501 result = maybe_result;
1502 break;
1503 default:
1504 UNREACHABLE();
1505 break;
1506 }
1507 DCHECK(result.Is32Bits());
1508 return result;
1509 }
1510
1511
StoreRegister(int register_index,Register source)1512 void RegExpMacroAssemblerARM64::StoreRegister(int register_index,
1513 Register source) {
1514 DCHECK(source.Is32Bits());
1515 DCHECK_LE(0, register_index);
1516 if (num_registers_ <= register_index) {
1517 num_registers_ = register_index + 1;
1518 }
1519
1520 RegisterState register_state = GetRegisterState(register_index);
1521 switch (register_state) {
1522 case STACKED:
1523 __ Str(source, register_location(register_index));
1524 break;
1525 case CACHED_LSW: {
1526 Register cached_register = GetCachedRegister(register_index);
1527 if (!source.Is(cached_register.W())) {
1528 __ Bfi(cached_register, source.X(), 0, kWRegSizeInBits);
1529 }
1530 break;
1531 }
1532 case CACHED_MSW: {
1533 Register cached_register = GetCachedRegister(register_index);
1534 __ Bfi(cached_register, source.X(), kWRegSizeInBits, kWRegSizeInBits);
1535 break;
1536 }
1537 default:
1538 UNREACHABLE();
1539 break;
1540 }
1541 }
1542
1543
CallIf(Label * to,Condition condition)1544 void RegExpMacroAssemblerARM64::CallIf(Label* to, Condition condition) {
1545 Label skip_call;
1546 if (condition != al) __ B(&skip_call, NegateCondition(condition));
1547 __ Bl(to);
1548 __ Bind(&skip_call);
1549 }
1550
1551
RestoreLinkRegister()1552 void RegExpMacroAssemblerARM64::RestoreLinkRegister() {
1553 __ Pop(lr, xzr);
1554 __ Add(lr, lr, Operand(masm_->CodeObject()));
1555 }
1556
1557
SaveLinkRegister()1558 void RegExpMacroAssemblerARM64::SaveLinkRegister() {
1559 __ Sub(lr, lr, Operand(masm_->CodeObject()));
1560 __ Push(xzr, lr);
1561 }
1562
1563
register_location(int register_index)1564 MemOperand RegExpMacroAssemblerARM64::register_location(int register_index) {
1565 DCHECK(register_index < (1<<30));
1566 DCHECK_LE(kNumCachedRegisters, register_index);
1567 if (num_registers_ <= register_index) {
1568 num_registers_ = register_index + 1;
1569 }
1570 register_index -= kNumCachedRegisters;
1571 int offset = kFirstRegisterOnStack - register_index * kWRegSize;
1572 return MemOperand(frame_pointer(), offset);
1573 }
1574
capture_location(int register_index,Register scratch)1575 MemOperand RegExpMacroAssemblerARM64::capture_location(int register_index,
1576 Register scratch) {
1577 DCHECK(register_index < (1<<30));
1578 DCHECK(register_index < num_saved_registers_);
1579 DCHECK_LE(kNumCachedRegisters, register_index);
1580 DCHECK_EQ(register_index % 2, 0);
1581 register_index -= kNumCachedRegisters;
1582 int offset = kFirstCaptureOnStack - register_index * kWRegSize;
1583 // capture_location is used with Stp instructions to load/store 2 registers.
1584 // The immediate field in the encoding is limited to 7 bits (signed).
1585 if (is_int7(offset)) {
1586 return MemOperand(frame_pointer(), offset);
1587 } else {
1588 __ Add(scratch, frame_pointer(), offset);
1589 return MemOperand(scratch);
1590 }
1591 }
1592
LoadCurrentCharacterUnchecked(int cp_offset,int characters)1593 void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset,
1594 int characters) {
1595 Register offset = current_input_offset();
1596
1597 // The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU
1598 // and the operating system running on the target allow it.
1599 // If unaligned load/stores are not supported then this function must only
1600 // be used to load a single character at a time.
1601
1602 // ARMv8 supports unaligned accesses but V8 or the kernel can decide to
1603 // disable it.
1604 // TODO(pielan): See whether or not we should disable unaligned accesses.
1605 if (!CanReadUnaligned()) {
1606 DCHECK_EQ(1, characters);
1607 }
1608
1609 if (cp_offset != 0) {
1610 if (masm_->emit_debug_code()) {
1611 __ Mov(x10, cp_offset * char_size());
1612 __ Add(x10, x10, Operand(current_input_offset(), SXTW));
1613 __ Cmp(x10, Operand(w10, SXTW));
1614 // The offset needs to fit in a W register.
1615 __ Check(eq, AbortReason::kOffsetOutOfRange);
1616 } else {
1617 __ Add(w10, current_input_offset(), cp_offset * char_size());
1618 }
1619 offset = w10;
1620 }
1621
1622 if (mode_ == LATIN1) {
1623 if (characters == 4) {
1624 __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1625 } else if (characters == 2) {
1626 __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1627 } else {
1628 DCHECK_EQ(1, characters);
1629 __ Ldrb(current_character(), MemOperand(input_end(), offset, SXTW));
1630 }
1631 } else {
1632 DCHECK(mode_ == UC16);
1633 if (characters == 2) {
1634 __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1635 } else {
1636 DCHECK_EQ(1, characters);
1637 __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1638 }
1639 }
1640 }
1641
1642 #endif // V8_INTERPRETED_REGEXP
1643
1644 } // namespace internal
1645 } // namespace v8
1646
1647 #undef __
1648
1649 #endif // V8_TARGET_ARCH_ARM64
1650