• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #if V8_TARGET_ARCH_ARM64
6 
7 #include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
8 
9 #include "src/codegen/arm64/macro-assembler-arm64-inl.h"
10 #include "src/codegen/macro-assembler.h"
11 #include "src/logging/log.h"
12 #include "src/objects/objects-inl.h"
13 #include "src/regexp/regexp-macro-assembler.h"
14 #include "src/regexp/regexp-stack.h"
15 #include "src/snapshot/embedded/embedded-data.h"
16 #include "src/strings/unicode.h"
17 
18 namespace v8 {
19 namespace internal {
20 
21 /*
22  * This assembler uses the following register assignment convention:
23  * - w19     : Used to temporarily store a value before a call to C code.
24  *             See CheckNotBackReferenceIgnoreCase.
25  * - x20     : Pointer to the current Code object,
26  *             it includes the heap object tag.
27  * - w21     : Current position in input, as negative offset from
28  *             the end of the string. Please notice that this is
29  *             the byte offset, not the character offset!
30  * - w22     : Currently loaded character. Must be loaded using
31  *             LoadCurrentCharacter before using any of the dispatch methods.
32  * - x23     : Points to tip of backtrack stack.
33  * - w24     : Position of the first character minus one: non_position_value.
34  *             Used to initialize capture registers.
35  * - x25     : Address at the end of the input string: input_end.
36  *             Points to byte after last character in input.
37  * - x26     : Address at the start of the input string: input_start.
38  * - w27     : Where to start in the input string.
39  * - x28     : Output array pointer.
40  * - x29/fp  : Frame pointer. Used to access arguments, local variables and
41  *             RegExp registers.
42  * - x16/x17 : IP registers, used by assembler. Very volatile.
43  * - sp      : Points to tip of C stack.
44  *
45  * - x0-x7   : Used as a cache to store 32 bit capture registers. These
46  *             registers need to be retained every time a call to C code
47  *             is done.
48  *
49  * The remaining registers are free for computations.
50  * Each call to a public method should retain this convention.
51  *
52  * The stack will have the following structure:
53  *
54  *  Location    Name               Description
55  *              (as referred to in
56  *              the code)
57  *
58  *  - fp[104]  Address regexp      Address of the JSRegExp object. Unused in
59  *                                 native code, passed to match signature of
60  *                                 the interpreter.
61  *  - fp[96]   isolate             Address of the current isolate.
62  *  ^^^ sp when called ^^^
63  *  - fp[88]    lr                 Return from the RegExp code.
64  *  - fp[80]    r29                Old frame pointer (CalleeSaved).
65  *  - fp[0..72] r19-r28            Backup of CalleeSaved registers.
66  *  - fp[-8]    direct_call        1 => Direct call from JavaScript code.
67  *                                 0 => Call through the runtime system.
68  *  - fp[-16]   stack_base         High end of the memory area to use as
69  *                                 the backtracking stack.
70  *  - fp[-24]   output_size        Output may fit multiple sets of matches.
71  *  - fp[-32]   input              Handle containing the input string.
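72  *  - fp[-40]   success_counter
 *              NOTE: GetCode() also reserves and zeroes a 64-bit
 *              kBacktrackCount slot in addition to success_counter. That
 *              slot is not listed in this table, so verify the offsets
 *              below against the frame constants declared in the header.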
73  *  ^^^^^^^^^^^^^ From here and downwards we store 32 bit values ^^^^^^^^^^^^^
74  *  - fp[-44]   register N         Capture registers initialized with
75  *  - fp[-48]   register N + 1     non_position_value.
76  *              ...                The first kNumCachedRegisters (N) registers
77  *              ...                are cached in x0 to x7.
78  *              ...                Only positions must be stored in the first
79  *  -           ...                num_saved_registers_ registers.
80  *  -           ...
81  *  -           register N + num_registers - 1
82  *  ^^^^^^^^^ sp ^^^^^^^^^
83  *
84  * The first num_saved_registers_ registers are initialized to point to
85  * "character -1" in the string (i.e., char_size() bytes before the first
86  * character of the string). The remaining registers start out as garbage.
87  *
88  * The data up to the return address must be placed there by the calling
89  * code and the remaining arguments are passed in registers, e.g. by calling the
90  * code entry as cast to a function with the signature:
91  * int (*match)(String input_string,
92  *              int start_index,
93  *              Address start,
94  *              Address end,
95  *              int* capture_output_array,
96  *              int num_capture_registers,
97  *              byte* stack_area_base,
98  *              bool direct_call = false,
99  *              Isolate* isolate,
100  *              Address regexp);
101  * The call is performed by NativeRegExpMacroAssembler::Execute()
102  * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
103  */
104 
// Shorthand used throughout this file: emitters are written as
// `__ Foo(...)`. ACCESS_MASM is defined elsewhere; presumably it forwards the
// call to masm_ -- confirm against its definition.
105 #define __ ACCESS_MASM(masm_)
106 
// Out-of-class definition of the static constant. The constructor passes it to
// NewAssemblerBuffer() when creating the MacroAssembler.
107 const int RegExpMacroAssemblerARM64::kRegExpCodeSize;
108 
RegExpMacroAssemblerARM64(Isolate * isolate,Zone * zone,Mode mode,int registers_to_save)109 RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(Isolate* isolate,
110                                                      Zone* zone, Mode mode,
111                                                      int registers_to_save)
112     : NativeRegExpMacroAssembler(isolate, zone),
113       masm_(new MacroAssembler(isolate, CodeObjectRequired::kYes,
114                                NewAssemblerBuffer(kRegExpCodeSize))),
115       mode_(mode),
116       num_registers_(registers_to_save),
117       num_saved_registers_(registers_to_save),
118       entry_label_(),
119       start_label_(),
120       success_label_(),
121       backtrack_label_(),
122       exit_label_() {
123   masm_->set_root_array_available(false);
124 
125   DCHECK_EQ(0, registers_to_save % 2);
126   // We can cache at most 16 W registers in x0-x7.
127   STATIC_ASSERT(kNumCachedRegisters <= 16);
128   STATIC_ASSERT((kNumCachedRegisters % 2) == 0);
129   __ CallTarget();
130 
131   __ B(&entry_label_);   // We'll write the entry code later.
132   __ Bind(&start_label_);  // And then continue from here.
133 }
134 
~RegExpMacroAssemblerARM64()135 RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() {
136   delete masm_;
137   // Unuse labels in case we throw away the assembler without calling GetCode.
138   entry_label_.Unuse();
139   start_label_.Unuse();
140   success_label_.Unuse();
141   backtrack_label_.Unuse();
142   exit_label_.Unuse();
143   check_preempt_label_.Unuse();
144   stack_overflow_label_.Unuse();
145   fallback_label_.Unuse();
146 }
147 
stack_limit_slack()148 int RegExpMacroAssemblerARM64::stack_limit_slack()  {
149   return RegExpStack::kStackLimitSlack;
150 }
151 
152 
AdvanceCurrentPosition(int by)153 void RegExpMacroAssemblerARM64::AdvanceCurrentPosition(int by) {
154   if (by != 0) {
155     __ Add(current_input_offset(),
156            current_input_offset(), by * char_size());
157   }
158 }
159 
160 
AdvanceRegister(int reg,int by)161 void RegExpMacroAssemblerARM64::AdvanceRegister(int reg, int by) {
162   DCHECK((reg >= 0) && (reg < num_registers_));
163   if (by != 0) {
164     RegisterState register_state = GetRegisterState(reg);
165     switch (register_state) {
166       case STACKED:
167         __ Ldr(w10, register_location(reg));
168         __ Add(w10, w10, by);
169         __ Str(w10, register_location(reg));
170         break;
171       case CACHED_LSW: {
172         Register to_advance = GetCachedRegister(reg);
173         __ Add(to_advance, to_advance, by);
174         break;
175       }
176       case CACHED_MSW: {
177         Register to_advance = GetCachedRegister(reg);
178         // Sign-extend to int64, shift as uint64, cast back to int64.
179         __ Add(
180             to_advance, to_advance,
181             static_cast<int64_t>(static_cast<uint64_t>(static_cast<int64_t>(by))
182                                  << kWRegSizeInBits));
183         break;
184       }
185       default:
186         UNREACHABLE();
187         break;
188     }
189   }
190 }
191 
192 
Backtrack()193 void RegExpMacroAssemblerARM64::Backtrack() {
194   CheckPreemption();
195   if (has_backtrack_limit()) {
196     Label next;
197     UseScratchRegisterScope temps(masm_);
198     Register scratch = temps.AcquireW();
199     __ Ldr(scratch, MemOperand(frame_pointer(), kBacktrackCount));
200     __ Add(scratch, scratch, 1);
201     __ Str(scratch, MemOperand(frame_pointer(), kBacktrackCount));
202     __ Cmp(scratch, Operand(backtrack_limit()));
203     __ B(ne, &next);
204 
205     // Backtrack limit exceeded.
206     if (can_fallback()) {
207       __ B(&fallback_label_);
208     } else {
209       // Can't fallback, so we treat it as a failed match.
210       Fail();
211     }
212 
213     __ bind(&next);
214   }
215   Pop(w10);
216   __ Add(x10, code_pointer(), Operand(w10, UXTW));
217   __ Br(x10);
218 }
219 
220 
Bind(Label * label)221 void RegExpMacroAssemblerARM64::Bind(Label* label) {
222   __ Bind(label);
223 }
224 
BindJumpTarget(Label * label)225 void RegExpMacroAssemblerARM64::BindJumpTarget(Label* label) {
226   __ BindJumpTarget(label);
227 }
228 
CheckCharacter(uint32_t c,Label * on_equal)229 void RegExpMacroAssemblerARM64::CheckCharacter(uint32_t c, Label* on_equal) {
230   CompareAndBranchOrBacktrack(current_character(), c, eq, on_equal);
231 }
232 
233 
CheckCharacterGT(uc16 limit,Label * on_greater)234 void RegExpMacroAssemblerARM64::CheckCharacterGT(uc16 limit,
235                                                  Label* on_greater) {
236   CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater);
237 }
238 
CheckAtStart(int cp_offset,Label * on_at_start)239 void RegExpMacroAssemblerARM64::CheckAtStart(int cp_offset,
240                                              Label* on_at_start) {
241   __ Add(w10, current_input_offset(),
242          Operand(-char_size() + cp_offset * char_size()));
243   __ Cmp(w10, string_start_minus_one());
244   BranchOrBacktrack(eq, on_at_start);
245 }
246 
CheckNotAtStart(int cp_offset,Label * on_not_at_start)247 void RegExpMacroAssemblerARM64::CheckNotAtStart(int cp_offset,
248                                                 Label* on_not_at_start) {
249   __ Add(w10, current_input_offset(),
250          Operand(-char_size() + cp_offset * char_size()));
251   __ Cmp(w10, string_start_minus_one());
252   BranchOrBacktrack(ne, on_not_at_start);
253 }
254 
255 
CheckCharacterLT(uc16 limit,Label * on_less)256 void RegExpMacroAssemblerARM64::CheckCharacterLT(uc16 limit, Label* on_less) {
257   CompareAndBranchOrBacktrack(current_character(), limit, lo, on_less);
258 }
259 
260 
CheckCharacters(Vector<const uc16> str,int cp_offset,Label * on_failure,bool check_end_of_string)261 void RegExpMacroAssemblerARM64::CheckCharacters(Vector<const uc16> str,
262                                               int cp_offset,
263                                               Label* on_failure,
264                                               bool check_end_of_string) {
265   // This method is only ever called from the cctests.
266 
267   if (check_end_of_string) {
268     // Is last character of required match inside string.
269     CheckPosition(cp_offset + str.length() - 1, on_failure);
270   }
271 
272   Register characters_address = x11;
273 
274   __ Add(characters_address,
275          input_end(),
276          Operand(current_input_offset(), SXTW));
277   if (cp_offset != 0) {
278     __ Add(characters_address, characters_address, cp_offset * char_size());
279   }
280 
281   for (int i = 0; i < str.length(); i++) {
282     if (mode_ == LATIN1) {
283       __ Ldrb(w10, MemOperand(characters_address, 1, PostIndex));
284       DCHECK_GE(String::kMaxOneByteCharCode, str[i]);
285     } else {
286       __ Ldrh(w10, MemOperand(characters_address, 2, PostIndex));
287     }
288     CompareAndBranchOrBacktrack(w10, str[i], ne, on_failure);
289   }
290 }
291 
292 
CheckGreedyLoop(Label * on_equal)293 void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
294   __ Ldr(w10, MemOperand(backtrack_stackpointer()));
295   __ Cmp(current_input_offset(), w10);
296   __ Cset(x11, eq);
297   __ Add(backtrack_stackpointer(),
298          backtrack_stackpointer(), Operand(x11, LSL, kWRegSizeLog2));
299   BranchOrBacktrack(eq, on_equal);
300 }
301 
// Emits a case-insensitive back-reference check for the capture stored in
// registers |start_reg| / |start_reg| + 1.
//
// An empty or cleared capture (length zero) falls through without consuming
// input. Otherwise the input at the current position (or ending at it, when
// |read_backward| is set) is compared with the captured text:
//   - LATIN1: an inline byte loop with lower-casing of letters.
//   - UC16:   a call to a C helper, selected by |unicode|.
// On mismatch, or when too little input is available, |on_no_match| is taken
// via BranchOrBacktrack. On success the current input offset is moved past
// the matched text (or before it, when matching backwards).
CheckNotBackReferenceIgnoreCase(int start_reg,bool read_backward,bool unicode,Label * on_no_match)302 void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
303     int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
304   Label fallthrough;
305 
306   Register capture_start_offset = w10;
307   // Save the capture length in a callee-saved register so it will
308   // be preserved if we call a C helper.
309   Register capture_length = w19;
310   DCHECK(kCalleeSaved.IncludesAliasOf(capture_length));
311 
312   // Find length of back-referenced capture.
  // After this, w10 holds the capture start offset and w11 the capture end
  // offset: a cached X register packs the pair (low word = start, high word =
  // end); otherwise both are loaded from the stack.
313   DCHECK_EQ(0, start_reg % 2);
314   if (start_reg < kNumCachedRegisters) {
315     __ Mov(capture_start_offset.X(), GetCachedRegister(start_reg));
316     __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
317   } else {
318     __ Ldp(w11, capture_start_offset, capture_location(start_reg, x10));
319   }
320   __ Sub(capture_length, w11, capture_start_offset);  // Length to check.
321 
322   // At this point, the capture registers are either both set or both cleared.
323   // If the capture length is zero, then the capture is either empty or cleared.
324   // Fall through in both cases.
325   __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
326 
327   // Check that there are enough characters left in the input.
328   if (read_backward) {
    // No match if current offset <= string_start_minus_one + capture_length.
329     __ Add(w12, string_start_minus_one(), capture_length);
330     __ Cmp(current_input_offset(), w12);
331     BranchOrBacktrack(le, on_no_match);
332   } else {
    // Cmn sets flags for capture_length + current offset; a positive sum
    // means the capture would run past the end of the input.
333     __ Cmn(capture_length, current_input_offset());
334     BranchOrBacktrack(gt, on_no_match);
335   }
336 
337   if (mode_ == LATIN1) {
338     Label success;
339     Label fail;
340     Label loop_check;
341 
342     Register capture_start_address = x12;
343     Register capture_end_addresss = x13;
344     Register current_position_address = x14;
345 
    // Turn the (negative) offsets into absolute addresses relative to
    // input_end().
346     __ Add(capture_start_address,
347            input_end(),
348            Operand(capture_start_offset, SXTW));
349     __ Add(capture_end_addresss,
350            capture_start_address,
351            Operand(capture_length, SXTW));
352     __ Add(current_position_address,
353            input_end(),
354            Operand(current_input_offset(), SXTW));
355     if (read_backward) {
356       // Offset by length when matching backwards.
357       __ Sub(current_position_address, current_position_address,
358              Operand(capture_length, SXTW));
359     }
360 
    // Compare one byte per iteration; both pointers post-increment.
361     Label loop;
362     __ Bind(&loop);
363     __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
364     __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
365     __ Cmp(w10, w11);
366     __ B(eq, &loop_check);
367 
368     // Mismatch, try case-insensitive match (converting letters to lower-case).
369     __ Orr(w10, w10, 0x20);  // Convert capture character to lower-case.
370     __ Orr(w11, w11, 0x20);  // Also convert input character.
371     __ Cmp(w11, w10);
372     __ B(ne, &fail);
373     __ Sub(w10, w10, 'a');
374     __ Cmp(w10, 'z' - 'a');  // Is w10 a lowercase letter?
375     __ B(ls, &loop_check);  // In range 'a'-'z'.
376     // Latin-1: Check for values in range [224,254] but not 247.
    // The Ccmp compares against 247 only when the range check passed (ls);
    // otherwise it forces the Z flag. Either way "eq" below means failure.
377     __ Sub(w10, w10, 224 - 'a');
378     __ Cmp(w10, 254 - 224);
379     __ Ccmp(w10, 247 - 224, ZFlag, ls);  // Check for 247.
380     __ B(eq, &fail);  // Weren't Latin-1 letters.
381 
382     __ Bind(&loop_check);
383     __ Cmp(capture_start_address, capture_end_addresss);
384     __ B(lt, &loop);
385     __ B(&success);
386 
387     __ Bind(&fail);
388     BranchOrBacktrack(al, on_no_match);
389 
390     __ Bind(&success);
391     // Compute new value of character position after the matched part.
392     __ Sub(current_input_offset().X(), current_position_address, input_end());
393     if (read_backward) {
394       __ Sub(current_input_offset().X(), current_input_offset().X(),
395              Operand(capture_length, SXTW));
396     }
397     if (masm_->emit_debug_code()) {
398       __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
399       __ Ccmp(current_input_offset(), 0, NoFlag, eq);
400       // The current input offset should be <= 0, and fit in a W register.
401       __ Check(le, AbortReason::kOffsetOutOfRange);
402     }
403   } else {
404     DCHECK(mode_ == UC16);
405     int argument_count = 4;
406 
407     // The cached registers need to be retained.
    // x0-x7 are about to be used for the C call's arguments and result, so
    // they are pushed here and popped after the call.
408     CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
409     DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
410     __ PushCPURegList(cached_registers);
411 
412     // Put arguments into arguments registers.
413     // Parameters are
414     //   x0: Address byte_offset1 - Address captured substring's start.
415     //   x1: Address byte_offset2 - Address of current character position.
416     //   w2: size_t byte_length - length of capture in bytes(!)
417     //   x3: Isolate* isolate.
418 
419     // Address of start of capture.
420     __ Add(x0, input_end(), Operand(capture_start_offset, SXTW));
421     // Length of capture.
422     __ Mov(w2, capture_length);
423     // Address of current input position.
424     __ Add(x1, input_end(), Operand(current_input_offset(), SXTW));
425     if (read_backward) {
426       __ Sub(x1, x1, Operand(capture_length, SXTW));
427     }
428     // Isolate.
429     __ Mov(x3, ExternalReference::isolate_address(isolate()));
430 
431     {
432       AllowExternalCallThatCantCauseGC scope(masm_);
433       ExternalReference function =
434           unicode ? ExternalReference::re_case_insensitive_compare_unicode(
435                         isolate())
436                   : ExternalReference::re_case_insensitive_compare_non_unicode(
437                         isolate());
438       __ CallCFunction(function, argument_count);
439     }
440 
441     // Check if function returned non-zero for success or zero for failure.
442     // x0 is one of the registers used as a cache so it must be tested before
443     // the cache is restored.
444     __ Cmp(x0, 0);
445     __ PopCPURegList(cached_registers);
446     BranchOrBacktrack(eq, on_no_match);
447 
448     // On success, advance position by length of capture.
    // capture_length (w19) is callee-saved, so it is still valid after the
    // C call.
449     if (read_backward) {
450       __ Sub(current_input_offset(), current_input_offset(), capture_length);
451     } else {
452       __ Add(current_input_offset(), current_input_offset(), capture_length);
453     }
454   }
455 
456   __ Bind(&fallthrough);
457 }
458 
// Emits a case-sensitive back-reference check for the capture stored in
// registers |start_reg| / |start_reg| + 1.
//
// An empty or cleared capture (length zero) falls through without consuming
// input. Otherwise the captured text is compared character by character with
// the input at the current position (or ending at it, when |read_backward| is
// set). On mismatch, or when too little input is available, |on_no_match| is
// taken via BranchOrBacktrack. On success the current input offset is updated
// to lie past the matched text (or before it, when matching backwards).
CheckNotBackReference(int start_reg,bool read_backward,Label * on_no_match)459 void RegExpMacroAssemblerARM64::CheckNotBackReference(int start_reg,
460                                                       bool read_backward,
461                                                       Label* on_no_match) {
462   Label fallthrough;
463 
464   Register capture_start_address = x12;
465   Register capture_end_address = x13;
466   Register current_position_address = x14;
467   Register capture_length = w15;
468 
469   // Find length of back-referenced capture.
  // After this, w10 holds the capture start offset and w11 the capture end
  // offset: a cached X register packs the pair (low word = start, high word =
  // end); otherwise both are loaded from the stack.
470   DCHECK_EQ(0, start_reg % 2);
471   if (start_reg < kNumCachedRegisters) {
472     __ Mov(x10, GetCachedRegister(start_reg));
473     __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
474   } else {
475     __ Ldp(w11, w10, capture_location(start_reg, x10));
476   }
477   __ Sub(capture_length, w11, w10);  // Length to check.
478 
479   // At this point, the capture registers are either both set or both cleared.
480   // If the capture length is zero, then the capture is either empty or cleared.
481   // Fall through in both cases.
482   __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
483 
484   // Check that there are enough characters left in the input.
485   if (read_backward) {
    // No match if current offset <= string_start_minus_one + capture_length.
486     __ Add(w12, string_start_minus_one(), capture_length);
487     __ Cmp(current_input_offset(), w12);
488     BranchOrBacktrack(le, on_no_match);
489   } else {
    // Cmn sets flags for capture_length + current offset; a positive sum
    // means the capture would run past the end of the input.
490     __ Cmn(capture_length, current_input_offset());
491     BranchOrBacktrack(gt, on_no_match);
492   }
493 
494   // Compute pointers to match string and capture string
495   __ Add(capture_start_address, input_end(), Operand(w10, SXTW));
496   __ Add(capture_end_address,
497          capture_start_address,
498          Operand(capture_length, SXTW));
499   __ Add(current_position_address,
500          input_end(),
501          Operand(current_input_offset(), SXTW));
502   if (read_backward) {
503     // Offset by length when matching backwards.
504     __ Sub(current_position_address, current_position_address,
505            Operand(capture_length, SXTW));
506   }
507 
  // Compare one character per iteration (1 byte for LATIN1, 2 for UC16);
  // both pointers post-increment. The loop ends once the capture pointer
  // reaches capture_end_address.
508   Label loop;
509   __ Bind(&loop);
510   if (mode_ == LATIN1) {
511     __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
512     __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
513   } else {
514     DCHECK(mode_ == UC16);
515     __ Ldrh(w10, MemOperand(capture_start_address, 2, PostIndex));
516     __ Ldrh(w11, MemOperand(current_position_address, 2, PostIndex));
517   }
518   __ Cmp(w10, w11);
519   BranchOrBacktrack(ne, on_no_match);
520   __ Cmp(capture_start_address, capture_end_address);
521   __ B(lt, &loop);
522 
523   // Move current character position to position after match.
524   __ Sub(current_input_offset().X(), current_position_address, input_end());
525   if (read_backward) {
526     __ Sub(current_input_offset().X(), current_input_offset().X(),
527            Operand(capture_length, SXTW));
528   }
529 
530   if (masm_->emit_debug_code()) {
531     __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
532     __ Ccmp(current_input_offset(), 0, NoFlag, eq);
533     // The current input offset should be <= 0, and fit in a W register.
534     __ Check(le, AbortReason::kOffsetOutOfRange);
535   }
536   __ Bind(&fallthrough);
537 }
538 
539 
CheckNotCharacter(unsigned c,Label * on_not_equal)540 void RegExpMacroAssemblerARM64::CheckNotCharacter(unsigned c,
541                                                   Label* on_not_equal) {
542   CompareAndBranchOrBacktrack(current_character(), c, ne, on_not_equal);
543 }
544 
545 
CheckCharacterAfterAnd(uint32_t c,uint32_t mask,Label * on_equal)546 void RegExpMacroAssemblerARM64::CheckCharacterAfterAnd(uint32_t c,
547                                                        uint32_t mask,
548                                                        Label* on_equal) {
549   __ And(w10, current_character(), mask);
550   CompareAndBranchOrBacktrack(w10, c, eq, on_equal);
551 }
552 
553 
CheckNotCharacterAfterAnd(unsigned c,unsigned mask,Label * on_not_equal)554 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterAnd(unsigned c,
555                                                           unsigned mask,
556                                                           Label* on_not_equal) {
557   __ And(w10, current_character(), mask);
558   CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
559 }
560 
561 
CheckNotCharacterAfterMinusAnd(uc16 c,uc16 minus,uc16 mask,Label * on_not_equal)562 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterMinusAnd(
563     uc16 c,
564     uc16 minus,
565     uc16 mask,
566     Label* on_not_equal) {
567   DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
568   __ Sub(w10, current_character(), minus);
569   __ And(w10, w10, mask);
570   CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
571 }
572 
573 
CheckCharacterInRange(uc16 from,uc16 to,Label * on_in_range)574 void RegExpMacroAssemblerARM64::CheckCharacterInRange(
575     uc16 from,
576     uc16 to,
577     Label* on_in_range) {
578   __ Sub(w10, current_character(), from);
579   // Unsigned lower-or-same condition.
580   CompareAndBranchOrBacktrack(w10, to - from, ls, on_in_range);
581 }
582 
583 
CheckCharacterNotInRange(uc16 from,uc16 to,Label * on_not_in_range)584 void RegExpMacroAssemblerARM64::CheckCharacterNotInRange(
585     uc16 from,
586     uc16 to,
587     Label* on_not_in_range) {
588   __ Sub(w10, current_character(), from);
589   // Unsigned higher condition.
590   CompareAndBranchOrBacktrack(w10, to - from, hi, on_not_in_range);
591 }
592 
593 
CheckBitInTable(Handle<ByteArray> table,Label * on_bit_set)594 void RegExpMacroAssemblerARM64::CheckBitInTable(
595     Handle<ByteArray> table,
596     Label* on_bit_set) {
597   __ Mov(x11, Operand(table));
598   if ((mode_ != LATIN1) || (kTableMask != String::kMaxOneByteCharCode)) {
599     __ And(w10, current_character(), kTableMask);
600     __ Add(w10, w10, ByteArray::kHeaderSize - kHeapObjectTag);
601   } else {
602     __ Add(w10, current_character(), ByteArray::kHeaderSize - kHeapObjectTag);
603   }
604   __ Ldrb(w11, MemOperand(x11, w10, UXTW));
605   CompareAndBranchOrBacktrack(w11, 0, ne, on_bit_set);
606 }
607 
608 
// Emits a specialised test of the current character against the character
// class identified by |type| ('s', 'd', 'D', '.', 'n', 'w', 'W', '*').
//
// Returns true when code for the class was emitted here (for '*' nothing needs
// to be emitted); the emitted code takes |on_no_match| via BranchOrBacktrack
// when the character is not in the class. Returns false when this function
// provides no custom implementation for |type| in the current mode --
// presumably the caller then falls back to generic code; confirm at the call
// site.
CheckSpecialCharacterClass(uc16 type,Label * on_no_match)609 bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type,
610                                                            Label* on_no_match) {
611   // Range checks (c in min..max) are generally implemented by an unsigned
612   // (c - min) <= (max - min) check
613   switch (type) {
614   case 's':
615     // Match space-characters
616     if (mode_ == LATIN1) {
617       // One byte space characters are '\t'..'\r', ' ' and \u00a0.
618       Label success;
619       // Check for ' ' or 0x00A0.
      // The Ccmp compares against 0x00A0 only if the first compare was "ne";
      // otherwise it sets Z, so "eq" below covers both characters.
620       __ Cmp(current_character(), ' ');
621       __ Ccmp(current_character(), 0x00A0, ZFlag, ne);
622       __ B(eq, &success);
623       // Check range 0x09..0x0D.
624       __ Sub(w10, current_character(), '\t');
625       CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match);
626       __ Bind(&success);
627       return true;
628     }
629     return false;
630   case 'S':
631     // The emitted code for generic character classes is good enough.
632     return false;
633   case 'd':
634     // Match ASCII digits ('0'..'9').
635     __ Sub(w10, current_character(), '0');
636     CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match);
637     return true;
638   case 'D':
639     // Match ASCII non-digits.
640     __ Sub(w10, current_character(), '0');
641     CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match);
642     return true;
643   case '.': {
644     // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
645     // Here we emit the conditional branch only once at the end to make branch
646     // prediction more efficient, even though we could branch out of here
647     // as soon as a character matches.
648     __ Cmp(current_character(), 0x0A);
649     __ Ccmp(current_character(), 0x0D, ZFlag, ne);
650     if (mode_ == UC16) {
651       __ Sub(w10, current_character(), 0x2028);
652       // If the Z flag was set we clear the flags to force a branch.
653       __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
654       // ls -> !((C==1) && (Z==0))
655       BranchOrBacktrack(ls, on_no_match);
656     } else {
657       BranchOrBacktrack(eq, on_no_match);
658     }
659     return true;
660   }
661   case 'n': {
662     // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
663     // We have to check all 4 newline characters before emitting
664     // the conditional branch.
    // Same flag sequence as the '.' case, with the final branch condition
    // inverted.
665     __ Cmp(current_character(), 0x0A);
666     __ Ccmp(current_character(), 0x0D, ZFlag, ne);
667     if (mode_ == UC16) {
668       __ Sub(w10, current_character(), 0x2028);
669       // If the Z flag was set we clear the flags to force a fall-through.
670       __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
671       // hi -> (C==1) && (Z==0)
672       BranchOrBacktrack(hi, on_no_match);
673     } else {
674       BranchOrBacktrack(ne, on_no_match);
675     }
676     return true;
677   }
678   case 'w': {
    // Outside LATIN1 mode, characters above 'z' are rejected before the
    // table lookup.
679     if (mode_ != LATIN1) {
680       // Table is 256 entries, so all Latin1 characters can be tested.
681       CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match);
682     }
    // Index the word-character map by the current character; a zero entry
    // means the character is not in the class.
683     ExternalReference map = ExternalReference::re_word_character_map(isolate());
684     __ Mov(x10, map);
685     __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
686     CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match);
687     return true;
688   }
689   case 'W': {
690     Label done;
    // Outside LATIN1 mode, characters above 'z' skip the table lookup and
    // are accepted as matching \W.
691     if (mode_ != LATIN1) {
692       // Table is 256 entries, so all Latin1 characters can be tested.
693       __ Cmp(current_character(), 'z');
694       __ B(hi, &done);
695     }
696     ExternalReference map = ExternalReference::re_word_character_map(isolate());
697     __ Mov(x10, map);
698     __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
699     CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match);
700     __ Bind(&done);
701     return true;
702   }
703   case '*':
704     // Match any character.
705     return true;
706   // No custom implementation (yet): s(UC16), S(UC16).
707   default:
708     return false;
709   }
710 }
711 
712 
Fail()713 void RegExpMacroAssemblerARM64::Fail() {
714   __ Mov(w0, FAILURE);
715   __ B(&exit_label_);
716 }
717 
718 
GetCode(Handle<String> source)719 Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
720   Label return_w0;
721   // Finalize code - write the entry point code now we know how many
722   // registers we need.
723 
724   // Entry code:
725   __ Bind(&entry_label_);
726 
727   // Arguments on entry:
728   // x0:  String   input
729   // x1:  int      start_offset
730   // x2:  byte*    input_start
731   // x3:  byte*    input_end
732   // x4:  int*     output array
733   // x5:  int      output array size
734   // x6:  Address  stack_base
735   // x7:  int      direct_call
736 
737   //  sp[8]:  address of the current isolate
738   //  sp[0]:  secondary link/return address used by native call
739 
740   // Tell the system that we have a stack frame.  Because the type is MANUAL, no
741   // code is generated.
742   FrameScope scope(masm_, StackFrame::MANUAL);
743 
744   // Push registers on the stack, only push the argument registers that we need.
745   CPURegList argument_registers(x0, x5, x6, x7);
746 
747   CPURegList registers_to_retain = kCalleeSaved;
748   registers_to_retain.Combine(fp);
749   registers_to_retain.Combine(lr);
750 
751   DCHECK(registers_to_retain.IncludesAliasOf(lr));
752   __ PushCPURegList<TurboAssembler::kSignLR>(registers_to_retain);
753   __ PushCPURegList(argument_registers);
754 
755   // Set frame pointer in place.
756   __ Add(frame_pointer(), sp, argument_registers.Count() * kSystemPointerSize);
757 
758   // Initialize callee-saved registers.
759   __ Mov(start_offset(), w1);
760   __ Mov(input_start(), x2);
761   __ Mov(input_end(), x3);
762   __ Mov(output_array(), x4);
763 
764   // Set the number of registers we will need to allocate, that is:
765   //   - kSuccessCounter / success_counter (X register)
766   //   - kBacktrackCount (X register)
767   //   - (num_registers_ - kNumCachedRegisters) (W registers)
768   int num_wreg_to_allocate = num_registers_ - kNumCachedRegisters;
769   // Do not allocate registers on the stack if they can all be cached.
770   if (num_wreg_to_allocate < 0) { num_wreg_to_allocate = 0; }
771   // Make room for the success_counter and kBacktrackCount. Each X (64-bit)
772   // register is equivalent to two W (32-bit) registers.
773   num_wreg_to_allocate += 2 + 2;
774 
775   // Make sure the stack alignment will be respected.
776   int alignment = masm_->ActivationFrameAlignment();
777   DCHECK_EQ(alignment % 16, 0);
778   int align_mask = (alignment / kWRegSize) - 1;
779   num_wreg_to_allocate = (num_wreg_to_allocate + align_mask) & ~align_mask;
780 
781   // Check if we have space on the stack.
782   Label stack_limit_hit;
783   Label stack_ok;
784 
785   ExternalReference stack_limit =
786       ExternalReference::address_of_jslimit(isolate());
787   __ Mov(x10, stack_limit);
788   __ Ldr(x10, MemOperand(x10));
789   __ Subs(x10, sp, x10);
790 
791   // Handle it if the stack pointer is already below the stack limit.
792   __ B(ls, &stack_limit_hit);
793 
794   // Check if there is room for the variable number of registers above
795   // the stack limit.
796   __ Cmp(x10, num_wreg_to_allocate * kWRegSize);
797   __ B(hs, &stack_ok);
798 
799   // Exit with OutOfMemory exception. There is not enough space on the stack
800   // for our working registers.
801   __ Mov(w0, EXCEPTION);
802   __ B(&return_w0);
803 
804   __ Bind(&stack_limit_hit);
805   CallCheckStackGuardState(x10);
806   // If returned value is non-zero, we exit with the returned value as result.
807   __ Cbnz(w0, &return_w0);
808 
809   __ Bind(&stack_ok);
810 
811   // Allocate space on stack.
812   __ Claim(num_wreg_to_allocate, kWRegSize);
813 
814   // Initialize success_counter and kBacktrackCount with 0.
815   __ Str(wzr, MemOperand(frame_pointer(), kSuccessCounter));
816   __ Str(wzr, MemOperand(frame_pointer(), kBacktrackCount));
817 
818   // Find negative length (offset of start relative to end).
819   __ Sub(x10, input_start(), input_end());
820   if (masm_->emit_debug_code()) {
821     // Check that the size of the input string chars is in range.
822     __ Neg(x11, x10);
823     __ Cmp(x11, SeqTwoByteString::kMaxCharsSize);
824     __ Check(ls, AbortReason::kInputStringTooLong);
825   }
826   __ Mov(current_input_offset(), w10);
827 
828   // The non-position value is used as a clearing value for the
829   // capture registers, it corresponds to the position of the first character
830   // minus one.
831   __ Sub(string_start_minus_one(), current_input_offset(), char_size());
832   __ Sub(string_start_minus_one(), string_start_minus_one(),
833          Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0));
834   // We can store this value twice in an X register for initializing
835   // on-stack registers later.
836   __ Orr(twice_non_position_value(), string_start_minus_one().X(),
837          Operand(string_start_minus_one().X(), LSL, kWRegSizeInBits));
838 
839   // Initialize code pointer register.
840   __ Mov(code_pointer(), Operand(masm_->CodeObject()));
841 
842   Label load_char_start_regexp, start_regexp;
843   // Load newline if index is at start, previous character otherwise.
844   __ Cbnz(start_offset(), &load_char_start_regexp);
845   __ Mov(current_character(), '\n');
846   __ B(&start_regexp);
847 
848   // Global regexp restarts matching here.
849   __ Bind(&load_char_start_regexp);
850   // Load previous char as initial value of current character register.
851   LoadCurrentCharacterUnchecked(-1, 1);
852   __ Bind(&start_regexp);
853   // Initialize on-stack registers.
854   if (num_saved_registers_ > 0) {
855     ClearRegisters(0, num_saved_registers_ - 1);
856   }
857 
858   // Initialize backtrack stack pointer.
859   __ Ldr(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackBase));
860 
861   // Execute
862   __ B(&start_label_);
863 
864   if (backtrack_label_.is_linked()) {
865     __ Bind(&backtrack_label_);
866     Backtrack();
867   }
868 
869   if (success_label_.is_linked()) {
870     Register first_capture_start = w15;
871 
872     // Save captures when successful.
873     __ Bind(&success_label_);
874 
875     if (num_saved_registers_ > 0) {
876       // V8 expects the output to be an int32_t array.
877       Register capture_start = w12;
878       Register capture_end = w13;
879       Register input_length = w14;
880 
881       // Copy captures to output.
882 
883       // Get string length.
884       __ Sub(x10, input_end(), input_start());
885       if (masm_->emit_debug_code()) {
886         // Check that the size of the input string chars is in range.
887         __ Cmp(x10, SeqTwoByteString::kMaxCharsSize);
888         __ Check(ls, AbortReason::kInputStringTooLong);
889       }
890       // input_start has a start_offset offset on entry. We need to include
891       // it when computing the length of the whole string.
892       if (mode_ == UC16) {
893         __ Add(input_length, start_offset(), Operand(w10, LSR, 1));
894       } else {
895         __ Add(input_length, start_offset(), w10);
896       }
897 
898       // Copy the results to the output array from the cached registers first.
899       for (int i = 0;
900            (i < num_saved_registers_) && (i < kNumCachedRegisters);
901            i += 2) {
902         __ Mov(capture_start.X(), GetCachedRegister(i));
903         __ Lsr(capture_end.X(), capture_start.X(), kWRegSizeInBits);
904         if ((i == 0) && global_with_zero_length_check()) {
905           // Keep capture start for the zero-length check later.
906           __ Mov(first_capture_start, capture_start);
907         }
908         // Offsets need to be relative to the start of the string.
909         if (mode_ == UC16) {
910           __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
911           __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
912         } else {
913           __ Add(capture_start, input_length, capture_start);
914           __ Add(capture_end, input_length, capture_end);
915         }
916         // The output pointer advances for a possible global match.
917         __ Stp(capture_start, capture_end,
918                MemOperand(output_array(), kSystemPointerSize, PostIndex));
919       }
920 
921       // Only carry on if there are more than kNumCachedRegisters capture
922       // registers.
923       int num_registers_left_on_stack =
924           num_saved_registers_ - kNumCachedRegisters;
925       if (num_registers_left_on_stack > 0) {
926         Register base = x10;
927         // There are always an even number of capture registers. A couple of
928         // registers determine one match with two offsets.
929         DCHECK_EQ(0, num_registers_left_on_stack % 2);
930         __ Add(base, frame_pointer(), kFirstCaptureOnStack);
931 
932         // We can unroll the loop here, we should not unroll for less than 2
933         // registers.
934         STATIC_ASSERT(kNumRegistersToUnroll > 2);
935         if (num_registers_left_on_stack <= kNumRegistersToUnroll) {
936           for (int i = 0; i < num_registers_left_on_stack / 2; i++) {
937             __ Ldp(capture_end, capture_start,
938                    MemOperand(base, -kSystemPointerSize, PostIndex));
939             if ((i == 0) && global_with_zero_length_check()) {
940               // Keep capture start for the zero-length check later.
941               __ Mov(first_capture_start, capture_start);
942             }
943             // Offsets need to be relative to the start of the string.
944             if (mode_ == UC16) {
945               __ Add(capture_start,
946                      input_length,
947                      Operand(capture_start, ASR, 1));
948               __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
949             } else {
950               __ Add(capture_start, input_length, capture_start);
951               __ Add(capture_end, input_length, capture_end);
952             }
953             // The output pointer advances for a possible global match.
954             __ Stp(capture_start, capture_end,
955                    MemOperand(output_array(), kSystemPointerSize, PostIndex));
956           }
957         } else {
958           Label loop, start;
959           __ Mov(x11, num_registers_left_on_stack);
960 
961           __ Ldp(capture_end, capture_start,
962                  MemOperand(base, -kSystemPointerSize, PostIndex));
963           if (global_with_zero_length_check()) {
964             __ Mov(first_capture_start, capture_start);
965           }
966           __ B(&start);
967 
968           __ Bind(&loop);
969           __ Ldp(capture_end, capture_start,
970                  MemOperand(base, -kSystemPointerSize, PostIndex));
971           __ Bind(&start);
972           if (mode_ == UC16) {
973             __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
974             __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
975           } else {
976             __ Add(capture_start, input_length, capture_start);
977             __ Add(capture_end, input_length, capture_end);
978           }
979           // The output pointer advances for a possible global match.
980           __ Stp(capture_start, capture_end,
981                  MemOperand(output_array(), kSystemPointerSize, PostIndex));
982           __ Sub(x11, x11, 2);
983           __ Cbnz(x11, &loop);
984         }
985       }
986     }
987 
988     if (global()) {
989       Register success_counter = w0;
990       Register output_size = x10;
991       // Restart matching if the regular expression is flagged as global.
992 
993       // Increment success counter.
994       __ Ldr(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
995       __ Add(success_counter, success_counter, 1);
996       __ Str(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
997 
998       // Capture results have been stored, so the number of remaining global
999       // output registers is reduced by the number of stored captures.
1000       __ Ldr(output_size, MemOperand(frame_pointer(), kOutputSize));
1001       __ Sub(output_size, output_size, num_saved_registers_);
1002       // Check whether we have enough room for another set of capture results.
1003       __ Cmp(output_size, num_saved_registers_);
1004       __ B(lt, &return_w0);
1005 
1006       // The output pointer is already set to the next field in the output
1007       // array.
1008       // Update output size on the frame before we restart matching.
1009       __ Str(output_size, MemOperand(frame_pointer(), kOutputSize));
1010 
1011       if (global_with_zero_length_check()) {
1012         // Special case for zero-length matches.
1013         __ Cmp(current_input_offset(), first_capture_start);
1014         // Not a zero-length match, restart.
1015         __ B(ne, &load_char_start_regexp);
1016         // Offset from the end is zero if we already reached the end.
1017         __ Cbz(current_input_offset(), &return_w0);
1018         // Advance current position after a zero-length match.
1019         Label advance;
1020         __ bind(&advance);
1021         __ Add(current_input_offset(),
1022                current_input_offset(),
1023                Operand((mode_ == UC16) ? 2 : 1));
1024         if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
1025       }
1026 
1027       __ B(&load_char_start_regexp);
1028     } else {
1029       __ Mov(w0, SUCCESS);
1030     }
1031   }
1032 
1033   if (exit_label_.is_linked()) {
1034     // Exit and return w0
1035     __ Bind(&exit_label_);
1036     if (global()) {
1037       __ Ldr(w0, MemOperand(frame_pointer(), kSuccessCounter));
1038     }
1039   }
1040 
1041   __ Bind(&return_w0);
1042 
1043   // Set stack pointer back to first register to retain
1044   __ Mov(sp, fp);
1045 
1046   // Restore registers.
1047   __ PopCPURegList<TurboAssembler::kAuthLR>(registers_to_retain);
1048 
1049   __ Ret();
1050 
1051   Label exit_with_exception;
1052   // Registers x0 to x7 are used to store the first captures, they need to be
1053   // retained over calls to C++ code.
1054   CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
1055   DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
1056 
1057   if (check_preempt_label_.is_linked()) {
1058     __ Bind(&check_preempt_label_);
1059     SaveLinkRegister();
1060     // The cached registers need to be retained.
1061     __ PushCPURegList(cached_registers);
1062     CallCheckStackGuardState(x10);
1063     // Returning from the regexp code restores the stack (sp <- fp)
1064     // so we don't need to drop the link register from it before exiting.
1065     __ Cbnz(w0, &return_w0);
1066     // Reset the cached registers.
1067     __ PopCPURegList(cached_registers);
1068     RestoreLinkRegister();
1069     __ Ret();
1070   }
1071 
1072   if (stack_overflow_label_.is_linked()) {
1073     __ Bind(&stack_overflow_label_);
1074     SaveLinkRegister();
1075     // The cached registers need to be retained.
1076     __ PushCPURegList(cached_registers);
1077     // Call GrowStack(backtrack_stackpointer(), &stack_base)
1078     __ Mov(x2, ExternalReference::isolate_address(isolate()));
1079     __ Add(x1, frame_pointer(), kStackBase);
1080     __ Mov(x0, backtrack_stackpointer());
1081     ExternalReference grow_stack =
1082         ExternalReference::re_grow_stack(isolate());
1083     __ CallCFunction(grow_stack, 3);
1084     // If return nullptr, we have failed to grow the stack, and
1085     // must exit with a stack-overflow exception.
1086     // Returning from the regexp code restores the stack (sp <- fp)
1087     // so we don't need to drop the link register from it before exiting.
1088     __ Cbz(w0, &exit_with_exception);
1089     // Otherwise use return value as new stack pointer.
1090     __ Mov(backtrack_stackpointer(), x0);
1091     // Reset the cached registers.
1092     __ PopCPURegList(cached_registers);
1093     RestoreLinkRegister();
1094     __ Ret();
1095   }
1096 
1097   if (exit_with_exception.is_linked()) {
1098     __ Bind(&exit_with_exception);
1099     __ Mov(w0, EXCEPTION);
1100     __ B(&return_w0);
1101   }
1102 
1103   if (fallback_label_.is_linked()) {
1104     __ Bind(&fallback_label_);
1105     __ Mov(w0, FALLBACK_TO_EXPERIMENTAL);
1106     __ B(&return_w0);
1107   }
1108 
1109   CodeDesc code_desc;
1110   masm_->GetCode(isolate(), &code_desc);
1111   Handle<Code> code =
1112       Factory::CodeBuilder(isolate(), code_desc, CodeKind::REGEXP)
1113           .set_self_reference(masm_->CodeObject())
1114           .Build();
1115   PROFILE(masm_->isolate(),
1116           RegExpCodeCreateEvent(Handle<AbstractCode>::cast(code), source));
1117   return Handle<HeapObject>::cast(code);
1118 }
1119 
1120 
GoTo(Label * to)1121 void RegExpMacroAssemblerARM64::GoTo(Label* to) {
1122   BranchOrBacktrack(al, to);
1123 }
1124 
IfRegisterGE(int reg,int comparand,Label * if_ge)1125 void RegExpMacroAssemblerARM64::IfRegisterGE(int reg, int comparand,
1126                                              Label* if_ge) {
1127   Register to_compare = GetRegister(reg, w10);
1128   CompareAndBranchOrBacktrack(to_compare, comparand, ge, if_ge);
1129 }
1130 
1131 
IfRegisterLT(int reg,int comparand,Label * if_lt)1132 void RegExpMacroAssemblerARM64::IfRegisterLT(int reg, int comparand,
1133                                              Label* if_lt) {
1134   Register to_compare = GetRegister(reg, w10);
1135   CompareAndBranchOrBacktrack(to_compare, comparand, lt, if_lt);
1136 }
1137 
1138 
IfRegisterEqPos(int reg,Label * if_eq)1139 void RegExpMacroAssemblerARM64::IfRegisterEqPos(int reg, Label* if_eq) {
1140   Register to_compare = GetRegister(reg, w10);
1141   __ Cmp(to_compare, current_input_offset());
1142   BranchOrBacktrack(eq, if_eq);
1143 }
1144 
1145 RegExpMacroAssembler::IrregexpImplementation
Implementation()1146     RegExpMacroAssemblerARM64::Implementation() {
1147   return kARM64Implementation;
1148 }
1149 
1150 
PopCurrentPosition()1151 void RegExpMacroAssemblerARM64::PopCurrentPosition() {
1152   Pop(current_input_offset());
1153 }
1154 
1155 
PopRegister(int register_index)1156 void RegExpMacroAssemblerARM64::PopRegister(int register_index) {
1157   Pop(w10);
1158   StoreRegister(register_index, w10);
1159 }
1160 
1161 
PushBacktrack(Label * label)1162 void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) {
1163   if (label->is_bound()) {
1164     int target = label->pos();
1165     __ Mov(w10, target + Code::kHeaderSize - kHeapObjectTag);
1166   } else {
1167     __ Adr(x10, label, MacroAssembler::kAdrFar);
1168     __ Sub(x10, x10, code_pointer());
1169     if (masm_->emit_debug_code()) {
1170       __ Cmp(x10, kWRegMask);
1171       // The code offset has to fit in a W register.
1172       __ Check(ls, AbortReason::kOffsetOutOfRange);
1173     }
1174   }
1175   Push(w10);
1176   CheckStackLimit();
1177 }
1178 
1179 
PushCurrentPosition()1180 void RegExpMacroAssemblerARM64::PushCurrentPosition() {
1181   Push(current_input_offset());
1182 }
1183 
1184 
PushRegister(int register_index,StackCheckFlag check_stack_limit)1185 void RegExpMacroAssemblerARM64::PushRegister(int register_index,
1186                                              StackCheckFlag check_stack_limit) {
1187   Register to_push = GetRegister(register_index, w10);
1188   Push(to_push);
1189   if (check_stack_limit) CheckStackLimit();
1190 }
1191 
1192 
ReadCurrentPositionFromRegister(int reg)1193 void RegExpMacroAssemblerARM64::ReadCurrentPositionFromRegister(int reg) {
1194   RegisterState register_state = GetRegisterState(reg);
1195   switch (register_state) {
1196     case STACKED:
1197       __ Ldr(current_input_offset(), register_location(reg));
1198       break;
1199     case CACHED_LSW:
1200       __ Mov(current_input_offset(), GetCachedRegister(reg).W());
1201       break;
1202     case CACHED_MSW:
1203       __ Lsr(current_input_offset().X(), GetCachedRegister(reg),
1204              kWRegSizeInBits);
1205       break;
1206     default:
1207       UNREACHABLE();
1208   }
1209 }
1210 
1211 
ReadStackPointerFromRegister(int reg)1212 void RegExpMacroAssemblerARM64::ReadStackPointerFromRegister(int reg) {
1213   Register read_from = GetRegister(reg, w10);
1214   __ Ldr(x11, MemOperand(frame_pointer(), kStackBase));
1215   __ Add(backtrack_stackpointer(), x11, Operand(read_from, SXTW));
1216 }
1217 
1218 
SetCurrentPositionFromEnd(int by)1219 void RegExpMacroAssemblerARM64::SetCurrentPositionFromEnd(int by) {
1220   Label after_position;
1221   __ Cmp(current_input_offset(), -by * char_size());
1222   __ B(ge, &after_position);
1223   __ Mov(current_input_offset(), -by * char_size());
1224   // On RegExp code entry (where this operation is used), the character before
1225   // the current position is expected to be already loaded.
1226   // We have advanced the position, so it's safe to read backwards.
1227   LoadCurrentCharacterUnchecked(-1, 1);
1228   __ Bind(&after_position);
1229 }
1230 
1231 
SetRegister(int register_index,int to)1232 void RegExpMacroAssemblerARM64::SetRegister(int register_index, int to) {
1233   DCHECK(register_index >= num_saved_registers_);  // Reserved for positions!
1234   Register set_to = wzr;
1235   if (to != 0) {
1236     set_to = w10;
1237     __ Mov(set_to, to);
1238   }
1239   StoreRegister(register_index, set_to);
1240 }
1241 
1242 
Succeed()1243 bool RegExpMacroAssemblerARM64::Succeed() {
1244   __ B(&success_label_);
1245   return global();
1246 }
1247 
1248 
WriteCurrentPositionToRegister(int reg,int cp_offset)1249 void RegExpMacroAssemblerARM64::WriteCurrentPositionToRegister(int reg,
1250                                                                int cp_offset) {
1251   Register position = current_input_offset();
1252   if (cp_offset != 0) {
1253     position = w10;
1254     __ Add(position, current_input_offset(), cp_offset * char_size());
1255   }
1256   StoreRegister(reg, position);
1257 }
1258 
1259 
ClearRegisters(int reg_from,int reg_to)1260 void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) {
1261   DCHECK(reg_from <= reg_to);
1262   int num_registers = reg_to - reg_from + 1;
1263 
1264   // If the first capture register is cached in a hardware register but not
1265   // aligned on a 64-bit one, we need to clear the first one specifically.
1266   if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) {
1267     StoreRegister(reg_from, string_start_minus_one());
1268     num_registers--;
1269     reg_from++;
1270   }
1271 
1272   // Clear cached registers in pairs as far as possible.
1273   while ((num_registers >= 2) && (reg_from < kNumCachedRegisters)) {
1274     DCHECK(GetRegisterState(reg_from) == CACHED_LSW);
1275     __ Mov(GetCachedRegister(reg_from), twice_non_position_value());
1276     reg_from += 2;
1277     num_registers -= 2;
1278   }
1279 
1280   if ((num_registers % 2) == 1) {
1281     StoreRegister(reg_from, string_start_minus_one());
1282     num_registers--;
1283     reg_from++;
1284   }
1285 
1286   if (num_registers > 0) {
1287     // If there are some remaining registers, they are stored on the stack.
1288     DCHECK_LE(kNumCachedRegisters, reg_from);
1289 
1290     // Move down the indexes of the registers on stack to get the correct offset
1291     // in memory.
1292     reg_from -= kNumCachedRegisters;
1293     reg_to -= kNumCachedRegisters;
1294     // We should not unroll the loop for less than 2 registers.
1295     STATIC_ASSERT(kNumRegistersToUnroll > 2);
1296     // We position the base pointer to (reg_from + 1).
1297     int base_offset = kFirstRegisterOnStack -
1298         kWRegSize - (kWRegSize * reg_from);
1299     if (num_registers > kNumRegistersToUnroll) {
1300       Register base = x10;
1301       __ Add(base, frame_pointer(), base_offset);
1302 
1303       Label loop;
1304       __ Mov(x11, num_registers);
1305       __ Bind(&loop);
1306       __ Str(twice_non_position_value(),
1307              MemOperand(base, -kSystemPointerSize, PostIndex));
1308       __ Sub(x11, x11, 2);
1309       __ Cbnz(x11, &loop);
1310     } else {
1311       for (int i = reg_from; i <= reg_to; i += 2) {
1312         __ Str(twice_non_position_value(),
1313                MemOperand(frame_pointer(), base_offset));
1314         base_offset -= kWRegSize * 2;
1315       }
1316     }
1317   }
1318 }
1319 
1320 
WriteStackPointerToRegister(int reg)1321 void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) {
1322   __ Ldr(x10, MemOperand(frame_pointer(), kStackBase));
1323   __ Sub(x10, backtrack_stackpointer(), x10);
1324   if (masm_->emit_debug_code()) {
1325     __ Cmp(x10, Operand(w10, SXTW));
1326     // The stack offset needs to fit in a W register.
1327     __ Check(eq, AbortReason::kOffsetOutOfRange);
1328   }
1329   StoreRegister(reg, w10);
1330 }
1331 
1332 
1333 // Helper function for reading a value out of a stack frame.
1334 template <typename T>
frame_entry(Address re_frame,int frame_offset)1335 static T& frame_entry(Address re_frame, int frame_offset) {
1336   return *reinterpret_cast<T*>(re_frame + frame_offset);
1337 }
1338 
1339 
1340 template <typename T>
frame_entry_address(Address re_frame,int frame_offset)1341 static T* frame_entry_address(Address re_frame, int frame_offset) {
1342   return reinterpret_cast<T*>(re_frame + frame_offset);
1343 }
1344 
CheckStackGuardState(Address * return_address,Address raw_code,Address re_frame,int start_index,const byte ** input_start,const byte ** input_end)1345 int RegExpMacroAssemblerARM64::CheckStackGuardState(
1346     Address* return_address, Address raw_code, Address re_frame,
1347     int start_index, const byte** input_start, const byte** input_end) {
1348   Code re_code = Code::cast(Object(raw_code));
1349   return NativeRegExpMacroAssembler::CheckStackGuardState(
1350       frame_entry<Isolate*>(re_frame, kIsolate), start_index,
1351       static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
1352       return_address, re_code, frame_entry_address<Address>(re_frame, kInput),
1353       input_start, input_end);
1354 }
1355 
1356 
CheckPosition(int cp_offset,Label * on_outside_input)1357 void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset,
1358                                               Label* on_outside_input) {
1359   if (cp_offset >= 0) {
1360     CompareAndBranchOrBacktrack(current_input_offset(),
1361                                 -cp_offset * char_size(), ge, on_outside_input);
1362   } else {
1363     __ Add(w12, current_input_offset(), Operand(cp_offset * char_size()));
1364     __ Cmp(w12, string_start_minus_one());
1365     BranchOrBacktrack(le, on_outside_input);
1366   }
1367 }
1368 
1369 
1370 // Private methods:
1371 
CallCheckStackGuardState(Register scratch)1372 void RegExpMacroAssemblerARM64::CallCheckStackGuardState(Register scratch) {
1373   DCHECK(!isolate()->IsGeneratingEmbeddedBuiltins());
1374   DCHECK(!masm_->options().isolate_independent_code);
1375 
1376   // Allocate space on the stack to store the return address. The
1377   // CheckStackGuardState C++ function will override it if the code
1378   // moved. Allocate extra space for 2 arguments passed by pointers.
1379   // AAPCS64 requires the stack to be 16 byte aligned.
1380   int alignment = masm_->ActivationFrameAlignment();
1381   DCHECK_EQ(alignment % 16, 0);
1382   int align_mask = (alignment / kXRegSize) - 1;
1383   int xreg_to_claim = (3 + align_mask) & ~align_mask;
1384 
1385   __ Claim(xreg_to_claim);
1386 
1387   // CheckStackGuardState needs the end and start addresses of the input string.
1388   __ Poke(input_end(), 2 * kSystemPointerSize);
1389   __ Add(x5, sp, 2 * kSystemPointerSize);
1390   __ Poke(input_start(), kSystemPointerSize);
1391   __ Add(x4, sp, kSystemPointerSize);
1392 
1393   __ Mov(w3, start_offset());
1394   // RegExp code frame pointer.
1395   __ Mov(x2, frame_pointer());
1396   // Code of self.
1397   __ Mov(x1, Operand(masm_->CodeObject()));
1398 
1399   // We need to pass a pointer to the return address as first argument.
1400   // DirectCEntry will place the return address on the stack before calling so
1401   // the stack pointer will point to it.
1402   __ Mov(x0, sp);
1403 
1404   DCHECK_EQ(scratch, x10);
1405   ExternalReference check_stack_guard_state =
1406       ExternalReference::re_check_stack_guard_state(isolate());
1407   __ Mov(scratch, check_stack_guard_state);
1408 
1409   {
1410     UseScratchRegisterScope temps(masm_);
1411     Register scratch = temps.AcquireX();
1412 
1413     EmbeddedData d = EmbeddedData::FromBlob();
1414     Address entry = d.InstructionStartOfBuiltin(Builtins::kDirectCEntry);
1415 
1416     __ Ldr(scratch, Operand(entry, RelocInfo::OFF_HEAP_TARGET));
1417     __ Call(scratch);
1418   }
1419 
1420   // The input string may have been moved in memory, we need to reload it.
1421   __ Peek(input_start(), kSystemPointerSize);
1422   __ Peek(input_end(), 2 * kSystemPointerSize);
1423 
1424   __ Drop(xreg_to_claim);
1425 
1426   // Reload the Code pointer.
1427   __ Mov(code_pointer(), Operand(masm_->CodeObject()));
1428 }
1429 
BranchOrBacktrack(Condition condition,Label * to)1430 void RegExpMacroAssemblerARM64::BranchOrBacktrack(Condition condition,
1431                                                   Label* to) {
1432   if (condition == al) {  // Unconditional.
1433     if (to == nullptr) {
1434       Backtrack();
1435       return;
1436     }
1437     __ B(to);
1438     return;
1439   }
1440   if (to == nullptr) {
1441     to = &backtrack_label_;
1442   }
1443   __ B(condition, to);
1444 }
1445 
CompareAndBranchOrBacktrack(Register reg,int immediate,Condition condition,Label * to)1446 void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg,
1447                                                             int immediate,
1448                                                             Condition condition,
1449                                                             Label* to) {
1450   if ((immediate == 0) && ((condition == eq) || (condition == ne))) {
1451     if (to == nullptr) {
1452       to = &backtrack_label_;
1453     }
1454     if (condition == eq) {
1455       __ Cbz(reg, to);
1456     } else {
1457       __ Cbnz(reg, to);
1458     }
1459   } else {
1460     __ Cmp(reg, immediate);
1461     BranchOrBacktrack(condition, to);
1462   }
1463 }
1464 
1465 
CheckPreemption()1466 void RegExpMacroAssemblerARM64::CheckPreemption() {
1467   // Check for preemption.
1468   ExternalReference stack_limit =
1469       ExternalReference::address_of_jslimit(isolate());
1470   __ Mov(x10, stack_limit);
1471   __ Ldr(x10, MemOperand(x10));
1472   __ Cmp(sp, x10);
1473   CallIf(&check_preempt_label_, ls);
1474 }
1475 
1476 
CheckStackLimit()1477 void RegExpMacroAssemblerARM64::CheckStackLimit() {
1478   ExternalReference stack_limit =
1479       ExternalReference::address_of_regexp_stack_limit_address(isolate());
1480   __ Mov(x10, stack_limit);
1481   __ Ldr(x10, MemOperand(x10));
1482   __ Cmp(backtrack_stackpointer(), x10);
1483   CallIf(&stack_overflow_label_, ls);
1484 }
1485 
1486 
Push(Register source)1487 void RegExpMacroAssemblerARM64::Push(Register source) {
1488   DCHECK(source.Is32Bits());
1489   DCHECK_NE(source, backtrack_stackpointer());
1490   __ Str(source,
1491          MemOperand(backtrack_stackpointer(),
1492                     -static_cast<int>(kWRegSize),
1493                     PreIndex));
1494 }
1495 
1496 
Pop(Register target)1497 void RegExpMacroAssemblerARM64::Pop(Register target) {
1498   DCHECK(target.Is32Bits());
1499   DCHECK_NE(target, backtrack_stackpointer());
1500   __ Ldr(target,
1501          MemOperand(backtrack_stackpointer(), kWRegSize, PostIndex));
1502 }
1503 
1504 
GetCachedRegister(int register_index)1505 Register RegExpMacroAssemblerARM64::GetCachedRegister(int register_index) {
1506   DCHECK_GT(kNumCachedRegisters, register_index);
1507   return Register::Create(register_index / 2, kXRegSizeInBits);
1508 }
1509 
1510 
GetRegister(int register_index,Register maybe_result)1511 Register RegExpMacroAssemblerARM64::GetRegister(int register_index,
1512                                                 Register maybe_result) {
1513   DCHECK(maybe_result.Is32Bits());
1514   DCHECK_LE(0, register_index);
1515   if (num_registers_ <= register_index) {
1516     num_registers_ = register_index + 1;
1517   }
1518   Register result = NoReg;
1519   RegisterState register_state = GetRegisterState(register_index);
1520   switch (register_state) {
1521     case STACKED:
1522       __ Ldr(maybe_result, register_location(register_index));
1523       result = maybe_result;
1524       break;
1525     case CACHED_LSW:
1526       result = GetCachedRegister(register_index).W();
1527       break;
1528     case CACHED_MSW:
1529       __ Lsr(maybe_result.X(), GetCachedRegister(register_index),
1530              kWRegSizeInBits);
1531       result = maybe_result;
1532       break;
1533     default:
1534       UNREACHABLE();
1535   }
1536   DCHECK(result.Is32Bits());
1537   return result;
1538 }
1539 
1540 
StoreRegister(int register_index,Register source)1541 void RegExpMacroAssemblerARM64::StoreRegister(int register_index,
1542                                               Register source) {
1543   DCHECK(source.Is32Bits());
1544   DCHECK_LE(0, register_index);
1545   if (num_registers_ <= register_index) {
1546     num_registers_ = register_index + 1;
1547   }
1548 
1549   RegisterState register_state = GetRegisterState(register_index);
1550   switch (register_state) {
1551     case STACKED:
1552       __ Str(source, register_location(register_index));
1553       break;
1554     case CACHED_LSW: {
1555       Register cached_register = GetCachedRegister(register_index);
1556       if (source != cached_register.W()) {
1557         __ Bfi(cached_register, source.X(), 0, kWRegSizeInBits);
1558       }
1559       break;
1560     }
1561     case CACHED_MSW: {
1562       Register cached_register = GetCachedRegister(register_index);
1563       __ Bfi(cached_register, source.X(), kWRegSizeInBits, kWRegSizeInBits);
1564       break;
1565     }
1566     default:
1567       UNREACHABLE();
1568   }
1569 }
1570 
1571 
CallIf(Label * to,Condition condition)1572 void RegExpMacroAssemblerARM64::CallIf(Label* to, Condition condition) {
1573   Label skip_call;
1574   if (condition != al) __ B(&skip_call, NegateCondition(condition));
1575   __ Bl(to);
1576   __ Bind(&skip_call);
1577 }
1578 
1579 
RestoreLinkRegister()1580 void RegExpMacroAssemblerARM64::RestoreLinkRegister() {
1581   __ Pop<TurboAssembler::kAuthLR>(padreg, lr);
1582   __ Add(lr, lr, Operand(masm_->CodeObject()));
1583 }
1584 
1585 
SaveLinkRegister()1586 void RegExpMacroAssemblerARM64::SaveLinkRegister() {
1587   __ Sub(lr, lr, Operand(masm_->CodeObject()));
1588   __ Push<TurboAssembler::kSignLR>(lr, padreg);
1589 }
1590 
1591 
register_location(int register_index)1592 MemOperand RegExpMacroAssemblerARM64::register_location(int register_index) {
1593   DCHECK(register_index < (1<<30));
1594   DCHECK_LE(kNumCachedRegisters, register_index);
1595   if (num_registers_ <= register_index) {
1596     num_registers_ = register_index + 1;
1597   }
1598   register_index -= kNumCachedRegisters;
1599   int offset = kFirstRegisterOnStack - register_index * kWRegSize;
1600   return MemOperand(frame_pointer(), offset);
1601 }
1602 
capture_location(int register_index,Register scratch)1603 MemOperand RegExpMacroAssemblerARM64::capture_location(int register_index,
1604                                                      Register scratch) {
1605   DCHECK(register_index < (1<<30));
1606   DCHECK(register_index < num_saved_registers_);
1607   DCHECK_LE(kNumCachedRegisters, register_index);
1608   DCHECK_EQ(register_index % 2, 0);
1609   register_index -= kNumCachedRegisters;
1610   int offset = kFirstCaptureOnStack - register_index * kWRegSize;
1611   // capture_location is used with Stp instructions to load/store 2 registers.
1612   // The immediate field in the encoding is limited to 7 bits (signed).
1613   if (is_int7(offset)) {
1614     return MemOperand(frame_pointer(), offset);
1615   } else {
1616     __ Add(scratch, frame_pointer(), offset);
1617     return MemOperand(scratch);
1618   }
1619 }
1620 
LoadCurrentCharacterUnchecked(int cp_offset,int characters)1621 void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset,
1622                                                               int characters) {
1623   Register offset = current_input_offset();
1624 
1625   // The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU
1626   // and the operating system running on the target allow it.
1627   // If unaligned load/stores are not supported then this function must only
1628   // be used to load a single character at a time.
1629 
1630   // ARMv8 supports unaligned accesses but V8 or the kernel can decide to
1631   // disable it.
1632   // TODO(pielan): See whether or not we should disable unaligned accesses.
1633   if (!CanReadUnaligned()) {
1634     DCHECK_EQ(1, characters);
1635   }
1636 
1637   if (cp_offset != 0) {
1638     if (masm_->emit_debug_code()) {
1639       __ Mov(x10, cp_offset * char_size());
1640       __ Add(x10, x10, Operand(current_input_offset(), SXTW));
1641       __ Cmp(x10, Operand(w10, SXTW));
1642       // The offset needs to fit in a W register.
1643       __ Check(eq, AbortReason::kOffsetOutOfRange);
1644     } else {
1645       __ Add(w10, current_input_offset(), cp_offset * char_size());
1646     }
1647     offset = w10;
1648   }
1649 
1650   if (mode_ == LATIN1) {
1651     if (characters == 4) {
1652       __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1653     } else if (characters == 2) {
1654       __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1655     } else {
1656       DCHECK_EQ(1, characters);
1657       __ Ldrb(current_character(), MemOperand(input_end(), offset, SXTW));
1658     }
1659   } else {
1660     DCHECK(mode_ == UC16);
1661     if (characters == 2) {
1662       __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1663     } else {
1664       DCHECK_EQ(1, characters);
1665       __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1666     }
1667   }
1668 }
1669 
1670 }  // namespace internal
1671 }  // namespace v8
1672 
1673 #undef __
1674 
1675 #endif  // V8_TARGET_ARCH_ARM64
1676