• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #if V8_TARGET_ARCH_ARM64
6 
7 #include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
8 
9 #include "src/code-stubs.h"
10 #include "src/log.h"
11 #include "src/macro-assembler.h"
12 #include "src/profiler/cpu-profiler.h"
13 #include "src/regexp/regexp-macro-assembler.h"
14 #include "src/regexp/regexp-stack.h"
15 #include "src/unicode.h"
16 
17 namespace v8 {
18 namespace internal {
19 
20 #ifndef V8_INTERPRETED_REGEXP
21 /*
22  * This assembler uses the following register assignment convention:
23  * - w19     : Used to temporarily store a value before a call to C code.
24  *             See CheckNotBackReferenceIgnoreCase.
25  * - x20     : Pointer to the current code object (Code*),
26  *             it includes the heap object tag.
27  * - w21     : Current position in input, as negative offset from
28  *             the end of the string. Please notice that this is
29  *             the byte offset, not the character offset!
30  * - w22     : Currently loaded character. Must be loaded using
31  *             LoadCurrentCharacter before using any of the dispatch methods.
32  * - x23     : Points to tip of backtrack stack.
33  * - w24     : Position of the first character minus one: non_position_value.
34  *             Used to initialize capture registers.
35  * - x25     : Address at the end of the input string: input_end.
36  *             Points to byte after last character in input.
37  * - x26     : Address at the start of the input string: input_start.
38  * - w27     : Where to start in the input string.
39  * - x28     : Output array pointer.
40  * - x29/fp  : Frame pointer. Used to access arguments, local variables and
41  *             RegExp registers.
42  * - x16/x17 : IP registers, used by assembler. Very volatile.
43  * - csp     : Points to tip of C stack.
44  *
45  * - x0-x7   : Used as a cache to store 32 bit capture registers. These
46  *             registers need to be retained every time a call to C code
47  *             is done.
48  *
49  * The remaining registers are free for computations.
50  * Each call to a public method should retain this convention.
51  *
52  * The stack will have the following structure:
53  *
54  *  Location    Name               Description
55  *              (as referred to in
56  *              the code)
57  *
58  *  - fp[104]   isolate            Address of the current isolate.
59  *  - fp[96]    return_address     Secondary link/return address
60  *                                 used by an exit frame if this is a
61  *                                 native call.
62  *  ^^^ csp when called ^^^
63  *  - fp[88]    lr                 Return from the RegExp code.
64  *  - fp[80]    r29                Old frame pointer (CalleeSaved).
65  *  - fp[0..72] r19-r28            Backup of CalleeSaved registers.
66  *  - fp[-8]    direct_call        1 => Direct call from JavaScript code.
67  *                                 0 => Call through the runtime system.
68  *  - fp[-16]   stack_base         High end of the memory area to use as
69  *                                 the backtracking stack.
70  *  - fp[-24]   output_size        Output may fit multiple sets of matches.
71  *  - fp[-32]   input              Handle containing the input string.
72  *  - fp[-40]   success_counter
73  *  ^^^^^^^^^^^^^ From here and downwards we store 32 bit values ^^^^^^^^^^^^^
74  *  - fp[-44]   register N         Capture registers initialized with
75  *  - fp[-48]   register N + 1     non_position_value.
76  *              ...                The first kNumCachedRegisters (N) registers
77  *              ...                are cached in x0 to x7.
78  *              ...                Only positions must be stored in the first
79  *  -           ...                num_saved_registers_ registers.
80  *  -           ...
81  *  -           register N + num_registers - 1
82  *  ^^^^^^^^^ csp ^^^^^^^^^
83  *
84  * The first num_saved_registers_ registers are initialized to point to
85  * "character -1" in the string (i.e., char_size() bytes before the first
86  * character of the string). The remaining registers start out as garbage.
87  *
88  * The data up to the return address must be placed there by the calling
89  * code and the remaining arguments are passed in registers, e.g. by calling the
90  * code entry as cast to a function with the signature:
91  * int (*match)(String* input,
92  *              int start_offset,
93  *              Address input_start,
94  *              Address input_end,
95  *              int* output,
96  *              int output_size,
97  *              Address stack_base,
98  *              bool direct_call = false,
99  *              Address secondary_return_address,  // Only used by native call.
100  *              Isolate* isolate)
101  * The call is performed by NativeRegExpMacroAssembler::Execute()
102  * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro
103  * in arm64/simulator-arm64.h.
104  * When calling as a non-direct call (i.e., from C++ code), the return address
105  * area is overwritten with the LR register by the RegExp code. When doing a
106  * direct call from generated code, the return address is placed there by
107  * the calling code, as in a normal exit frame.
108  */
109 
110 #define __ ACCESS_MASM(masm_)
111 
RegExpMacroAssemblerARM64(Isolate * isolate,Zone * zone,Mode mode,int registers_to_save)112 RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(Isolate* isolate,
113                                                      Zone* zone, Mode mode,
114                                                      int registers_to_save)
115     : NativeRegExpMacroAssembler(isolate, zone),
116       masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
117                                CodeObjectRequired::kYes)),
118       mode_(mode),
119       num_registers_(registers_to_save),
120       num_saved_registers_(registers_to_save),
121       entry_label_(),
122       start_label_(),
123       success_label_(),
124       backtrack_label_(),
125       exit_label_() {
126   __ SetStackPointer(csp);
127   DCHECK_EQ(0, registers_to_save % 2);
128   // We can cache at most 16 W registers in x0-x7.
129   STATIC_ASSERT(kNumCachedRegisters <= 16);
130   STATIC_ASSERT((kNumCachedRegisters % 2) == 0);
131   __ B(&entry_label_);   // We'll write the entry code later.
132   __ Bind(&start_label_);  // And then continue from here.
133 }
134 
135 
~RegExpMacroAssemblerARM64()136 RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() {
137   delete masm_;
138   // Unuse labels in case we throw away the assembler without calling GetCode.
139   entry_label_.Unuse();
140   start_label_.Unuse();
141   success_label_.Unuse();
142   backtrack_label_.Unuse();
143   exit_label_.Unuse();
144   check_preempt_label_.Unuse();
145   stack_overflow_label_.Unuse();
146 }
147 
stack_limit_slack()148 int RegExpMacroAssemblerARM64::stack_limit_slack()  {
149   return RegExpStack::kStackLimitSlack;
150 }
151 
152 
AdvanceCurrentPosition(int by)153 void RegExpMacroAssemblerARM64::AdvanceCurrentPosition(int by) {
154   if (by != 0) {
155     __ Add(current_input_offset(),
156            current_input_offset(), by * char_size());
157   }
158 }
159 
160 
AdvanceRegister(int reg,int by)161 void RegExpMacroAssemblerARM64::AdvanceRegister(int reg, int by) {
162   DCHECK((reg >= 0) && (reg < num_registers_));
163   if (by != 0) {
164     Register to_advance;
165     RegisterState register_state = GetRegisterState(reg);
166     switch (register_state) {
167       case STACKED:
168         __ Ldr(w10, register_location(reg));
169         __ Add(w10, w10, by);
170         __ Str(w10, register_location(reg));
171         break;
172       case CACHED_LSW:
173         to_advance = GetCachedRegister(reg);
174         __ Add(to_advance, to_advance, by);
175         break;
176       case CACHED_MSW:
177         to_advance = GetCachedRegister(reg);
178         __ Add(to_advance, to_advance,
179                static_cast<int64_t>(by) << kWRegSizeInBits);
180         break;
181       default:
182         UNREACHABLE();
183         break;
184     }
185   }
186 }
187 
188 
Backtrack()189 void RegExpMacroAssemblerARM64::Backtrack() {
190   CheckPreemption();
191   Pop(w10);
192   __ Add(x10, code_pointer(), Operand(w10, UXTW));
193   __ Br(x10);
194 }
195 
196 
Bind(Label * label)197 void RegExpMacroAssemblerARM64::Bind(Label* label) {
198   __ Bind(label);
199 }
200 
201 
CheckCharacter(uint32_t c,Label * on_equal)202 void RegExpMacroAssemblerARM64::CheckCharacter(uint32_t c, Label* on_equal) {
203   CompareAndBranchOrBacktrack(current_character(), c, eq, on_equal);
204 }
205 
206 
CheckCharacterGT(uc16 limit,Label * on_greater)207 void RegExpMacroAssemblerARM64::CheckCharacterGT(uc16 limit,
208                                                  Label* on_greater) {
209   CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater);
210 }
211 
212 
CheckAtStart(Label * on_at_start)213 void RegExpMacroAssemblerARM64::CheckAtStart(Label* on_at_start) {
214   __ Add(w10, current_input_offset(), Operand(-char_size()));
215   __ Cmp(w10, string_start_minus_one());
216   BranchOrBacktrack(eq, on_at_start);
217 }
218 
219 
CheckNotAtStart(int cp_offset,Label * on_not_at_start)220 void RegExpMacroAssemblerARM64::CheckNotAtStart(int cp_offset,
221                                                 Label* on_not_at_start) {
222   __ Add(w10, current_input_offset(),
223          Operand(-char_size() + cp_offset * char_size()));
224   __ Cmp(w10, string_start_minus_one());
225   BranchOrBacktrack(ne, on_not_at_start);
226 }
227 
228 
CheckCharacterLT(uc16 limit,Label * on_less)229 void RegExpMacroAssemblerARM64::CheckCharacterLT(uc16 limit, Label* on_less) {
230   CompareAndBranchOrBacktrack(current_character(), limit, lo, on_less);
231 }
232 
233 
CheckCharacters(Vector<const uc16> str,int cp_offset,Label * on_failure,bool check_end_of_string)234 void RegExpMacroAssemblerARM64::CheckCharacters(Vector<const uc16> str,
235                                               int cp_offset,
236                                               Label* on_failure,
237                                               bool check_end_of_string) {
238   // This method is only ever called from the cctests.
239 
240   if (check_end_of_string) {
241     // Is last character of required match inside string.
242     CheckPosition(cp_offset + str.length() - 1, on_failure);
243   }
244 
245   Register characters_address = x11;
246 
247   __ Add(characters_address,
248          input_end(),
249          Operand(current_input_offset(), SXTW));
250   if (cp_offset != 0) {
251     __ Add(characters_address, characters_address, cp_offset * char_size());
252   }
253 
254   for (int i = 0; i < str.length(); i++) {
255     if (mode_ == LATIN1) {
256       __ Ldrb(w10, MemOperand(characters_address, 1, PostIndex));
257       DCHECK(str[i] <= String::kMaxOneByteCharCode);
258     } else {
259       __ Ldrh(w10, MemOperand(characters_address, 2, PostIndex));
260     }
261     CompareAndBranchOrBacktrack(w10, str[i], ne, on_failure);
262   }
263 }
264 
265 
CheckGreedyLoop(Label * on_equal)266 void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
267   __ Ldr(w10, MemOperand(backtrack_stackpointer()));
268   __ Cmp(current_input_offset(), w10);
269   __ Cset(x11, eq);
270   __ Add(backtrack_stackpointer(),
271          backtrack_stackpointer(), Operand(x11, LSL, kWRegSizeLog2));
272   BranchOrBacktrack(eq, on_equal);
273 }
274 
275 
CheckNotBackReferenceIgnoreCase(int start_reg,bool read_backward,Label * on_no_match)276 void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
277     int start_reg, bool read_backward, Label* on_no_match) {
278   Label fallthrough;
279 
280   Register capture_start_offset = w10;
281   // Save the capture length in a callee-saved register so it will
282   // be preserved if we call a C helper.
283   Register capture_length = w19;
284   DCHECK(kCalleeSaved.IncludesAliasOf(capture_length));
285 
286   // Find length of back-referenced capture.
287   DCHECK((start_reg % 2) == 0);
288   if (start_reg < kNumCachedRegisters) {
289     __ Mov(capture_start_offset.X(), GetCachedRegister(start_reg));
290     __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
291   } else {
292     __ Ldp(w11, capture_start_offset, capture_location(start_reg, x10));
293   }
294   __ Sub(capture_length, w11, capture_start_offset);  // Length to check.
295 
296   // At this point, the capture registers are either both set or both cleared.
297   // If the capture length is zero, then the capture is either empty or cleared.
298   // Fall through in both cases.
299   __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
300 
301   // Check that there are enough characters left in the input.
302   if (read_backward) {
303     __ Add(w12, string_start_minus_one(), capture_length);
304     __ Cmp(current_input_offset(), w12);
305     BranchOrBacktrack(le, on_no_match);
306   } else {
307     __ Cmn(capture_length, current_input_offset());
308     BranchOrBacktrack(gt, on_no_match);
309   }
310 
311   if (mode_ == LATIN1) {
312     Label success;
313     Label fail;
314     Label loop_check;
315 
316     Register capture_start_address = x12;
317     Register capture_end_addresss = x13;
318     Register current_position_address = x14;
319 
320     __ Add(capture_start_address,
321            input_end(),
322            Operand(capture_start_offset, SXTW));
323     __ Add(capture_end_addresss,
324            capture_start_address,
325            Operand(capture_length, SXTW));
326     __ Add(current_position_address,
327            input_end(),
328            Operand(current_input_offset(), SXTW));
329     if (read_backward) {
330       // Offset by length when matching backwards.
331       __ Sub(current_position_address, current_position_address,
332              Operand(capture_length, SXTW));
333     }
334 
335     Label loop;
336     __ Bind(&loop);
337     __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
338     __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
339     __ Cmp(w10, w11);
340     __ B(eq, &loop_check);
341 
342     // Mismatch, try case-insensitive match (converting letters to lower-case).
343     __ Orr(w10, w10, 0x20);  // Convert capture character to lower-case.
344     __ Orr(w11, w11, 0x20);  // Also convert input character.
345     __ Cmp(w11, w10);
346     __ B(ne, &fail);
347     __ Sub(w10, w10, 'a');
348     __ Cmp(w10, 'z' - 'a');  // Is w10 a lowercase letter?
349     __ B(ls, &loop_check);  // In range 'a'-'z'.
350     // Latin-1: Check for values in range [224,254] but not 247.
351     __ Sub(w10, w10, 224 - 'a');
352     __ Cmp(w10, 254 - 224);
353     __ Ccmp(w10, 247 - 224, ZFlag, ls);  // Check for 247.
354     __ B(eq, &fail);  // Weren't Latin-1 letters.
355 
356     __ Bind(&loop_check);
357     __ Cmp(capture_start_address, capture_end_addresss);
358     __ B(lt, &loop);
359     __ B(&success);
360 
361     __ Bind(&fail);
362     BranchOrBacktrack(al, on_no_match);
363 
364     __ Bind(&success);
365     // Compute new value of character position after the matched part.
366     __ Sub(current_input_offset().X(), current_position_address, input_end());
367     if (read_backward) {
368       __ Sub(current_input_offset().X(), current_input_offset().X(),
369              Operand(capture_length, SXTW));
370     }
371     if (masm_->emit_debug_code()) {
372       __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
373       __ Ccmp(current_input_offset(), 0, NoFlag, eq);
374       // The current input offset should be <= 0, and fit in a W register.
375       __ Check(le, kOffsetOutOfRange);
376     }
377   } else {
378     DCHECK(mode_ == UC16);
379     int argument_count = 4;
380 
381     // The cached registers need to be retained.
382     CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
383     DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters);
384     __ PushCPURegList(cached_registers);
385 
386     // Put arguments into arguments registers.
387     // Parameters are
388     //   x0: Address byte_offset1 - Address captured substring's start.
389     //   x1: Address byte_offset2 - Address of current character position.
390     //   w2: size_t byte_length - length of capture in bytes(!)
391     //   x3: Isolate* isolate
392 
393     // Address of start of capture.
394     __ Add(x0, input_end(), Operand(capture_start_offset, SXTW));
395     // Length of capture.
396     __ Mov(w2, capture_length);
397     // Address of current input position.
398     __ Add(x1, input_end(), Operand(current_input_offset(), SXTW));
399     if (read_backward) {
400       __ Sub(x1, x1, Operand(capture_length, SXTW));
401     }
402     // Isolate.
403     __ Mov(x3, ExternalReference::isolate_address(isolate()));
404 
405     {
406       AllowExternalCallThatCantCauseGC scope(masm_);
407       ExternalReference function =
408           ExternalReference::re_case_insensitive_compare_uc16(isolate());
409       __ CallCFunction(function, argument_count);
410     }
411 
412     // Check if function returned non-zero for success or zero for failure.
413     // x0 is one of the registers used as a cache so it must be tested before
414     // the cache is restored.
415     __ Cmp(x0, 0);
416     __ PopCPURegList(cached_registers);
417     BranchOrBacktrack(eq, on_no_match);
418 
419     // On success, advance position by length of capture.
420     if (read_backward) {
421       __ Sub(current_input_offset(), current_input_offset(), capture_length);
422     } else {
423       __ Add(current_input_offset(), current_input_offset(), capture_length);
424     }
425   }
426 
427   __ Bind(&fallthrough);
428 }
429 
CheckNotBackReference(int start_reg,bool read_backward,Label * on_no_match)430 void RegExpMacroAssemblerARM64::CheckNotBackReference(int start_reg,
431                                                       bool read_backward,
432                                                       Label* on_no_match) {
433   Label fallthrough;
434 
435   Register capture_start_address = x12;
436   Register capture_end_address = x13;
437   Register current_position_address = x14;
438   Register capture_length = w15;
439 
440   // Find length of back-referenced capture.
441   DCHECK((start_reg % 2) == 0);
442   if (start_reg < kNumCachedRegisters) {
443     __ Mov(x10, GetCachedRegister(start_reg));
444     __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
445   } else {
446     __ Ldp(w11, w10, capture_location(start_reg, x10));
447   }
448   __ Sub(capture_length, w11, w10);  // Length to check.
449 
450   // At this point, the capture registers are either both set or both cleared.
451   // If the capture length is zero, then the capture is either empty or cleared.
452   // Fall through in both cases.
453   __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
454 
455   // Check that there are enough characters left in the input.
456   if (read_backward) {
457     __ Add(w12, string_start_minus_one(), capture_length);
458     __ Cmp(current_input_offset(), w12);
459     BranchOrBacktrack(le, on_no_match);
460   } else {
461     __ Cmn(capture_length, current_input_offset());
462     BranchOrBacktrack(gt, on_no_match);
463   }
464 
465   // Compute pointers to match string and capture string
466   __ Add(capture_start_address, input_end(), Operand(w10, SXTW));
467   __ Add(capture_end_address,
468          capture_start_address,
469          Operand(capture_length, SXTW));
470   __ Add(current_position_address,
471          input_end(),
472          Operand(current_input_offset(), SXTW));
473   if (read_backward) {
474     // Offset by length when matching backwards.
475     __ Sub(current_position_address, current_position_address,
476            Operand(capture_length, SXTW));
477   }
478 
479   Label loop;
480   __ Bind(&loop);
481   if (mode_ == LATIN1) {
482     __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
483     __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
484   } else {
485     DCHECK(mode_ == UC16);
486     __ Ldrh(w10, MemOperand(capture_start_address, 2, PostIndex));
487     __ Ldrh(w11, MemOperand(current_position_address, 2, PostIndex));
488   }
489   __ Cmp(w10, w11);
490   BranchOrBacktrack(ne, on_no_match);
491   __ Cmp(capture_start_address, capture_end_address);
492   __ B(lt, &loop);
493 
494   // Move current character position to position after match.
495   __ Sub(current_input_offset().X(), current_position_address, input_end());
496   if (read_backward) {
497     __ Sub(current_input_offset().X(), current_input_offset().X(),
498            Operand(capture_length, SXTW));
499   }
500 
501   if (masm_->emit_debug_code()) {
502     __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
503     __ Ccmp(current_input_offset(), 0, NoFlag, eq);
504     // The current input offset should be <= 0, and fit in a W register.
505     __ Check(le, kOffsetOutOfRange);
506   }
507   __ Bind(&fallthrough);
508 }
509 
510 
CheckNotCharacter(unsigned c,Label * on_not_equal)511 void RegExpMacroAssemblerARM64::CheckNotCharacter(unsigned c,
512                                                   Label* on_not_equal) {
513   CompareAndBranchOrBacktrack(current_character(), c, ne, on_not_equal);
514 }
515 
516 
CheckCharacterAfterAnd(uint32_t c,uint32_t mask,Label * on_equal)517 void RegExpMacroAssemblerARM64::CheckCharacterAfterAnd(uint32_t c,
518                                                        uint32_t mask,
519                                                        Label* on_equal) {
520   __ And(w10, current_character(), mask);
521   CompareAndBranchOrBacktrack(w10, c, eq, on_equal);
522 }
523 
524 
CheckNotCharacterAfterAnd(unsigned c,unsigned mask,Label * on_not_equal)525 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterAnd(unsigned c,
526                                                           unsigned mask,
527                                                           Label* on_not_equal) {
528   __ And(w10, current_character(), mask);
529   CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
530 }
531 
532 
CheckNotCharacterAfterMinusAnd(uc16 c,uc16 minus,uc16 mask,Label * on_not_equal)533 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterMinusAnd(
534     uc16 c,
535     uc16 minus,
536     uc16 mask,
537     Label* on_not_equal) {
538   DCHECK(minus < String::kMaxUtf16CodeUnit);
539   __ Sub(w10, current_character(), minus);
540   __ And(w10, w10, mask);
541   CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
542 }
543 
544 
CheckCharacterInRange(uc16 from,uc16 to,Label * on_in_range)545 void RegExpMacroAssemblerARM64::CheckCharacterInRange(
546     uc16 from,
547     uc16 to,
548     Label* on_in_range) {
549   __ Sub(w10, current_character(), from);
550   // Unsigned lower-or-same condition.
551   CompareAndBranchOrBacktrack(w10, to - from, ls, on_in_range);
552 }
553 
554 
CheckCharacterNotInRange(uc16 from,uc16 to,Label * on_not_in_range)555 void RegExpMacroAssemblerARM64::CheckCharacterNotInRange(
556     uc16 from,
557     uc16 to,
558     Label* on_not_in_range) {
559   __ Sub(w10, current_character(), from);
560   // Unsigned higher condition.
561   CompareAndBranchOrBacktrack(w10, to - from, hi, on_not_in_range);
562 }
563 
564 
CheckBitInTable(Handle<ByteArray> table,Label * on_bit_set)565 void RegExpMacroAssemblerARM64::CheckBitInTable(
566     Handle<ByteArray> table,
567     Label* on_bit_set) {
568   __ Mov(x11, Operand(table));
569   if ((mode_ != LATIN1) || (kTableMask != String::kMaxOneByteCharCode)) {
570     __ And(w10, current_character(), kTableMask);
571     __ Add(w10, w10, ByteArray::kHeaderSize - kHeapObjectTag);
572   } else {
573     __ Add(w10, current_character(), ByteArray::kHeaderSize - kHeapObjectTag);
574   }
575   __ Ldrb(w11, MemOperand(x11, w10, UXTW));
576   CompareAndBranchOrBacktrack(w11, 0, ne, on_bit_set);
577 }
578 
579 
CheckSpecialCharacterClass(uc16 type,Label * on_no_match)580 bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(uc16 type,
581                                                            Label* on_no_match) {
582   // Range checks (c in min..max) are generally implemented by an unsigned
583   // (c - min) <= (max - min) check
584   switch (type) {
585   case 's':
586     // Match space-characters
587     if (mode_ == LATIN1) {
588       // One byte space characters are '\t'..'\r', ' ' and \u00a0.
589       Label success;
590       // Check for ' ' or 0x00a0.
591       __ Cmp(current_character(), ' ');
592       __ Ccmp(current_character(), 0x00a0, ZFlag, ne);
593       __ B(eq, &success);
594       // Check range 0x09..0x0d.
595       __ Sub(w10, current_character(), '\t');
596       CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match);
597       __ Bind(&success);
598       return true;
599     }
600     return false;
601   case 'S':
602     // The emitted code for generic character classes is good enough.
603     return false;
604   case 'd':
605     // Match ASCII digits ('0'..'9').
606     __ Sub(w10, current_character(), '0');
607     CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match);
608     return true;
609   case 'D':
610     // Match ASCII non-digits.
611     __ Sub(w10, current_character(), '0');
612     CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match);
613     return true;
614   case '.': {
615     // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
616     // Here we emit the conditional branch only once at the end to make branch
617     // prediction more efficient, even though we could branch out of here
618     // as soon as a character matches.
619     __ Cmp(current_character(), 0x0a);
620     __ Ccmp(current_character(), 0x0d, ZFlag, ne);
621     if (mode_ == UC16) {
622       __ Sub(w10, current_character(), 0x2028);
623       // If the Z flag was set we clear the flags to force a branch.
624       __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
625       // ls -> !((C==1) && (Z==0))
626       BranchOrBacktrack(ls, on_no_match);
627     } else {
628       BranchOrBacktrack(eq, on_no_match);
629     }
630     return true;
631   }
632   case 'n': {
633     // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
634     // We have to check all 4 newline characters before emitting
635     // the conditional branch.
636     __ Cmp(current_character(), 0x0a);
637     __ Ccmp(current_character(), 0x0d, ZFlag, ne);
638     if (mode_ == UC16) {
639       __ Sub(w10, current_character(), 0x2028);
640       // If the Z flag was set we clear the flags to force a fall-through.
641       __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
642       // hi -> (C==1) && (Z==0)
643       BranchOrBacktrack(hi, on_no_match);
644     } else {
645       BranchOrBacktrack(ne, on_no_match);
646     }
647     return true;
648   }
649   case 'w': {
650     if (mode_ != LATIN1) {
651       // Table is 256 entries, so all Latin1 characters can be tested.
652       CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match);
653     }
654     ExternalReference map = ExternalReference::re_word_character_map();
655     __ Mov(x10, map);
656     __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
657     CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match);
658     return true;
659   }
660   case 'W': {
661     Label done;
662     if (mode_ != LATIN1) {
663       // Table is 256 entries, so all Latin1 characters can be tested.
664       __ Cmp(current_character(), 'z');
665       __ B(hi, &done);
666     }
667     ExternalReference map = ExternalReference::re_word_character_map();
668     __ Mov(x10, map);
669     __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
670     CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match);
671     __ Bind(&done);
672     return true;
673   }
674   case '*':
675     // Match any character.
676     return true;
677   // No custom implementation (yet): s(UC16), S(UC16).
678   default:
679     return false;
680   }
681 }
682 
683 
Fail()684 void RegExpMacroAssemblerARM64::Fail() {
685   __ Mov(w0, FAILURE);
686   __ B(&exit_label_);
687 }
688 
689 
GetCode(Handle<String> source)690 Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
691   Label return_w0;
692   // Finalize code - write the entry point code now we know how many
693   // registers we need.
694 
695   // Entry code:
696   __ Bind(&entry_label_);
697 
698   // Arguments on entry:
699   // x0:  String*  input
700   // x1:  int      start_offset
701   // x2:  byte*    input_start
702   // x3:  byte*    input_end
703   // x4:  int*     output array
704   // x5:  int      output array size
705   // x6:  Address  stack_base
706   // x7:  int      direct_call
707 
708   // The stack pointer should be csp on entry.
709   //  csp[8]:  address of the current isolate
710   //  csp[0]:  secondary link/return address used by native call
711 
712   // Tell the system that we have a stack frame.  Because the type is MANUAL, no
713   // code is generated.
714   FrameScope scope(masm_, StackFrame::MANUAL);
715 
716   // Push registers on the stack, only push the argument registers that we need.
717   CPURegList argument_registers(x0, x5, x6, x7);
718 
719   CPURegList registers_to_retain = kCalleeSaved;
720   DCHECK(kCalleeSaved.Count() == 11);
721   registers_to_retain.Combine(lr);
722 
723   DCHECK(csp.Is(__ StackPointer()));
724   __ PushCPURegList(registers_to_retain);
725   __ PushCPURegList(argument_registers);
726 
727   // Set frame pointer in place.
728   __ Add(frame_pointer(), csp, argument_registers.Count() * kPointerSize);
729 
730   // Initialize callee-saved registers.
731   __ Mov(start_offset(), w1);
732   __ Mov(input_start(), x2);
733   __ Mov(input_end(), x3);
734   __ Mov(output_array(), x4);
735 
736   // Set the number of registers we will need to allocate, that is:
737   //   - success_counter (X register)
738   //   - (num_registers_ - kNumCachedRegisters) (W registers)
739   int num_wreg_to_allocate = num_registers_ - kNumCachedRegisters;
740   // Do not allocate registers on the stack if they can all be cached.
741   if (num_wreg_to_allocate < 0) { num_wreg_to_allocate = 0; }
742   // Make room for the success_counter.
743   num_wreg_to_allocate += 2;
744 
745   // Make sure the stack alignment will be respected.
746   int alignment = masm_->ActivationFrameAlignment();
747   DCHECK_EQ(alignment % 16, 0);
748   int align_mask = (alignment / kWRegSize) - 1;
749   num_wreg_to_allocate = (num_wreg_to_allocate + align_mask) & ~align_mask;
750 
751   // Check if we have space on the stack.
752   Label stack_limit_hit;
753   Label stack_ok;
754 
755   ExternalReference stack_limit =
756       ExternalReference::address_of_stack_limit(isolate());
757   __ Mov(x10, stack_limit);
758   __ Ldr(x10, MemOperand(x10));
759   __ Subs(x10, csp, x10);
760 
761   // Handle it if the stack pointer is already below the stack limit.
762   __ B(ls, &stack_limit_hit);
763 
764   // Check if there is room for the variable number of registers above
765   // the stack limit.
766   __ Cmp(x10, num_wreg_to_allocate * kWRegSize);
767   __ B(hs, &stack_ok);
768 
769   // Exit with OutOfMemory exception. There is not enough space on the stack
770   // for our working registers.
771   __ Mov(w0, EXCEPTION);
772   __ B(&return_w0);
773 
774   __ Bind(&stack_limit_hit);
775   CallCheckStackGuardState(x10);
776   // If returned value is non-zero, we exit with the returned value as result.
777   __ Cbnz(w0, &return_w0);
778 
779   __ Bind(&stack_ok);
780 
781   // Allocate space on stack.
782   __ Claim(num_wreg_to_allocate, kWRegSize);
783 
784   // Initialize success_counter with 0.
785   __ Str(wzr, MemOperand(frame_pointer(), kSuccessCounter));
786 
787   // Find negative length (offset of start relative to end).
788   __ Sub(x10, input_start(), input_end());
789   if (masm_->emit_debug_code()) {
790     // Check that the input string length is < 2^30.
791     __ Neg(x11, x10);
792     __ Cmp(x11, (1<<30) - 1);
793     __ Check(ls, kInputStringTooLong);
794   }
795   __ Mov(current_input_offset(), w10);
796 
797   // The non-position value is used as a clearing value for the
798   // capture registers, it corresponds to the position of the first character
799   // minus one.
800   __ Sub(string_start_minus_one(), current_input_offset(), char_size());
801   __ Sub(string_start_minus_one(), string_start_minus_one(),
802          Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0));
803   // We can store this value twice in an X register for initializing
804   // on-stack registers later.
805   __ Orr(twice_non_position_value(), string_start_minus_one().X(),
806          Operand(string_start_minus_one().X(), LSL, kWRegSizeInBits));
807 
808   // Initialize code pointer register.
809   __ Mov(code_pointer(), Operand(masm_->CodeObject()));
810 
811   Label load_char_start_regexp, start_regexp;
812   // Load newline if index is at start, previous character otherwise.
813   __ Cbnz(start_offset(), &load_char_start_regexp);
814   __ Mov(current_character(), '\n');
815   __ B(&start_regexp);
816 
817   // Global regexp restarts matching here.
818   __ Bind(&load_char_start_regexp);
819   // Load previous char as initial value of current character register.
820   LoadCurrentCharacterUnchecked(-1, 1);
821   __ Bind(&start_regexp);
822   // Initialize on-stack registers.
823   if (num_saved_registers_ > 0) {
824     ClearRegisters(0, num_saved_registers_ - 1);
825   }
826 
827   // Initialize backtrack stack pointer.
828   __ Ldr(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackBase));
829 
830   // Execute
831   __ B(&start_label_);
832 
833   if (backtrack_label_.is_linked()) {
834     __ Bind(&backtrack_label_);
835     Backtrack();
836   }
837 
838   if (success_label_.is_linked()) {
839     Register first_capture_start = w15;
840 
841     // Save captures when successful.
842     __ Bind(&success_label_);
843 
844     if (num_saved_registers_ > 0) {
845       // V8 expects the output to be an int32_t array.
846       Register capture_start = w12;
847       Register capture_end = w13;
848       Register input_length = w14;
849 
850       // Copy captures to output.
851 
852       // Get string length.
853       __ Sub(x10, input_end(), input_start());
854       if (masm_->emit_debug_code()) {
855         // Check that the input string length is < 2^30.
856         __ Cmp(x10, (1<<30) - 1);
857         __ Check(ls, kInputStringTooLong);
858       }
859       // input_start has a start_offset offset on entry. We need to include
860       // it when computing the length of the whole string.
861       if (mode_ == UC16) {
862         __ Add(input_length, start_offset(), Operand(w10, LSR, 1));
863       } else {
864         __ Add(input_length, start_offset(), w10);
865       }
866 
867       // Copy the results to the output array from the cached registers first.
868       for (int i = 0;
869            (i < num_saved_registers_) && (i < kNumCachedRegisters);
870            i += 2) {
871         __ Mov(capture_start.X(), GetCachedRegister(i));
872         __ Lsr(capture_end.X(), capture_start.X(), kWRegSizeInBits);
873         if ((i == 0) && global_with_zero_length_check()) {
874           // Keep capture start for the zero-length check later.
875           __ Mov(first_capture_start, capture_start);
876         }
877         // Offsets need to be relative to the start of the string.
878         if (mode_ == UC16) {
879           __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
880           __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
881         } else {
882           __ Add(capture_start, input_length, capture_start);
883           __ Add(capture_end, input_length, capture_end);
884         }
885         // The output pointer advances for a possible global match.
886         __ Stp(capture_start,
887                capture_end,
888                MemOperand(output_array(), kPointerSize, PostIndex));
889       }
890 
891       // Only carry on if there are more than kNumCachedRegisters capture
892       // registers.
893       int num_registers_left_on_stack =
894           num_saved_registers_ - kNumCachedRegisters;
895       if (num_registers_left_on_stack > 0) {
896         Register base = x10;
897         // There are always an even number of capture registers. A couple of
898         // registers determine one match with two offsets.
899         DCHECK_EQ(0, num_registers_left_on_stack % 2);
900         __ Add(base, frame_pointer(), kFirstCaptureOnStack);
901 
902         // We can unroll the loop here, we should not unroll for less than 2
903         // registers.
904         STATIC_ASSERT(kNumRegistersToUnroll > 2);
905         if (num_registers_left_on_stack <= kNumRegistersToUnroll) {
906           for (int i = 0; i < num_registers_left_on_stack / 2; i++) {
907             __ Ldp(capture_end,
908                    capture_start,
909                    MemOperand(base, -kPointerSize, PostIndex));
910             if ((i == 0) && global_with_zero_length_check()) {
911               // Keep capture start for the zero-length check later.
912               __ Mov(first_capture_start, capture_start);
913             }
914             // Offsets need to be relative to the start of the string.
915             if (mode_ == UC16) {
916               __ Add(capture_start,
917                      input_length,
918                      Operand(capture_start, ASR, 1));
919               __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
920             } else {
921               __ Add(capture_start, input_length, capture_start);
922               __ Add(capture_end, input_length, capture_end);
923             }
924             // The output pointer advances for a possible global match.
925             __ Stp(capture_start,
926                    capture_end,
927                    MemOperand(output_array(), kPointerSize, PostIndex));
928           }
929         } else {
930           Label loop, start;
931           __ Mov(x11, num_registers_left_on_stack);
932 
933           __ Ldp(capture_end,
934                  capture_start,
935                  MemOperand(base, -kPointerSize, PostIndex));
936           if (global_with_zero_length_check()) {
937             __ Mov(first_capture_start, capture_start);
938           }
939           __ B(&start);
940 
941           __ Bind(&loop);
942           __ Ldp(capture_end,
943                  capture_start,
944                  MemOperand(base, -kPointerSize, PostIndex));
945           __ Bind(&start);
946           if (mode_ == UC16) {
947             __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
948             __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
949           } else {
950             __ Add(capture_start, input_length, capture_start);
951             __ Add(capture_end, input_length, capture_end);
952           }
953           // The output pointer advances for a possible global match.
954           __ Stp(capture_start,
955                  capture_end,
956                  MemOperand(output_array(), kPointerSize, PostIndex));
957           __ Sub(x11, x11, 2);
958           __ Cbnz(x11, &loop);
959         }
960       }
961     }
962 
963     if (global()) {
964       Register success_counter = w0;
965       Register output_size = x10;
966       // Restart matching if the regular expression is flagged as global.
967 
968       // Increment success counter.
969       __ Ldr(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
970       __ Add(success_counter, success_counter, 1);
971       __ Str(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
972 
973       // Capture results have been stored, so the number of remaining global
974       // output registers is reduced by the number of stored captures.
975       __ Ldr(output_size, MemOperand(frame_pointer(), kOutputSize));
976       __ Sub(output_size, output_size, num_saved_registers_);
977       // Check whether we have enough room for another set of capture results.
978       __ Cmp(output_size, num_saved_registers_);
979       __ B(lt, &return_w0);
980 
981       // The output pointer is already set to the next field in the output
982       // array.
983       // Update output size on the frame before we restart matching.
984       __ Str(output_size, MemOperand(frame_pointer(), kOutputSize));
985 
986       if (global_with_zero_length_check()) {
987         // Special case for zero-length matches.
988         __ Cmp(current_input_offset(), first_capture_start);
989         // Not a zero-length match, restart.
990         __ B(ne, &load_char_start_regexp);
991         // Offset from the end is zero if we already reached the end.
992         __ Cbz(current_input_offset(), &return_w0);
993         // Advance current position after a zero-length match.
994         __ Add(current_input_offset(),
995                current_input_offset(),
996                Operand((mode_ == UC16) ? 2 : 1));
997       }
998 
999       __ B(&load_char_start_regexp);
1000     } else {
1001       __ Mov(w0, SUCCESS);
1002     }
1003   }
1004 
1005   if (exit_label_.is_linked()) {
1006     // Exit and return w0
1007     __ Bind(&exit_label_);
1008     if (global()) {
1009       __ Ldr(w0, MemOperand(frame_pointer(), kSuccessCounter));
1010     }
1011   }
1012 
1013   __ Bind(&return_w0);
1014 
1015   // Set stack pointer back to first register to retain
1016   DCHECK(csp.Is(__ StackPointer()));
1017   __ Mov(csp, fp);
1018   __ AssertStackConsistency();
1019 
1020   // Restore registers.
1021   __ PopCPURegList(registers_to_retain);
1022 
1023   __ Ret();
1024 
1025   Label exit_with_exception;
1026   // Registers x0 to x7 are used to store the first captures, they need to be
1027   // retained over calls to C++ code.
1028   CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
1029   DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters);
1030 
1031   if (check_preempt_label_.is_linked()) {
1032     __ Bind(&check_preempt_label_);
1033     SaveLinkRegister();
1034     // The cached registers need to be retained.
1035     __ PushCPURegList(cached_registers);
1036     CallCheckStackGuardState(x10);
1037     // Returning from the regexp code restores the stack (csp <- fp)
1038     // so we don't need to drop the link register from it before exiting.
1039     __ Cbnz(w0, &return_w0);
1040     // Reset the cached registers.
1041     __ PopCPURegList(cached_registers);
1042     RestoreLinkRegister();
1043     __ Ret();
1044   }
1045 
1046   if (stack_overflow_label_.is_linked()) {
1047     __ Bind(&stack_overflow_label_);
1048     SaveLinkRegister();
1049     // The cached registers need to be retained.
1050     __ PushCPURegList(cached_registers);
1051     // Call GrowStack(backtrack_stackpointer(), &stack_base)
1052     __ Mov(x2, ExternalReference::isolate_address(isolate()));
1053     __ Add(x1, frame_pointer(), kStackBase);
1054     __ Mov(x0, backtrack_stackpointer());
1055     ExternalReference grow_stack =
1056         ExternalReference::re_grow_stack(isolate());
1057     __ CallCFunction(grow_stack, 3);
1058     // If return NULL, we have failed to grow the stack, and
1059     // must exit with a stack-overflow exception.
1060     // Returning from the regexp code restores the stack (csp <- fp)
1061     // so we don't need to drop the link register from it before exiting.
1062     __ Cbz(w0, &exit_with_exception);
1063     // Otherwise use return value as new stack pointer.
1064     __ Mov(backtrack_stackpointer(), x0);
1065     // Reset the cached registers.
1066     __ PopCPURegList(cached_registers);
1067     RestoreLinkRegister();
1068     __ Ret();
1069   }
1070 
1071   if (exit_with_exception.is_linked()) {
1072     __ Bind(&exit_with_exception);
1073     __ Mov(w0, EXCEPTION);
1074     __ B(&return_w0);
1075   }
1076 
1077   CodeDesc code_desc;
1078   masm_->GetCode(&code_desc);
1079   Handle<Code> code = isolate()->factory()->NewCode(
1080       code_desc, Code::ComputeFlags(Code::REGEXP), masm_->CodeObject());
1081   PROFILE(masm_->isolate(), RegExpCodeCreateEvent(*code, *source));
1082   return Handle<HeapObject>::cast(code);
1083 }
1084 
1085 
GoTo(Label * to)1086 void RegExpMacroAssemblerARM64::GoTo(Label* to) {
1087   BranchOrBacktrack(al, to);
1088 }
1089 
IfRegisterGE(int reg,int comparand,Label * if_ge)1090 void RegExpMacroAssemblerARM64::IfRegisterGE(int reg, int comparand,
1091                                              Label* if_ge) {
1092   Register to_compare = GetRegister(reg, w10);
1093   CompareAndBranchOrBacktrack(to_compare, comparand, ge, if_ge);
1094 }
1095 
1096 
IfRegisterLT(int reg,int comparand,Label * if_lt)1097 void RegExpMacroAssemblerARM64::IfRegisterLT(int reg, int comparand,
1098                                              Label* if_lt) {
1099   Register to_compare = GetRegister(reg, w10);
1100   CompareAndBranchOrBacktrack(to_compare, comparand, lt, if_lt);
1101 }
1102 
1103 
IfRegisterEqPos(int reg,Label * if_eq)1104 void RegExpMacroAssemblerARM64::IfRegisterEqPos(int reg, Label* if_eq) {
1105   Register to_compare = GetRegister(reg, w10);
1106   __ Cmp(to_compare, current_input_offset());
1107   BranchOrBacktrack(eq, if_eq);
1108 }
1109 
1110 RegExpMacroAssembler::IrregexpImplementation
Implementation()1111     RegExpMacroAssemblerARM64::Implementation() {
1112   return kARM64Implementation;
1113 }
1114 
1115 
LoadCurrentCharacter(int cp_offset,Label * on_end_of_input,bool check_bounds,int characters)1116 void RegExpMacroAssemblerARM64::LoadCurrentCharacter(int cp_offset,
1117                                                      Label* on_end_of_input,
1118                                                      bool check_bounds,
1119                                                      int characters) {
1120   // TODO(pielan): Make sure long strings are caught before this, and not
1121   // just asserted in debug mode.
1122   // Be sane! (And ensure that an int32_t can be used to index the string)
1123   DCHECK(cp_offset < (1<<30));
1124   if (check_bounds) {
1125     if (cp_offset >= 0) {
1126       CheckPosition(cp_offset + characters - 1, on_end_of_input);
1127     } else {
1128       CheckPosition(cp_offset, on_end_of_input);
1129     }
1130   }
1131   LoadCurrentCharacterUnchecked(cp_offset, characters);
1132 }
1133 
1134 
PopCurrentPosition()1135 void RegExpMacroAssemblerARM64::PopCurrentPosition() {
1136   Pop(current_input_offset());
1137 }
1138 
1139 
PopRegister(int register_index)1140 void RegExpMacroAssemblerARM64::PopRegister(int register_index) {
1141   Pop(w10);
1142   StoreRegister(register_index, w10);
1143 }
1144 
1145 
PushBacktrack(Label * label)1146 void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) {
1147   if (label->is_bound()) {
1148     int target = label->pos();
1149     __ Mov(w10, target + Code::kHeaderSize - kHeapObjectTag);
1150   } else {
1151     __ Adr(x10, label, MacroAssembler::kAdrFar);
1152     __ Sub(x10, x10, code_pointer());
1153     if (masm_->emit_debug_code()) {
1154       __ Cmp(x10, kWRegMask);
1155       // The code offset has to fit in a W register.
1156       __ Check(ls, kOffsetOutOfRange);
1157     }
1158   }
1159   Push(w10);
1160   CheckStackLimit();
1161 }
1162 
1163 
PushCurrentPosition()1164 void RegExpMacroAssemblerARM64::PushCurrentPosition() {
1165   Push(current_input_offset());
1166 }
1167 
1168 
PushRegister(int register_index,StackCheckFlag check_stack_limit)1169 void RegExpMacroAssemblerARM64::PushRegister(int register_index,
1170                                              StackCheckFlag check_stack_limit) {
1171   Register to_push = GetRegister(register_index, w10);
1172   Push(to_push);
1173   if (check_stack_limit) CheckStackLimit();
1174 }
1175 
1176 
ReadCurrentPositionFromRegister(int reg)1177 void RegExpMacroAssemblerARM64::ReadCurrentPositionFromRegister(int reg) {
1178   Register cached_register;
1179   RegisterState register_state = GetRegisterState(reg);
1180   switch (register_state) {
1181     case STACKED:
1182       __ Ldr(current_input_offset(), register_location(reg));
1183       break;
1184     case CACHED_LSW:
1185       cached_register = GetCachedRegister(reg);
1186       __ Mov(current_input_offset(), cached_register.W());
1187       break;
1188     case CACHED_MSW:
1189       cached_register = GetCachedRegister(reg);
1190       __ Lsr(current_input_offset().X(), cached_register, kWRegSizeInBits);
1191       break;
1192     default:
1193       UNREACHABLE();
1194       break;
1195   }
1196 }
1197 
1198 
ReadStackPointerFromRegister(int reg)1199 void RegExpMacroAssemblerARM64::ReadStackPointerFromRegister(int reg) {
1200   Register read_from = GetRegister(reg, w10);
1201   __ Ldr(x11, MemOperand(frame_pointer(), kStackBase));
1202   __ Add(backtrack_stackpointer(), x11, Operand(read_from, SXTW));
1203 }
1204 
1205 
SetCurrentPositionFromEnd(int by)1206 void RegExpMacroAssemblerARM64::SetCurrentPositionFromEnd(int by) {
1207   Label after_position;
1208   __ Cmp(current_input_offset(), -by * char_size());
1209   __ B(ge, &after_position);
1210   __ Mov(current_input_offset(), -by * char_size());
1211   // On RegExp code entry (where this operation is used), the character before
1212   // the current position is expected to be already loaded.
1213   // We have advanced the position, so it's safe to read backwards.
1214   LoadCurrentCharacterUnchecked(-1, 1);
1215   __ Bind(&after_position);
1216 }
1217 
1218 
SetRegister(int register_index,int to)1219 void RegExpMacroAssemblerARM64::SetRegister(int register_index, int to) {
1220   DCHECK(register_index >= num_saved_registers_);  // Reserved for positions!
1221   Register set_to = wzr;
1222   if (to != 0) {
1223     set_to = w10;
1224     __ Mov(set_to, to);
1225   }
1226   StoreRegister(register_index, set_to);
1227 }
1228 
1229 
Succeed()1230 bool RegExpMacroAssemblerARM64::Succeed() {
1231   __ B(&success_label_);
1232   return global();
1233 }
1234 
1235 
WriteCurrentPositionToRegister(int reg,int cp_offset)1236 void RegExpMacroAssemblerARM64::WriteCurrentPositionToRegister(int reg,
1237                                                                int cp_offset) {
1238   Register position = current_input_offset();
1239   if (cp_offset != 0) {
1240     position = w10;
1241     __ Add(position, current_input_offset(), cp_offset * char_size());
1242   }
1243   StoreRegister(reg, position);
1244 }
1245 
1246 
ClearRegisters(int reg_from,int reg_to)1247 void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) {
1248   DCHECK(reg_from <= reg_to);
1249   int num_registers = reg_to - reg_from + 1;
1250 
1251   // If the first capture register is cached in a hardware register but not
1252   // aligned on a 64-bit one, we need to clear the first one specifically.
1253   if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) {
1254     StoreRegister(reg_from, string_start_minus_one());
1255     num_registers--;
1256     reg_from++;
1257   }
1258 
1259   // Clear cached registers in pairs as far as possible.
1260   while ((num_registers >= 2) && (reg_from < kNumCachedRegisters)) {
1261     DCHECK(GetRegisterState(reg_from) == CACHED_LSW);
1262     __ Mov(GetCachedRegister(reg_from), twice_non_position_value());
1263     reg_from += 2;
1264     num_registers -= 2;
1265   }
1266 
1267   if ((num_registers % 2) == 1) {
1268     StoreRegister(reg_from, string_start_minus_one());
1269     num_registers--;
1270     reg_from++;
1271   }
1272 
1273   if (num_registers > 0) {
1274     // If there are some remaining registers, they are stored on the stack.
1275     DCHECK(reg_from >= kNumCachedRegisters);
1276 
1277     // Move down the indexes of the registers on stack to get the correct offset
1278     // in memory.
1279     reg_from -= kNumCachedRegisters;
1280     reg_to -= kNumCachedRegisters;
1281     // We should not unroll the loop for less than 2 registers.
1282     STATIC_ASSERT(kNumRegistersToUnroll > 2);
1283     // We position the base pointer to (reg_from + 1).
1284     int base_offset = kFirstRegisterOnStack -
1285         kWRegSize - (kWRegSize * reg_from);
1286     if (num_registers > kNumRegistersToUnroll) {
1287       Register base = x10;
1288       __ Add(base, frame_pointer(), base_offset);
1289 
1290       Label loop;
1291       __ Mov(x11, num_registers);
1292       __ Bind(&loop);
1293       __ Str(twice_non_position_value(),
1294              MemOperand(base, -kPointerSize, PostIndex));
1295       __ Sub(x11, x11, 2);
1296       __ Cbnz(x11, &loop);
1297     } else {
1298       for (int i = reg_from; i <= reg_to; i += 2) {
1299         __ Str(twice_non_position_value(),
1300                MemOperand(frame_pointer(), base_offset));
1301         base_offset -= kWRegSize * 2;
1302       }
1303     }
1304   }
1305 }
1306 
1307 
WriteStackPointerToRegister(int reg)1308 void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) {
1309   __ Ldr(x10, MemOperand(frame_pointer(), kStackBase));
1310   __ Sub(x10, backtrack_stackpointer(), x10);
1311   if (masm_->emit_debug_code()) {
1312     __ Cmp(x10, Operand(w10, SXTW));
1313     // The stack offset needs to fit in a W register.
1314     __ Check(eq, kOffsetOutOfRange);
1315   }
1316   StoreRegister(reg, w10);
1317 }
1318 
1319 
1320 // Helper function for reading a value out of a stack frame.
1321 template <typename T>
frame_entry(Address re_frame,int frame_offset)1322 static T& frame_entry(Address re_frame, int frame_offset) {
1323   return *reinterpret_cast<T*>(re_frame + frame_offset);
1324 }
1325 
1326 
1327 template <typename T>
frame_entry_address(Address re_frame,int frame_offset)1328 static T* frame_entry_address(Address re_frame, int frame_offset) {
1329   return reinterpret_cast<T*>(re_frame + frame_offset);
1330 }
1331 
1332 
CheckStackGuardState(Address * return_address,Code * re_code,Address re_frame,int start_index,const byte ** input_start,const byte ** input_end)1333 int RegExpMacroAssemblerARM64::CheckStackGuardState(
1334     Address* return_address, Code* re_code, Address re_frame, int start_index,
1335     const byte** input_start, const byte** input_end) {
1336   return NativeRegExpMacroAssembler::CheckStackGuardState(
1337       frame_entry<Isolate*>(re_frame, kIsolate), start_index,
1338       frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
1339       frame_entry_address<String*>(re_frame, kInput), input_start, input_end);
1340 }
1341 
1342 
CheckPosition(int cp_offset,Label * on_outside_input)1343 void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset,
1344                                               Label* on_outside_input) {
1345   if (cp_offset >= 0) {
1346     CompareAndBranchOrBacktrack(current_input_offset(),
1347                                 -cp_offset * char_size(), ge, on_outside_input);
1348   } else {
1349     __ Add(w12, current_input_offset(), Operand(cp_offset * char_size()));
1350     __ Cmp(w12, string_start_minus_one());
1351     BranchOrBacktrack(le, on_outside_input);
1352   }
1353 }
1354 
1355 
CanReadUnaligned()1356 bool RegExpMacroAssemblerARM64::CanReadUnaligned() {
1357   // TODO(pielan): See whether or not we should disable unaligned accesses.
1358   return !slow_safe();
1359 }
1360 
1361 
1362 // Private methods:
1363 
CallCheckStackGuardState(Register scratch)1364 void RegExpMacroAssemblerARM64::CallCheckStackGuardState(Register scratch) {
1365   // Allocate space on the stack to store the return address. The
1366   // CheckStackGuardState C++ function will override it if the code
1367   // moved. Allocate extra space for 2 arguments passed by pointers.
1368   // AAPCS64 requires the stack to be 16 byte aligned.
1369   int alignment = masm_->ActivationFrameAlignment();
1370   DCHECK_EQ(alignment % 16, 0);
1371   int align_mask = (alignment / kXRegSize) - 1;
1372   int xreg_to_claim = (3 + align_mask) & ~align_mask;
1373 
1374   DCHECK(csp.Is(__ StackPointer()));
1375   __ Claim(xreg_to_claim);
1376 
1377   // CheckStackGuardState needs the end and start addresses of the input string.
1378   __ Poke(input_end(), 2 * kPointerSize);
1379   __ Add(x5, csp, 2 * kPointerSize);
1380   __ Poke(input_start(), kPointerSize);
1381   __ Add(x4, csp, kPointerSize);
1382 
1383   __ Mov(w3, start_offset());
1384   // RegExp code frame pointer.
1385   __ Mov(x2, frame_pointer());
1386   // Code* of self.
1387   __ Mov(x1, Operand(masm_->CodeObject()));
1388 
1389   // We need to pass a pointer to the return address as first argument.
1390   // The DirectCEntry stub will place the return address on the stack before
1391   // calling so the stack pointer will point to it.
1392   __ Mov(x0, csp);
1393 
1394   ExternalReference check_stack_guard_state =
1395       ExternalReference::re_check_stack_guard_state(isolate());
1396   __ Mov(scratch, check_stack_guard_state);
1397   DirectCEntryStub stub(isolate());
1398   stub.GenerateCall(masm_, scratch);
1399 
1400   // The input string may have been moved in memory, we need to reload it.
1401   __ Peek(input_start(), kPointerSize);
1402   __ Peek(input_end(), 2 * kPointerSize);
1403 
1404   DCHECK(csp.Is(__ StackPointer()));
1405   __ Drop(xreg_to_claim);
1406 
1407   // Reload the Code pointer.
1408   __ Mov(code_pointer(), Operand(masm_->CodeObject()));
1409 }
1410 
BranchOrBacktrack(Condition condition,Label * to)1411 void RegExpMacroAssemblerARM64::BranchOrBacktrack(Condition condition,
1412                                                   Label* to) {
1413   if (condition == al) {  // Unconditional.
1414     if (to == NULL) {
1415       Backtrack();
1416       return;
1417     }
1418     __ B(to);
1419     return;
1420   }
1421   if (to == NULL) {
1422     to = &backtrack_label_;
1423   }
1424   __ B(condition, to);
1425 }
1426 
CompareAndBranchOrBacktrack(Register reg,int immediate,Condition condition,Label * to)1427 void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg,
1428                                                             int immediate,
1429                                                             Condition condition,
1430                                                             Label* to) {
1431   if ((immediate == 0) && ((condition == eq) || (condition == ne))) {
1432     if (to == NULL) {
1433       to = &backtrack_label_;
1434     }
1435     if (condition == eq) {
1436       __ Cbz(reg, to);
1437     } else {
1438       __ Cbnz(reg, to);
1439     }
1440   } else {
1441     __ Cmp(reg, immediate);
1442     BranchOrBacktrack(condition, to);
1443   }
1444 }
1445 
1446 
CheckPreemption()1447 void RegExpMacroAssemblerARM64::CheckPreemption() {
1448   // Check for preemption.
1449   ExternalReference stack_limit =
1450       ExternalReference::address_of_stack_limit(isolate());
1451   __ Mov(x10, stack_limit);
1452   __ Ldr(x10, MemOperand(x10));
1453   DCHECK(csp.Is(__ StackPointer()));
1454   __ Cmp(csp, x10);
1455   CallIf(&check_preempt_label_, ls);
1456 }
1457 
1458 
CheckStackLimit()1459 void RegExpMacroAssemblerARM64::CheckStackLimit() {
1460   ExternalReference stack_limit =
1461       ExternalReference::address_of_regexp_stack_limit(isolate());
1462   __ Mov(x10, stack_limit);
1463   __ Ldr(x10, MemOperand(x10));
1464   __ Cmp(backtrack_stackpointer(), x10);
1465   CallIf(&stack_overflow_label_, ls);
1466 }
1467 
1468 
Push(Register source)1469 void RegExpMacroAssemblerARM64::Push(Register source) {
1470   DCHECK(source.Is32Bits());
1471   DCHECK(!source.is(backtrack_stackpointer()));
1472   __ Str(source,
1473          MemOperand(backtrack_stackpointer(),
1474                     -static_cast<int>(kWRegSize),
1475                     PreIndex));
1476 }
1477 
1478 
Pop(Register target)1479 void RegExpMacroAssemblerARM64::Pop(Register target) {
1480   DCHECK(target.Is32Bits());
1481   DCHECK(!target.is(backtrack_stackpointer()));
1482   __ Ldr(target,
1483          MemOperand(backtrack_stackpointer(), kWRegSize, PostIndex));
1484 }
1485 
1486 
GetCachedRegister(int register_index)1487 Register RegExpMacroAssemblerARM64::GetCachedRegister(int register_index) {
1488   DCHECK(register_index < kNumCachedRegisters);
1489   return Register::Create(register_index / 2, kXRegSizeInBits);
1490 }
1491 
1492 
GetRegister(int register_index,Register maybe_result)1493 Register RegExpMacroAssemblerARM64::GetRegister(int register_index,
1494                                                 Register maybe_result) {
1495   DCHECK(maybe_result.Is32Bits());
1496   DCHECK(register_index >= 0);
1497   if (num_registers_ <= register_index) {
1498     num_registers_ = register_index + 1;
1499   }
1500   Register result;
1501   RegisterState register_state = GetRegisterState(register_index);
1502   switch (register_state) {
1503     case STACKED:
1504       __ Ldr(maybe_result, register_location(register_index));
1505       result = maybe_result;
1506       break;
1507     case CACHED_LSW:
1508       result = GetCachedRegister(register_index).W();
1509       break;
1510     case CACHED_MSW:
1511       __ Lsr(maybe_result.X(), GetCachedRegister(register_index),
1512              kWRegSizeInBits);
1513       result = maybe_result;
1514       break;
1515     default:
1516       UNREACHABLE();
1517       break;
1518   }
1519   DCHECK(result.Is32Bits());
1520   return result;
1521 }
1522 
1523 
StoreRegister(int register_index,Register source)1524 void RegExpMacroAssemblerARM64::StoreRegister(int register_index,
1525                                               Register source) {
1526   DCHECK(source.Is32Bits());
1527   DCHECK(register_index >= 0);
1528   if (num_registers_ <= register_index) {
1529     num_registers_ = register_index + 1;
1530   }
1531 
1532   Register cached_register;
1533   RegisterState register_state = GetRegisterState(register_index);
1534   switch (register_state) {
1535     case STACKED:
1536       __ Str(source, register_location(register_index));
1537       break;
1538     case CACHED_LSW:
1539       cached_register = GetCachedRegister(register_index);
1540       if (!source.Is(cached_register.W())) {
1541         __ Bfi(cached_register, source.X(), 0, kWRegSizeInBits);
1542       }
1543       break;
1544     case CACHED_MSW:
1545       cached_register = GetCachedRegister(register_index);
1546       __ Bfi(cached_register, source.X(), kWRegSizeInBits, kWRegSizeInBits);
1547       break;
1548     default:
1549       UNREACHABLE();
1550       break;
1551   }
1552 }
1553 
1554 
CallIf(Label * to,Condition condition)1555 void RegExpMacroAssemblerARM64::CallIf(Label* to, Condition condition) {
1556   Label skip_call;
1557   if (condition != al) __ B(&skip_call, NegateCondition(condition));
1558   __ Bl(to);
1559   __ Bind(&skip_call);
1560 }
1561 
1562 
RestoreLinkRegister()1563 void RegExpMacroAssemblerARM64::RestoreLinkRegister() {
1564   DCHECK(csp.Is(__ StackPointer()));
1565   __ Pop(lr, xzr);
1566   __ Add(lr, lr, Operand(masm_->CodeObject()));
1567 }
1568 
1569 
SaveLinkRegister()1570 void RegExpMacroAssemblerARM64::SaveLinkRegister() {
1571   DCHECK(csp.Is(__ StackPointer()));
1572   __ Sub(lr, lr, Operand(masm_->CodeObject()));
1573   __ Push(xzr, lr);
1574 }
1575 
1576 
register_location(int register_index)1577 MemOperand RegExpMacroAssemblerARM64::register_location(int register_index) {
1578   DCHECK(register_index < (1<<30));
1579   DCHECK(register_index >= kNumCachedRegisters);
1580   if (num_registers_ <= register_index) {
1581     num_registers_ = register_index + 1;
1582   }
1583   register_index -= kNumCachedRegisters;
1584   int offset = kFirstRegisterOnStack - register_index * kWRegSize;
1585   return MemOperand(frame_pointer(), offset);
1586 }
1587 
capture_location(int register_index,Register scratch)1588 MemOperand RegExpMacroAssemblerARM64::capture_location(int register_index,
1589                                                      Register scratch) {
1590   DCHECK(register_index < (1<<30));
1591   DCHECK(register_index < num_saved_registers_);
1592   DCHECK(register_index >= kNumCachedRegisters);
1593   DCHECK_EQ(register_index % 2, 0);
1594   register_index -= kNumCachedRegisters;
1595   int offset = kFirstCaptureOnStack - register_index * kWRegSize;
1596   // capture_location is used with Stp instructions to load/store 2 registers.
1597   // The immediate field in the encoding is limited to 7 bits (signed).
1598   if (is_int7(offset)) {
1599     return MemOperand(frame_pointer(), offset);
1600   } else {
1601     __ Add(scratch, frame_pointer(), offset);
1602     return MemOperand(scratch);
1603   }
1604 }
1605 
LoadCurrentCharacterUnchecked(int cp_offset,int characters)1606 void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset,
1607                                                               int characters) {
1608   Register offset = current_input_offset();
1609 
1610   // The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU
1611   // and the operating system running on the target allow it.
1612   // If unaligned load/stores are not supported then this function must only
1613   // be used to load a single character at a time.
1614 
1615   // ARMv8 supports unaligned accesses but V8 or the kernel can decide to
1616   // disable it.
1617   // TODO(pielan): See whether or not we should disable unaligned accesses.
1618   if (!CanReadUnaligned()) {
1619     DCHECK(characters == 1);
1620   }
1621 
1622   if (cp_offset != 0) {
1623     if (masm_->emit_debug_code()) {
1624       __ Mov(x10, cp_offset * char_size());
1625       __ Add(x10, x10, Operand(current_input_offset(), SXTW));
1626       __ Cmp(x10, Operand(w10, SXTW));
1627       // The offset needs to fit in a W register.
1628       __ Check(eq, kOffsetOutOfRange);
1629     } else {
1630       __ Add(w10, current_input_offset(), cp_offset * char_size());
1631     }
1632     offset = w10;
1633   }
1634 
1635   if (mode_ == LATIN1) {
1636     if (characters == 4) {
1637       __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1638     } else if (characters == 2) {
1639       __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1640     } else {
1641       DCHECK(characters == 1);
1642       __ Ldrb(current_character(), MemOperand(input_end(), offset, SXTW));
1643     }
1644   } else {
1645     DCHECK(mode_ == UC16);
1646     if (characters == 2) {
1647       __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1648     } else {
1649       DCHECK(characters == 1);
1650       __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1651     }
1652   }
1653 }
1654 
1655 #endif  // V8_INTERPRETED_REGEXP
1656 
1657 }  // namespace internal
1658 }  // namespace v8
1659 
1660 #endif  // V8_TARGET_ARCH_ARM64
1661