1 // Copyright 2021 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #if V8_TARGET_ARCH_RISCV64
6
7 #include "src/regexp/riscv64/regexp-macro-assembler-riscv64.h"
8
9 #include "src/codegen/assembler-inl.h"
10 #include "src/codegen/macro-assembler.h"
11 #include "src/logging/log.h"
12 #include "src/objects/objects-inl.h"
13 #include "src/regexp/regexp-macro-assembler.h"
14 #include "src/regexp/regexp-stack.h"
15 #include "src/snapshot/embedded/embedded-data-inl.h"
16 #include "src/strings/unicode.h"
17
18 namespace v8 {
19 namespace internal {
20
21 /* clang-format off
22 *
23 * This assembler uses the following register assignment convention
24 * - s3 : kScratchReg. Temporarily stores the index of capture start after a matching pass
25 * for a global regexp.
26 * - s4 : Pointer to current Code object including heap object tag.
27 * - s1 : Current position in input, as negative offset from end of string.
28 * Please notice that this is the byte offset, not the character offset!
29 * - s2 : Currently loaded character. Must be loaded using
30 * LoadCurrentCharacter before using any of the dispatch methods.
31 * - t0 : Points to tip of backtrack stack
32 * - t1 : Unused.
33 * - t2 : End of input (points to byte after last character in input).
34 * - fp : Frame pointer. Used to access arguments, local variables and
35 * RegExp registers.
36 * - sp : Points to tip of C stack.
37 *
38 * The remaining registers are free for computations.
39 * Each call to a public method should retain this convention.
40 *
41 * The stack will have the following structure:
42 *
43 * kStackFrameHeader
44 * --- sp when called ---
45 * - fp[72] ra Return from RegExp code (ra). kReturnAddress
46 * - fp[64] s9, old-fp Old fp, callee saved(s9).
47 * - fp[0..63] fp..s7 Callee-saved registers fp..s7.
48 * --- frame pointer ----
49 * - fp[-8] Isolate* isolate (address of the current isolate) kIsolate
50 * - fp[-16] direct_call (1 = direct call from JS, 0 = from runtime) kDirectCall
51 * - fp[-24] output_size (may fit multiple sets of matches) kNumOutputRegisters
52 * - fp[-32] int* output (int[num_saved_registers_], for output). kRegisterOutput
53 * - fp[-40] end of input (address of end of string). kInputEnd
54 * - fp[-48] start of input (address of first character in string). kInputStart
55 * - fp[-56] start index (character index of start). kStartIndex
56 * - fp[-64] void* input_string (location of a handle containing the string). kInputString
 *  - fp[-72]  success counter (only for global regexps to count matches).       kSuccessfulCaptures
 *  - fp[-80]  Offset of location before start of input (effectively character  kStringStartMinusOne
 *             position -1). Used to initialize capture registers to a
 *             non-position.
 *  - fp[-88]  backtrack counter (compared against the backtrack limit, if any). kBacktrackCount
 *  - fp[-96]  regexp stack pointer on entry, stored relative to the regexp     kRegExpStackBasePointer
 *             stack memory top.
 *  --------- The following registers each occupy one pointer-sized slot. ---------
 *  - fp[-104] register 0  (Only positions must be stored in the first          kRegisterZero
 *  -          register 1   num_saved_registers_ registers)
 *  - ...
 *  -          register num_registers-1
 *  --- sp ---
67 *
68 * The first num_saved_registers_ registers are initialized to point to
69 * "character -1" in the string (i.e., char_size() bytes before the first
70 * character of the string). The remaining registers start out as garbage.
71 *
72 * The data up to the return address must be placed there by the calling
73 * code and the remaining arguments are passed in registers, e.g. by calling the
74 * code entry as cast to a function with the signature:
75 * int (*match)(String input_string,
76 * int start_index,
77 * Address start,
78 * Address end,
79 * int* output,
80 * int output_size,
81 * bool direct_call = false,
82 * Isolate* isolate,
83 * Address regexp);
84 * The call is performed by NativeRegExpMacroAssembler::Execute()
85 * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
86 *
87 * clang-format on
88 */
89
90 #define __ ACCESS_MASM(masm_)
91
92 const int RegExpMacroAssemblerRISCV::kRegExpCodeSize;
93
RegExpMacroAssemblerRISCV(Isolate * isolate,Zone * zone,Mode mode,int registers_to_save)94 RegExpMacroAssemblerRISCV::RegExpMacroAssemblerRISCV(Isolate* isolate,
95 Zone* zone, Mode mode,
96 int registers_to_save)
97 : NativeRegExpMacroAssembler(isolate, zone),
98 masm_(std::make_unique<MacroAssembler>(
99 isolate, CodeObjectRequired::kYes,
100 NewAssemblerBuffer(kRegExpCodeSize))),
101 no_root_array_scope_(masm_.get()),
102 mode_(mode),
103 num_registers_(registers_to_save),
104 num_saved_registers_(registers_to_save),
105 entry_label_(),
106 start_label_(),
107 success_label_(),
108 backtrack_label_(),
109 exit_label_(),
110 internal_failure_label_() {
111 DCHECK_EQ(0, registers_to_save % 2);
112 __ jmp(&entry_label_); // We'll write the entry code later.
113 // If the code gets too big or corrupted, an internal exception will be
114 // raised, and we will exit right away.
115 __ bind(&internal_failure_label_);
116 __ li(a0, Operand(FAILURE));
117 __ Ret();
118 __ bind(&start_label_); // And then continue from here.
119 }
120
~RegExpMacroAssemblerRISCV()121 RegExpMacroAssemblerRISCV::~RegExpMacroAssemblerRISCV() {
122 // Unuse labels in case we throw away the assembler without calling GetCode.
123 entry_label_.Unuse();
124 start_label_.Unuse();
125 success_label_.Unuse();
126 backtrack_label_.Unuse();
127 exit_label_.Unuse();
128 check_preempt_label_.Unuse();
129 stack_overflow_label_.Unuse();
130 internal_failure_label_.Unuse();
131 fallback_label_.Unuse();
132 }
133
stack_limit_slack()134 int RegExpMacroAssemblerRISCV::stack_limit_slack() {
135 return RegExpStack::kStackLimitSlack;
136 }
137
AdvanceCurrentPosition(int by)138 void RegExpMacroAssemblerRISCV::AdvanceCurrentPosition(int by) {
139 if (by != 0) {
140 __ Add64(current_input_offset(), current_input_offset(),
141 Operand(by * char_size()));
142 }
143 }
144
AdvanceRegister(int reg,int by)145 void RegExpMacroAssemblerRISCV::AdvanceRegister(int reg, int by) {
146 DCHECK_LE(0, reg);
147 DCHECK_GT(num_registers_, reg);
148 if (by != 0) {
149 __ Ld(a0, register_location(reg));
150 __ Add64(a0, a0, Operand(by));
151 __ Sd(a0, register_location(reg));
152 }
153 }
154
Backtrack()155 void RegExpMacroAssemblerRISCV::Backtrack() {
156 CheckPreemption();
157 if (has_backtrack_limit()) {
158 Label next;
159 __ Ld(a0, MemOperand(frame_pointer(), kBacktrackCount));
160 __ Add64(a0, a0, Operand(1));
161 __ Sd(a0, MemOperand(frame_pointer(), kBacktrackCount));
162 __ BranchShort(&next, ne, a0, Operand(backtrack_limit()));
163
164 // Backtrack limit exceeded.
165 if (can_fallback()) {
166 __ jmp(&fallback_label_);
167 } else {
168 // Can't fallback, so we treat it as a failed match.
169 Fail();
170 }
171
172 __ bind(&next);
173 }
174 // Pop Code offset from backtrack stack, add Code and jump to location.
175 Pop(a0);
176 __ Add64(a0, a0, code_pointer());
177 __ Jump(a0);
178 }
179
Bind(Label * label)180 void RegExpMacroAssemblerRISCV::Bind(Label* label) { __ bind(label); }
181
CheckCharacter(uint32_t c,Label * on_equal)182 void RegExpMacroAssemblerRISCV::CheckCharacter(uint32_t c, Label* on_equal) {
183 BranchOrBacktrack(on_equal, eq, current_character(), Operand(c));
184 }
185
CheckCharacterGT(base::uc16 limit,Label * on_greater)186 void RegExpMacroAssemblerRISCV::CheckCharacterGT(base::uc16 limit,
187 Label* on_greater) {
188 BranchOrBacktrack(on_greater, gt, current_character(), Operand(limit));
189 }
190
CheckAtStart(int cp_offset,Label * on_at_start)191 void RegExpMacroAssemblerRISCV::CheckAtStart(int cp_offset,
192 Label* on_at_start) {
193 __ Ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
194 __ Add64(a0, current_input_offset(),
195 Operand(-char_size() + cp_offset * char_size()));
196 BranchOrBacktrack(on_at_start, eq, a0, Operand(a1));
197 }
198
CheckNotAtStart(int cp_offset,Label * on_not_at_start)199 void RegExpMacroAssemblerRISCV::CheckNotAtStart(int cp_offset,
200 Label* on_not_at_start) {
201 __ Ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
202 __ Add64(a0, current_input_offset(),
203 Operand(-char_size() + cp_offset * char_size()));
204 BranchOrBacktrack(on_not_at_start, ne, a0, Operand(a1));
205 }
206
CheckCharacterLT(base::uc16 limit,Label * on_less)207 void RegExpMacroAssemblerRISCV::CheckCharacterLT(base::uc16 limit,
208 Label* on_less) {
209 BranchOrBacktrack(on_less, lt, current_character(), Operand(limit));
210 }
211
CheckGreedyLoop(Label * on_equal)212 void RegExpMacroAssemblerRISCV::CheckGreedyLoop(Label* on_equal) {
213 Label backtrack_non_equal;
214 __ Lw(a0, MemOperand(backtrack_stackpointer(), 0));
215 __ BranchShort(&backtrack_non_equal, ne, current_input_offset(), Operand(a0));
216 __ Add64(backtrack_stackpointer(), backtrack_stackpointer(),
217 Operand(kIntSize));
218 __ bind(&backtrack_non_equal);
219 BranchOrBacktrack(on_equal, eq, current_input_offset(), Operand(a0));
220 }
221
222 // Push (pop) caller-saved registers used by irregexp.
PushCallerSavedRegisters()223 void RegExpMacroAssemblerRISCV::PushCallerSavedRegisters() {
224 RegList caller_saved_regexp = {current_input_offset(), current_character(),
225 end_of_input_address(),
226 backtrack_stackpointer()};
227 __ MultiPush(caller_saved_regexp);
228 }
229
PopCallerSavedRegisters()230 void RegExpMacroAssemblerRISCV::PopCallerSavedRegisters() {
231 RegList caller_saved_regexp = {current_input_offset(), current_character(),
232 end_of_input_address(),
233 backtrack_stackpointer()};
234 __ MultiPop(caller_saved_regexp);
235 }
236
CallIsCharacterInRangeArray(const ZoneList<CharacterRange> * ranges)237 void RegExpMacroAssemblerRISCV::CallIsCharacterInRangeArray(
238 const ZoneList<CharacterRange>* ranges) {
239 PushCallerSavedRegisters();
240 static const int kNumArguments = 3;
241 __ PrepareCallCFunction(kNumArguments, a0);
242
243 __ mv(a0, current_character());
244 __ li(a1, Operand(GetOrAddRangeArray(ranges)));
245 __ li(a2, Operand(ExternalReference::isolate_address(isolate())));
246
247 {
248 // We have a frame (set up in GetCode), but the assembler doesn't know.
249 FrameScope scope(masm_.get(), StackFrame::MANUAL);
250 __ CallCFunction(ExternalReference::re_is_character_in_range_array(),
251 kNumArguments);
252 }
253 PopCallerSavedRegisters();
254 __ li(code_pointer(), Operand(masm_->CodeObject()));
255 }
256
CheckCharacterInRangeArray(const ZoneList<CharacterRange> * ranges,Label * on_in_range)257 bool RegExpMacroAssemblerRISCV::CheckCharacterInRangeArray(
258 const ZoneList<CharacterRange>* ranges, Label* on_in_range) {
259 CallIsCharacterInRangeArray(ranges);
260 BranchOrBacktrack(on_in_range, ne, a0, Operand(zero_reg));
261 return true;
262 }
263
CheckCharacterNotInRangeArray(const ZoneList<CharacterRange> * ranges,Label * on_not_in_range)264 bool RegExpMacroAssemblerRISCV::CheckCharacterNotInRangeArray(
265 const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) {
266 CallIsCharacterInRangeArray(ranges);
267 BranchOrBacktrack(on_not_in_range, eq, a0, Operand(zero_reg));
268 return true;
269 }
270
CheckNotBackReferenceIgnoreCase(int start_reg,bool read_backward,bool unicode,Label * on_no_match)271 void RegExpMacroAssemblerRISCV::CheckNotBackReferenceIgnoreCase(
272 int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
273 Label fallthrough;
274 __ Ld(a0, register_location(start_reg)); // Index of start of capture.
275 __ Ld(a1, register_location(start_reg + 1)); // Index of end of capture.
276 __ Sub64(a1, a1, a0); // Length of capture.
277
278 // At this point, the capture registers are either both set or both cleared.
279 // If the capture length is zero, then the capture is either empty or cleared.
280 // Fall through in both cases.
281 __ BranchShort(&fallthrough, eq, a1, Operand(zero_reg));
282
283 if (read_backward) {
284 __ Ld(t1, MemOperand(frame_pointer(), kStringStartMinusOne));
285 __ Add64(t1, t1, a1);
286 BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t1));
287 } else {
288 __ Add64(t1, a1, current_input_offset());
289 // Check that there are enough characters left in the input.
290 BranchOrBacktrack(on_no_match, gt, t1, Operand(zero_reg));
291 }
292
293 if (mode_ == LATIN1) {
294 Label success;
295 Label fail;
296 Label loop_check;
297
298 // a0 - offset of start of capture.
299 // a1 - length of capture.
300 __ Add64(a0, a0, Operand(end_of_input_address()));
301 __ Add64(a2, end_of_input_address(), Operand(current_input_offset()));
302 if (read_backward) {
303 __ Sub64(a2, a2, Operand(a1));
304 }
305 __ Add64(a1, a0, Operand(a1));
306
307 // a0 - Address of start of capture.
308 // a1 - Address of end of capture.
309 // a2 - Address of current input position.
310
311 Label loop;
312 __ bind(&loop);
313 __ Lbu(a3, MemOperand(a0, 0));
314 __ addi(a0, a0, char_size());
315 __ Lbu(a4, MemOperand(a2, 0));
316 __ addi(a2, a2, char_size());
317
318 __ BranchShort(&loop_check, eq, a4, Operand(a3));
319
320 // Mismatch, try case-insensitive match (converting letters to lower-case).
321 __ Or(a3, a3, Operand(0x20)); // Convert capture character to lower-case.
322 __ Or(a4, a4, Operand(0x20)); // Also convert input character.
323 __ BranchShort(&fail, ne, a4, Operand(a3));
324 __ Sub64(a3, a3, Operand('a'));
325 __ BranchShort(&loop_check, Uless_equal, a3, Operand('z' - 'a'));
326 // Latin-1: Check for values in range [224,254] but not 247.
327 __ Sub64(a3, a3, Operand(224 - 'a'));
328 // Weren't Latin-1 letters.
329 __ BranchShort(&fail, Ugreater, a3, Operand(254 - 224));
330 // Check for 247.
331 __ BranchShort(&fail, eq, a3, Operand(247 - 224));
332
333 __ bind(&loop_check);
334 __ Branch(&loop, lt, a0, Operand(a1));
335 __ jmp(&success);
336
337 __ bind(&fail);
338 GoTo(on_no_match);
339
340 __ bind(&success);
341 // Compute new value of character position after the matched part.
342 __ Sub64(current_input_offset(), a2, end_of_input_address());
343 if (read_backward) {
344 __ Ld(t1, register_location(start_reg)); // Index of start of capture.
345 __ Ld(a2, register_location(start_reg + 1)); // Index of end of capture.
346 __ Add64(current_input_offset(), current_input_offset(), Operand(t1));
347 __ Sub64(current_input_offset(), current_input_offset(), Operand(a2));
348 }
349 } else {
350 DCHECK(mode_ == UC16);
351 PushCallerSavedRegisters();
352
353 int argument_count = 4;
354 __ PrepareCallCFunction(argument_count, a2);
355
356 // a0 - offset of start of capture.
357 // a1 - length of capture.
358
359 // Put arguments into arguments registers.
360 // Parameters are
361 // a0: Address byte_offset1 - Address captured substring's start.
362 // a1: Address byte_offset2 - Address of current character position.
363 // a2: size_t byte_length - length of capture in bytes(!).
364 // a3: Isolate* isolate.
365
366 // Address of start of capture.
367 __ Add64(a0, a0, Operand(end_of_input_address()));
368 // Length of capture.
369 __ mv(a2, a1);
370 // Save length in callee-save register for use on return.
371 __ mv(s3, a1);
372 // Address of current input position.
373 __ Add64(a1, current_input_offset(), Operand(end_of_input_address()));
374 if (read_backward) {
375 __ Sub64(a1, a1, Operand(s3));
376 }
377 // Isolate.
378 __ li(a3, Operand(ExternalReference::isolate_address(masm_->isolate())));
379
380 {
381 AllowExternalCallThatCantCauseGC scope(masm_.get());
382 ExternalReference function =
383 unicode
384 ? ExternalReference::re_case_insensitive_compare_unicode()
385 : ExternalReference::re_case_insensitive_compare_non_unicode();
386 __ CallCFunction(function, argument_count);
387 }
388
389 // Restore regexp engine registers.
390 PopCallerSavedRegisters();
391 __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE);
392 __ Ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));
393
394 // Check if function returned non-zero for success or zero for failure.
395 BranchOrBacktrack(on_no_match, eq, a0, Operand(zero_reg));
396 // On success, increment position by length of capture.
397 if (read_backward) {
398 __ Sub64(current_input_offset(), current_input_offset(), Operand(s3));
399 } else {
400 __ Add64(current_input_offset(), current_input_offset(), Operand(s3));
401 }
402 }
403
404 __ bind(&fallthrough);
405 }
406
CheckNotBackReference(int start_reg,bool read_backward,Label * on_no_match)407 void RegExpMacroAssemblerRISCV::CheckNotBackReference(int start_reg,
408 bool read_backward,
409 Label* on_no_match) {
410 Label fallthrough;
411
412 // Find length of back-referenced capture.
413 __ Ld(a0, register_location(start_reg));
414 __ Ld(a1, register_location(start_reg + 1));
415 __ Sub64(a1, a1, a0); // Length to check.
416
417 // At this point, the capture registers are either both set or both cleared.
418 // If the capture length is zero, then the capture is either empty or cleared.
419 // Fall through in both cases.
420 __ BranchShort(&fallthrough, eq, a1, Operand(zero_reg));
421
422 if (read_backward) {
423 __ Ld(t1, MemOperand(frame_pointer(), kStringStartMinusOne));
424 __ Add64(t1, t1, a1);
425 BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t1));
426 } else {
427 __ Add64(t1, a1, current_input_offset());
428 // Check that there are enough characters left in the input.
429 BranchOrBacktrack(on_no_match, gt, t1, Operand(zero_reg));
430 }
431
432 // Compute pointers to match string and capture string.
433 __ Add64(a0, a0, Operand(end_of_input_address()));
434 __ Add64(a2, end_of_input_address(), Operand(current_input_offset()));
435 if (read_backward) {
436 __ Sub64(a2, a2, Operand(a1));
437 }
438 __ Add64(a1, a1, Operand(a0));
439
440 Label loop;
441 __ bind(&loop);
442 if (mode_ == LATIN1) {
443 __ Lbu(a3, MemOperand(a0, 0));
444 __ addi(a0, a0, char_size());
445 __ Lbu(a4, MemOperand(a2, 0));
446 __ addi(a2, a2, char_size());
447 } else {
448 DCHECK(mode_ == UC16);
449 __ Lhu(a3, MemOperand(a0, 0));
450 __ addi(a0, a0, char_size());
451 __ Lhu(a4, MemOperand(a2, 0));
452 __ addi(a2, a2, char_size());
453 }
454 BranchOrBacktrack(on_no_match, ne, a3, Operand(a4));
455 __ Branch(&loop, lt, a0, Operand(a1));
456
457 // Move current character position to position after match.
458 __ Sub64(current_input_offset(), a2, end_of_input_address());
459 if (read_backward) {
460 __ Ld(t1, register_location(start_reg)); // Index of start of capture.
461 __ Ld(a2, register_location(start_reg + 1)); // Index of end of capture.
462 __ Add64(current_input_offset(), current_input_offset(), Operand(t1));
463 __ Sub64(current_input_offset(), current_input_offset(), Operand(a2));
464 }
465 __ bind(&fallthrough);
466 }
467
CheckNotCharacter(uint32_t c,Label * on_not_equal)468 void RegExpMacroAssemblerRISCV::CheckNotCharacter(uint32_t c,
469 Label* on_not_equal) {
470 BranchOrBacktrack(on_not_equal, ne, current_character(), Operand(c));
471 }
472
CheckCharacterAfterAnd(uint32_t c,uint32_t mask,Label * on_equal)473 void RegExpMacroAssemblerRISCV::CheckCharacterAfterAnd(uint32_t c,
474 uint32_t mask,
475 Label* on_equal) {
476 __ And(a0, current_character(), Operand(mask));
477 Operand rhs = (c == 0) ? Operand(zero_reg) : Operand(c);
478 BranchOrBacktrack(on_equal, eq, a0, rhs);
479 }
480
CheckNotCharacterAfterAnd(uint32_t c,uint32_t mask,Label * on_not_equal)481 void RegExpMacroAssemblerRISCV::CheckNotCharacterAfterAnd(uint32_t c,
482 uint32_t mask,
483 Label* on_not_equal) {
484 __ And(a0, current_character(), Operand(mask));
485 Operand rhs = (c == 0) ? Operand(zero_reg) : Operand(c);
486 BranchOrBacktrack(on_not_equal, ne, a0, rhs);
487 }
488
CheckNotCharacterAfterMinusAnd(base::uc16 c,base::uc16 minus,base::uc16 mask,Label * on_not_equal)489 void RegExpMacroAssemblerRISCV::CheckNotCharacterAfterMinusAnd(
490 base::uc16 c, base::uc16 minus, base::uc16 mask, Label* on_not_equal) {
491 DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
492 __ Sub64(a0, current_character(), Operand(minus));
493 __ And(a0, a0, Operand(mask));
494 BranchOrBacktrack(on_not_equal, ne, a0, Operand(c));
495 }
496
CheckCharacterInRange(base::uc16 from,base::uc16 to,Label * on_in_range)497 void RegExpMacroAssemblerRISCV::CheckCharacterInRange(base::uc16 from,
498 base::uc16 to,
499 Label* on_in_range) {
500 __ Sub64(a0, current_character(), Operand(from));
501 // Unsigned lower-or-same condition.
502 BranchOrBacktrack(on_in_range, Uless_equal, a0, Operand(to - from));
503 }
504
CheckCharacterNotInRange(base::uc16 from,base::uc16 to,Label * on_not_in_range)505 void RegExpMacroAssemblerRISCV::CheckCharacterNotInRange(
506 base::uc16 from, base::uc16 to, Label* on_not_in_range) {
507 __ Sub64(a0, current_character(), Operand(from));
508 // Unsigned higher condition.
509 BranchOrBacktrack(on_not_in_range, Ugreater, a0, Operand(to - from));
510 }
511
CheckBitInTable(Handle<ByteArray> table,Label * on_bit_set)512 void RegExpMacroAssemblerRISCV::CheckBitInTable(Handle<ByteArray> table,
513 Label* on_bit_set) {
514 __ li(a0, Operand(table));
515 if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) {
516 __ And(a1, current_character(), Operand(kTableSize - 1));
517 __ Add64(a0, a0, a1);
518 } else {
519 __ Add64(a0, a0, current_character());
520 }
521
522 __ Lbu(a0, FieldMemOperand(a0, ByteArray::kHeaderSize));
523 BranchOrBacktrack(on_bit_set, ne, a0, Operand(zero_reg));
524 }
525
CheckSpecialCharacterClass(StandardCharacterSet type,Label * on_no_match)526 bool RegExpMacroAssemblerRISCV::CheckSpecialCharacterClass(
527 StandardCharacterSet type, Label* on_no_match) {
528 // Range checks (c in min..max) are generally implemented by an unsigned
529 // (c - min) <= (max - min) check.
530 switch (type) {
531 case StandardCharacterSet::kWhitespace:
532 // Match space-characters.
533 if (mode_ == LATIN1) {
534 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
535 Label success;
536 __ BranchShort(&success, eq, current_character(), Operand(' '));
537 // Check range 0x09..0x0D.
538 __ Sub64(a0, current_character(), Operand('\t'));
539 __ BranchShort(&success, Uless_equal, a0, Operand('\r' - '\t'));
540 // \u00a0 (NBSP).
541 BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00A0 - '\t'));
542 __ bind(&success);
543 return true;
544 }
545 return false;
546 case StandardCharacterSet::kNotWhitespace:
547 // The emitted code for generic character classes is good enough.
548 return false;
549 case StandardCharacterSet::kDigit:
550 // Match Latin1 digits ('0'..'9').
551 __ Sub64(a0, current_character(), Operand('0'));
552 BranchOrBacktrack(on_no_match, Ugreater, a0, Operand('9' - '0'));
553 return true;
554 case StandardCharacterSet::kNotDigit:
555 // Match non Latin1-digits.
556 __ Sub64(a0, current_character(), Operand('0'));
557 BranchOrBacktrack(on_no_match, Uless_equal, a0, Operand('9' - '0'));
558 return true;
559 case StandardCharacterSet::kNotLineTerminator: {
560 // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
561 __ Xor(a0, current_character(), Operand(0x01));
562 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
563 __ Sub64(a0, a0, Operand(0x0B));
564 BranchOrBacktrack(on_no_match, Uless_equal, a0, Operand(0x0C - 0x0B));
565 if (mode_ == UC16) {
566 // Compare original value to 0x2028 and 0x2029, using the already
567 // computed (current_char ^ 0x01 - 0x0B). I.e., check for
568 // 0x201D (0x2028 - 0x0B) or 0x201E.
569 __ Sub64(a0, a0, Operand(0x2028 - 0x0B));
570 BranchOrBacktrack(on_no_match, Uless_equal, a0, Operand(1));
571 }
572 return true;
573 }
574 case StandardCharacterSet::kLineTerminator: {
575 // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
576 __ Xor(a0, current_character(), Operand(0x01));
577 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
578 __ Sub64(a0, a0, Operand(0x0B));
579 if (mode_ == LATIN1) {
580 BranchOrBacktrack(on_no_match, Ugreater, a0, Operand(0x0C - 0x0B));
581 } else {
582 Label done;
583 BranchOrBacktrack(&done, Uless_equal, a0, Operand(0x0C - 0x0B));
584 // Compare original value to 0x2028 and 0x2029, using the already
585 // computed (current_char ^ 0x01 - 0x0B). I.e., check for
586 // 0x201D (0x2028 - 0x0B) or 0x201E.
587 __ Sub64(a0, a0, Operand(0x2028 - 0x0B));
588 BranchOrBacktrack(on_no_match, Ugreater, a0, Operand(1));
589 __ bind(&done);
590 }
591 return true;
592 }
593 case StandardCharacterSet::kWord: {
594 if (mode_ != LATIN1) {
595 // Table is 256 entries, so all Latin1 characters can be tested.
596 BranchOrBacktrack(on_no_match, Ugreater, current_character(),
597 Operand('z'));
598 }
599 ExternalReference map = ExternalReference::re_word_character_map();
600 __ li(a0, Operand(map));
601 __ Add64(a0, a0, current_character());
602 __ Lbu(a0, MemOperand(a0, 0));
603 BranchOrBacktrack(on_no_match, eq, a0, Operand(zero_reg));
604 return true;
605 }
606 case StandardCharacterSet::kNotWord: {
607 Label done;
608 if (mode_ != LATIN1) {
609 // Table is 256 entries, so all Latin1 characters can be tested.
610 __ BranchShort(&done, Ugreater, current_character(), Operand('z'));
611 }
612 ExternalReference map = ExternalReference::re_word_character_map();
613 __ li(a0, Operand(map));
614 __ Add64(a0, a0, current_character());
615 __ Lbu(a0, MemOperand(a0, 0));
616 BranchOrBacktrack(on_no_match, ne, a0, Operand(zero_reg));
617 if (mode_ != LATIN1) {
618 __ bind(&done);
619 }
620 return true;
621 }
622 case StandardCharacterSet::kEverything:
623 // Match any character.
624 return true;
625 // No custom implementation (yet): s(UC16), S(UC16).
626 default:
627 return false;
628 }
629 }
630
Fail()631 void RegExpMacroAssemblerRISCV::Fail() {
632 __ li(a0, Operand(FAILURE));
633 __ jmp(&exit_label_);
634 }
635
LoadRegExpStackPointerFromMemory(Register dst)636 void RegExpMacroAssemblerRISCV::LoadRegExpStackPointerFromMemory(Register dst) {
637 ExternalReference ref =
638 ExternalReference::address_of_regexp_stack_stack_pointer(isolate());
639 __ li(dst, Operand(ref));
640 __ Ld(dst, MemOperand(dst));
641 }
642
StoreRegExpStackPointerToMemory(Register src,Register scratch)643 void RegExpMacroAssemblerRISCV::StoreRegExpStackPointerToMemory(
644 Register src, Register scratch) {
645 ExternalReference ref =
646 ExternalReference::address_of_regexp_stack_stack_pointer(isolate());
647 __ li(scratch, Operand(ref));
648 __ Sd(src, MemOperand(scratch));
649 }
650
PushRegExpBasePointer(Register scratch1,Register scratch2)651 void RegExpMacroAssemblerRISCV::PushRegExpBasePointer(Register scratch1,
652 Register scratch2) {
653 LoadRegExpStackPointerFromMemory(scratch1);
654 ExternalReference ref =
655 ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
656 __ li(scratch2, Operand(ref));
657 __ Ld(scratch2, MemOperand(scratch2));
658 __ Sub64(scratch2, scratch1, scratch2);
659 __ Sd(scratch2, MemOperand(frame_pointer(), kRegExpStackBasePointer));
660 }
661
PopRegExpBasePointer(Register scratch1,Register scratch2)662 void RegExpMacroAssemblerRISCV::PopRegExpBasePointer(Register scratch1,
663 Register scratch2) {
664 ExternalReference ref =
665 ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
666 __ Ld(scratch1, MemOperand(frame_pointer(), kRegExpStackBasePointer));
667 __ li(scratch2, ref);
668 __ Ld(scratch2, MemOperand(scratch2));
669 __ Add64(scratch1, scratch1, scratch2);
670 StoreRegExpStackPointerToMemory(scratch1, scratch2);
671 }
672
GetCode(Handle<String> source)673 Handle<HeapObject> RegExpMacroAssemblerRISCV::GetCode(Handle<String> source) {
674 Label return_a0;
675 if (masm_->has_exception()) {
676 // If the code gets corrupted due to long regular expressions and lack of
677 // space on trampolines, an internal exception flag is set. If this case
678 // is detected, we will jump into exit sequence right away.
679 __ bind_to(&entry_label_, internal_failure_label_.pos());
680 } else {
681 // Finalize code - write the entry point code now we know how many
682 // registers we need.
683
684 // Entry code:
685 __ bind(&entry_label_);
686
    // Tell the system that we have a stack frame. Because the type is MANUAL,
    // no code is generated.
689 FrameScope scope(masm_.get(), StackFrame::MANUAL);
690
691 // Actually emit code to start a new stack frame.
692 // Push arguments
693 // Save callee-save registers.
694 // Start new stack frame.
695 // Store link register in existing stack-cell.
696 // Order here should correspond to order of offset constants in header file.
697 // TODO(plind): we save fp..s11, but ONLY use s3 here - use the regs
698 // or dont save.
699 RegList registers_to_retain = {fp, s1, s2, s3, s4,
700 s5, s6, s7, s8 /*, s9, s10, s11*/};
701 DCHECK(registers_to_retain.Count() == kNumCalleeRegsToRetain);
702
703 // The remaining arguments are passed in registers, e.g.by calling the code
704 // entry as cast to a function with the signature:
705 //
706 // *int(*match)(String input_string, // a0
707 // int start_offset, // a1
708 // byte* input_start, // a2
709 // byte* input_end, // a3
710 // int* output, // a4
711 // int output_size, // a5
712 // int call_origin, // a6
713 // Isolate* isolate, // a7
714 // Address regexp); // on the stack
715 RegList argument_registers = {a0, a1, a2, a3, a4, a5, a6, a7};
716
717 // According to MultiPush implementation, registers will be pushed in the
718 // order of ra, fp, then s8, ..., s1, and finally a7,...a0
719 __ MultiPush(RegList{ra} | registers_to_retain | argument_registers);
720
721 // Set frame pointer in space for it if this is not a direct call
722 // from generated code.
723 __ Add64(frame_pointer(), sp,
724 Operand(argument_registers.Count() * kSystemPointerSize));
725
726 STATIC_ASSERT(kSuccessfulCaptures == kInputString - kSystemPointerSize);
727 __ mv(a0, zero_reg);
728 __ push(a0); // Make room for success counter and initialize it to 0.
729 STATIC_ASSERT(kStringStartMinusOne ==
730 kSuccessfulCaptures - kSystemPointerSize);
731 __ push(a0); // Make room for "string start - 1" constant.
732 STATIC_ASSERT(kBacktrackCount == kStringStartMinusOne - kSystemPointerSize);
733 __ push(a0); // The backtrack counter
734 STATIC_ASSERT(kRegExpStackBasePointer ==
735 kBacktrackCount - kSystemPointerSize);
736 __ push(a0); // The regexp stack base ptr.
737 // Store the regexp base pointer - we'll later restore it / write it to
738 // memory when returning from this irregexp code object.
739 PushRegExpBasePointer(a0, a1);
740
741 // Check if we have space on the stack for registers.
742 Label stack_limit_hit;
743 Label stack_ok;
744
745 ExternalReference stack_limit =
746 ExternalReference::address_of_jslimit(masm_->isolate());
747 __ li(a0, Operand(stack_limit));
748 __ Ld(a0, MemOperand(a0));
749 __ Sub64(a0, sp, a0);
750 // Handle it if the stack pointer is already below the stack limit.
751 __ BranchShort(&stack_limit_hit, le, a0, Operand(zero_reg));
752 // Check if there is room for the variable number of registers above
753 // the stack limit.
754 __ BranchShort(&stack_ok, Ugreater_equal, a0,
755 Operand(num_registers_ * kSystemPointerSize));
756 // Exit with OutOfMemory exception. There is not enough space on the stack
757 // for our working registers.
758 __ li(a0, Operand(EXCEPTION));
759 __ jmp(&return_a0);
760
761 __ bind(&stack_limit_hit);
762 CallCheckStackGuardState(a0);
763 // If returned value is non-zero, we exit with the returned value as result.
764 __ Branch(&return_a0, ne, a0, Operand(zero_reg));
765
766 __ bind(&stack_ok);
767 // Allocate space on stack for registers.
768 __ Sub64(sp, sp, Operand(num_registers_ * kSystemPointerSize));
769 // Load string end.
770 __ Ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));
771 // Load input start.
772 __ Ld(a0, MemOperand(frame_pointer(), kInputStart));
773 // Find negative length (offset of start relative to end).
774 __ Sub64(current_input_offset(), a0, end_of_input_address());
775 // Set a0 to address of char before start of the input string
776 // (effectively string position -1).
777 __ Ld(a1, MemOperand(frame_pointer(), kStartIndex));
778 __ Sub64(a0, current_input_offset(), Operand(char_size()));
779 __ slli(t1, a1, (mode_ == UC16) ? 1 : 0);
780 __ Sub64(a0, a0, t1);
781 // Store this value in a local variable, for use when clearing
782 // position registers.
783 __ Sd(a0, MemOperand(frame_pointer(), kStringStartMinusOne));
784
785 // Initialize code pointer register
786 __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE);
787
788 Label load_char_start_regexp, start_regexp;
789 // Load newline if index is at start, previous character otherwise.
790 __ BranchShort(&load_char_start_regexp, ne, a1, Operand(zero_reg));
791 __ li(current_character(), Operand('\n'));
792 __ jmp(&start_regexp);
793
794 // Global regexp restarts matching here.
795 __ bind(&load_char_start_regexp);
796 // Load previous char as initial value of current character register.
797 LoadCurrentCharacterUnchecked(-1, 1);
798 __ bind(&start_regexp);
799
800 // Initialize on-stack registers.
801 if (num_saved_registers_ > 0) { // Always is, if generated from a regexp.
802 // Fill saved registers with initial value = start offset - 1.
803 if (num_saved_registers_ > 8) {
804 // Address of register 0.
805 __ Add64(a1, frame_pointer(), Operand(kRegisterZero));
806 __ li(a2, Operand(num_saved_registers_));
807 Label init_loop;
808 __ bind(&init_loop);
809 __ Sd(a0, MemOperand(a1));
810 __ Add64(a1, a1, Operand(-kSystemPointerSize));
811 __ Sub64(a2, a2, Operand(1));
812 __ Branch(&init_loop, ne, a2, Operand(zero_reg));
813 } else {
814 for (int i = 0; i < num_saved_registers_; i++) {
815 __ Sd(a0, register_location(i));
816 }
817 }
818 }
819
820 // Initialize backtrack stack pointer.
821 LoadRegExpStackPointerFromMemory(backtrack_stackpointer());
822
823 __ jmp(&start_label_);
824
825 // Exit code:
826 if (success_label_.is_linked()) {
827 // Save captures when successful.
828 __ bind(&success_label_);
829 if (num_saved_registers_ > 0) {
830 // Copy captures to output.
831 __ Ld(a1, MemOperand(frame_pointer(), kInputStart));
832 __ Ld(a0, MemOperand(frame_pointer(), kRegisterOutput));
833 __ Ld(a2, MemOperand(frame_pointer(), kStartIndex));
834 __ Sub64(a1, end_of_input_address(), a1);
835 // a1 is length of input in bytes.
836 if (mode_ == UC16) {
837 __ srli(a1, a1, 1);
838 }
839 // a1 is length of input in characters.
840 __ Add64(a1, a1, Operand(a2));
841 // a1 is length of string in characters.
842
843 DCHECK_EQ(0, num_saved_registers_ % 2);
844 // Always an even number of capture registers. This allows us to
845 // unroll the loop once to add an operation between a load of a
846 // register and the following use of that register.
847 for (int i = 0; i < num_saved_registers_; i += 2) {
848 __ Ld(a2, register_location(i));
849 __ Ld(a3, register_location(i + 1));
850 if (i == 0 && global_with_zero_length_check()) {
          // Keep capture start in s3 for the zero-length check later.
852 __ mv(s3, a2);
853 }
854 if (mode_ == UC16) {
855 __ srai(a2, a2, 1);
856 __ Add64(a2, a2, a1);
857 __ srai(a3, a3, 1);
858 __ Add64(a3, a3, a1);
859 } else {
860 __ Add64(a2, a1, Operand(a2));
861 __ Add64(a3, a1, Operand(a3));
862 }
863 // V8 expects the output to be an int32_t array.
864 __ Sw(a2, MemOperand(a0));
865 __ Add64(a0, a0, kIntSize);
866 __ Sw(a3, MemOperand(a0));
867 __ Add64(a0, a0, kIntSize);
868 }
869 }
870
871 if (global()) {
872 // Restart matching if the regular expression is flagged as global.
873 __ Ld(a0, MemOperand(frame_pointer(), kSuccessfulCaptures));
874 __ Ld(a1, MemOperand(frame_pointer(), kNumOutputRegisters));
875 __ Ld(a2, MemOperand(frame_pointer(), kRegisterOutput));
876 // Increment success counter.
877 __ Add64(a0, a0, 1);
878 __ Sd(a0, MemOperand(frame_pointer(), kSuccessfulCaptures));
879 // Capture results have been stored, so the number of remaining global
880 // output registers is reduced by the number of stored captures.
881 __ Sub64(a1, a1, num_saved_registers_);
882 // Check whether we have enough room for another set of capture results.
883 __ Branch(&return_a0, lt, a1, Operand(num_saved_registers_));
884
885 __ Sd(a1, MemOperand(frame_pointer(), kNumOutputRegisters));
886 // Advance the location for output.
887 __ Add64(a2, a2, num_saved_registers_ * kIntSize);
888 __ Sd(a2, MemOperand(frame_pointer(), kRegisterOutput));
889
890 // Prepare a0 to initialize registers with its value in the next run.
891 __ Ld(a0, MemOperand(frame_pointer(), kStringStartMinusOne));
892
893 if (global_with_zero_length_check()) {
894 // Special case for zero-length matches.
895 // s3: capture start index
896 // Not a zero-length match, restart.
897 __ Branch(&load_char_start_regexp, ne, current_input_offset(),
898 Operand(s3));
899 // Offset from the end is zero if we already reached the end.
900 __ Branch(&exit_label_, eq, current_input_offset(),
901 Operand(zero_reg));
902 // Advance current position after a zero-length match.
903 Label advance;
904 __ bind(&advance);
905 __ Add64(current_input_offset(), current_input_offset(),
906 Operand((mode_ == UC16) ? 2 : 1));
907 if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
908 }
909
910 __ Branch(&load_char_start_regexp);
911 } else {
912 __ li(a0, Operand(SUCCESS));
913 }
914 }
915 // Exit and return a0.
916 __ bind(&exit_label_);
917 if (global()) {
918 __ Ld(a0, MemOperand(frame_pointer(), kSuccessfulCaptures));
919 }
920
921 __ bind(&return_a0);
922 // Restore the original regexp stack pointer value (effectively, pop the
923 // stored base pointer).
924 PopRegExpBasePointer(a1, a2);
925 // Skip sp past regexp registers and local variables..
926 __ mv(sp, frame_pointer());
927
928 // Restore registers fp..s11 and return (restoring ra to pc).
929 __ MultiPop(registers_to_retain | ra);
930
931 __ Ret();
932
933 // Backtrack code (branch target for conditional backtracks).
934 if (backtrack_label_.is_linked()) {
935 __ bind(&backtrack_label_);
936 Backtrack();
937 }
938
939 Label exit_with_exception;
940
941 // Preempt-code.
942 if (check_preempt_label_.is_linked()) {
943 SafeCallTarget(&check_preempt_label_);
944 StoreRegExpStackPointerToMemory(backtrack_stackpointer(), a1);
945 // Put regexp engine registers on stack.
946 PushCallerSavedRegisters();
947 CallCheckStackGuardState(a0);
948 PopCallerSavedRegisters();
949 // If returning non-zero, we should end execution with the given
950 // result as return value.
951 __ Branch(&return_a0, ne, a0, Operand(zero_reg));
952 LoadRegExpStackPointerFromMemory(backtrack_stackpointer());
953 // String might have moved: Reload end of string from frame.
954 __ Ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));
955 __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE);
956 SafeReturn();
957 }
958
959 // Backtrack stack overflow code.
960 if (stack_overflow_label_.is_linked()) {
961 SafeCallTarget(&stack_overflow_label_);
962 // Call GrowStack(isolate).
963 StoreRegExpStackPointerToMemory(backtrack_stackpointer(),
964 a1);
965
966 static constexpr int kNumArguments = 1;
967 __ PrepareCallCFunction(kNumArguments, 0, a0);
968 __ li(a0, ExternalReference::isolate_address(isolate()));
969 ExternalReference grow_stack = ExternalReference::re_grow_stack();
970 __ CallCFunction(grow_stack, kNumArguments);
971 // If nullptr is returned, we have failed to grow the stack, and must exit
972 // with a stack-overflow exception.
973 __ BranchShort(&exit_with_exception, eq, a0, Operand(zero_reg));
974 // Otherwise use return value as new stack pointer.
975 __ mv(backtrack_stackpointer(), a0);
976 // Restore saved registers and continue.
977 __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE);
978 __ Ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));
979 SafeReturn();
980 }
981
982 if (exit_with_exception.is_linked()) {
983 // If any of the code above needed to exit with an exception.
984 __ bind(&exit_with_exception);
985 // Exit with Result EXCEPTION(-1) to signal thrown exception.
986 __ li(a0, Operand(EXCEPTION));
987 __ jmp(&return_a0);
988 }
989
990 if (fallback_label_.is_linked()) {
991 __ bind(&fallback_label_);
992 __ li(a0, Operand(FALLBACK_TO_EXPERIMENTAL));
993 __ jmp(&return_a0);
994 }
995 }
996
997 CodeDesc code_desc;
998 masm_->GetCode(isolate(), &code_desc);
999 Handle<Code> code =
1000 Factory::CodeBuilder(isolate(), code_desc, CodeKind::REGEXP)
1001 .set_self_reference(masm_->CodeObject())
1002 .Build();
1003 LOG(masm_->isolate(),
1004 RegExpCodeCreateEvent(Handle<AbstractCode>::cast(code), source));
1005 return Handle<HeapObject>::cast(code);
1006 }
1007
GoTo(Label * to)1008 void RegExpMacroAssemblerRISCV::GoTo(Label* to) {
1009 if (to == nullptr) {
1010 Backtrack();
1011 return;
1012 }
1013 __ jmp(to);
1014 return;
1015 }
1016
IfRegisterGE(int reg,int comparand,Label * if_ge)1017 void RegExpMacroAssemblerRISCV::IfRegisterGE(int reg, int comparand,
1018 Label* if_ge) {
1019 __ Ld(a0, register_location(reg));
1020 BranchOrBacktrack(if_ge, ge, a0, Operand(comparand));
1021 }
1022
IfRegisterLT(int reg,int comparand,Label * if_lt)1023 void RegExpMacroAssemblerRISCV::IfRegisterLT(int reg, int comparand,
1024 Label* if_lt) {
1025 __ Ld(a0, register_location(reg));
1026 BranchOrBacktrack(if_lt, lt, a0, Operand(comparand));
1027 }
1028
IfRegisterEqPos(int reg,Label * if_eq)1029 void RegExpMacroAssemblerRISCV::IfRegisterEqPos(int reg, Label* if_eq) {
1030 __ Ld(a0, register_location(reg));
1031 BranchOrBacktrack(if_eq, eq, a0, Operand(current_input_offset()));
1032 }
1033
// Reports which irregexp backend this assembler is: always
// kRISCVImplementation.
RegExpMacroAssembler::IrregexpImplementation
RegExpMacroAssemblerRISCV::Implementation() {
  return kRISCVImplementation;
}
1038
PopCurrentPosition()1039 void RegExpMacroAssemblerRISCV::PopCurrentPosition() {
1040 Pop(current_input_offset());
1041 }
1042
PopRegister(int register_index)1043 void RegExpMacroAssemblerRISCV::PopRegister(int register_index) {
1044 Pop(a0);
1045 __ Sd(a0, register_location(register_index));
1046 }
1047
// Pushes the location of |label| onto the backtrack stack, then checks the
// backtrack stack limit. The pushed value is the label's position within the
// code plus (Code::kHeaderSize - kHeapObjectTag).
void RegExpMacroAssemblerRISCV::PushBacktrack(Label* label) {
  if (label->is_bound()) {
    // Position is already known: materialize it as an immediate.
    int target = label->pos();
    __ li(a0, Operand(target + Code::kHeaderSize - kHeapObjectTag));
  } else {
    // Position is not known yet. Emit a 32-bit placeholder word inline in the
    // instruction stream (jumped over by the short branch) and register it
    // against the label with label_at_put -- presumably so it gets patched
    // once the label is bound. Trampoline pool emission is blocked so the
    // layout of this sequence is not disturbed.
    Assembler::BlockTrampolinePoolScope block_trampoline_pool(masm_.get());
    Label after_constant;
    __ BranchShort(&after_constant);
    int offset = masm_->pc_offset();
    // Offset of the placeholder word relative to the (tagged) code pointer.
    int cp_offset = offset + Code::kHeaderSize - kHeapObjectTag;
    __ emit(0);
    masm_->label_at_put(label, offset);
    __ bind(&after_constant);
    // Load the placeholder word back at run time through the code pointer.
    // NOTE(review): the is_int16 split looks inherited from another port;
    // confirm whether it is still needed for this target.
    if (is_int16(cp_offset)) {
      __ Lwu(a0, MemOperand(code_pointer(), cp_offset));
    } else {
      __ Add64(a0, code_pointer(), cp_offset);
      __ Lwu(a0, MemOperand(a0, 0));
    }
  }
  Push(a0);
  CheckStackLimit();
}
1071
PushCurrentPosition()1072 void RegExpMacroAssemblerRISCV::PushCurrentPosition() {
1073 Push(current_input_offset());
1074 }
1075
PushRegister(int register_index,StackCheckFlag check_stack_limit)1076 void RegExpMacroAssemblerRISCV::PushRegister(int register_index,
1077 StackCheckFlag check_stack_limit) {
1078 __ Ld(a0, register_location(register_index));
1079 Push(a0);
1080 if (check_stack_limit) CheckStackLimit();
1081 }
1082
ReadCurrentPositionFromRegister(int reg)1083 void RegExpMacroAssemblerRISCV::ReadCurrentPositionFromRegister(int reg) {
1084 __ Ld(current_input_offset(), register_location(reg));
1085 }
1086
WriteStackPointerToRegister(int reg)1087 void RegExpMacroAssemblerRISCV::WriteStackPointerToRegister(int reg) {
1088 ExternalReference ref =
1089 ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
1090 __ li(a0, ref);
1091 __ Ld(a0, MemOperand(a0));
1092 __ Sub64(a0, backtrack_stackpointer(), a0);
1093 __ Sw(a0, register_location(reg));
1094 }
1095
ReadStackPointerFromRegister(int reg)1096 void RegExpMacroAssemblerRISCV::ReadStackPointerFromRegister(int reg) {
1097 ExternalReference ref =
1098 ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
1099 __ li(a1, ref);
1100 __ Ld(a1, MemOperand(a1));
1101 __ Lw(backtrack_stackpointer(), register_location(reg));
1102 __ Add64(backtrack_stackpointer(), backtrack_stackpointer(), a1);
1103 }
1104
SetCurrentPositionFromEnd(int by)1105 void RegExpMacroAssemblerRISCV::SetCurrentPositionFromEnd(int by) {
1106 Label after_position;
1107 __ BranchShort(&after_position, ge, current_input_offset(),
1108 Operand(-by * char_size()));
1109 __ li(current_input_offset(), -by * char_size());
1110 // On RegExp code entry (where this operation is used), the character before
1111 // the current position is expected to be already loaded.
1112 // We have advanced the position, so it's safe to read backwards.
1113 LoadCurrentCharacterUnchecked(-1, 1);
1114 __ bind(&after_position);
1115 }
1116
SetRegister(int register_index,int to)1117 void RegExpMacroAssemblerRISCV::SetRegister(int register_index, int to) {
1118 DCHECK(register_index >= num_saved_registers_); // Reserved for positions!
1119 __ li(a0, Operand(to));
1120 __ Sd(a0, register_location(register_index));
1121 }
1122
Succeed()1123 bool RegExpMacroAssemblerRISCV::Succeed() {
1124 __ jmp(&success_label_);
1125 return global();
1126 }
1127
WriteCurrentPositionToRegister(int reg,int cp_offset)1128 void RegExpMacroAssemblerRISCV::WriteCurrentPositionToRegister(int reg,
1129 int cp_offset) {
1130 if (cp_offset == 0) {
1131 __ Sd(current_input_offset(), register_location(reg));
1132 } else {
1133 __ Add64(a0, current_input_offset(), Operand(cp_offset * char_size()));
1134 __ Sd(a0, register_location(reg));
1135 }
1136 }
1137
ClearRegisters(int reg_from,int reg_to)1138 void RegExpMacroAssemblerRISCV::ClearRegisters(int reg_from, int reg_to) {
1139 DCHECK(reg_from <= reg_to);
1140 __ Ld(a0, MemOperand(frame_pointer(), kStringStartMinusOne));
1141 for (int reg = reg_from; reg <= reg_to; reg++) {
1142 __ Sd(a0, register_location(reg));
1143 }
1144 }
#ifdef RISCV_HAS_NO_UNALIGNED
// Only compiled when RISCV_HAS_NO_UNALIGNED is defined: reports that unaligned
// reads are not available.
bool RegExpMacroAssemblerRISCV::CanReadUnaligned() const {
  return false;
}
#endif
1148 // Private methods:
1149
// Emits a call to the C++ stack-guard check through the DirectCEntry builtin.
//
// Register setup before the call:
//   a0 - address of the stack slot reserved for the return address,
//   a1 - this code object,
//   a2 - the regexp frame pointer,
//   t6 - address of the stack guard check function.
// |scratch| is clobbered: it temporarily holds the original sp, which is
// saved on the (re-aligned) stack and restored after the call. The code
// pointer register is reloaded at the end. Callers in this file inspect a0
// after the call.
void RegExpMacroAssemblerRISCV::CallCheckStackGuardState(Register scratch) {
  DCHECK(!isolate()->IsGeneratingEmbeddedBuiltins());
  DCHECK(!masm_->options().isolate_independent_code);

  int stack_alignment = base::OS::ActivationFrameAlignment();

  // Align the stack pointer and save the original sp value on the stack.
  __ mv(scratch, sp);
  __ Sub64(sp, sp, Operand(kSystemPointerSize));
  DCHECK(base::bits::IsPowerOfTwo(stack_alignment));
  __ And(sp, sp, Operand(-stack_alignment));
  __ Sd(scratch, MemOperand(sp));

  // Third argument: the regexp frame.
  __ mv(a2, frame_pointer());
  // Second argument: code of self.
  __ li(a1, Operand(masm_->CodeObject()), CONSTANT_SIZE);

  // We need to make room for the return address on the stack.
  DCHECK(IsAligned(stack_alignment, kSystemPointerSize));
  __ Sub64(sp, sp, Operand(stack_alignment));

  // The stack pointer now points to cell where the return address will be
  // written. Arguments are in registers, meaning we treat the return address as
  // argument 5. Since DirectCEntry will handle allocating space for the C
  // argument slots, we don't need to care about that here. This is how the
  // stack will look (sp meaning the value of sp at this moment):
  // [sp + 3] - empty slot if needed for alignment.
  // [sp + 2] - saved sp.
  // [sp + 1] - second word reserved for return value.
  // [sp + 0] - first word reserved for return value.

  // a0 will point to the return address, placed by DirectCEntry.
  __ mv(a0, sp);

  // The C function to call is passed to DirectCEntry in t6.
  ExternalReference stack_guard_check =
      ExternalReference::re_check_stack_guard_state();
  __ li(t6, Operand(stack_guard_check));

  // Call the DirectCEntry builtin at its off-heap (embedded blob) address.
  EmbeddedData d = EmbeddedData::FromBlob();
  CHECK(Builtins::IsIsolateIndependent(Builtin::kDirectCEntry));
  Address entry = d.InstructionStartOfBuiltin(Builtin::kDirectCEntry);
  __ li(kScratchReg, Operand(entry, RelocInfo::OFF_HEAP_TARGET));
  __ Call(kScratchReg);

  // DirectCEntry allocated space for the C argument slots so we have to
  // drop them with the return address from the stack with loading saved sp.
  // At this point stack must look:
  // [sp + 7] - empty slot if needed for alignment.
  // [sp + 6] - saved sp.
  // [sp + 5] - second word reserved for return value.
  // [sp + 4] - first word reserved for return value.
  // [sp + 3] - C argument slot.
  // [sp + 2] - C argument slot.
  // [sp + 1] - C argument slot.
  // [sp + 0] - C argument slot.
  __ Ld(sp, MemOperand(sp, stack_alignment + kCArgsSlotsSize));

  // Re-materialize the code pointer register after the call.
  __ li(code_pointer(), Operand(masm_->CodeObject()));
}
1209
1210 // Helper function for reading a value out of a stack frame.
1211 template <typename T>
frame_entry(Address re_frame,int frame_offset)1212 static T& frame_entry(Address re_frame, int frame_offset) {
1213 return reinterpret_cast<T&>(Memory<int32_t>(re_frame + frame_offset));
1214 }
1215
1216 template <typename T>
frame_entry_address(Address re_frame,int frame_offset)1217 static T* frame_entry_address(Address re_frame, int frame_offset) {
1218 return reinterpret_cast<T*>(re_frame + frame_offset);
1219 }
1220
CheckStackGuardState(Address * return_address,Address raw_code,Address re_frame)1221 int64_t RegExpMacroAssemblerRISCV::CheckStackGuardState(Address* return_address,
1222 Address raw_code,
1223 Address re_frame) {
1224 Code re_code = Code::cast(Object(raw_code));
1225 return NativeRegExpMacroAssembler::CheckStackGuardState(
1226 frame_entry<Isolate*>(re_frame, kIsolate),
1227 static_cast<int>(frame_entry<int64_t>(re_frame, kStartIndex)),
1228 static_cast<RegExp::CallOrigin>(
1229 frame_entry<int64_t>(re_frame, kDirectCall)),
1230 return_address, re_code,
1231 frame_entry_address<Address>(re_frame, kInputString),
1232 frame_entry_address<const byte*>(re_frame, kInputStart),
1233 frame_entry_address<const byte*>(re_frame, kInputEnd));
1234 }
1235
register_location(int register_index)1236 MemOperand RegExpMacroAssemblerRISCV::register_location(int register_index) {
1237 DCHECK(register_index < (1 << 30));
1238 if (num_registers_ <= register_index) {
1239 num_registers_ = register_index + 1;
1240 }
1241 return MemOperand(frame_pointer(),
1242 kRegisterZero - register_index * kSystemPointerSize);
1243 }
1244
CheckPosition(int cp_offset,Label * on_outside_input)1245 void RegExpMacroAssemblerRISCV::CheckPosition(int cp_offset,
1246 Label* on_outside_input) {
1247 if (cp_offset >= 0) {
1248 BranchOrBacktrack(on_outside_input, ge, current_input_offset(),
1249 Operand(-cp_offset * char_size()));
1250 } else {
1251 __ Ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
1252 __ Add64(a0, current_input_offset(), Operand(cp_offset * char_size()));
1253 BranchOrBacktrack(on_outside_input, le, a0, Operand(a1));
1254 }
1255 }
1256
BranchOrBacktrack(Label * to,Condition condition,Register rs,const Operand & rt)1257 void RegExpMacroAssemblerRISCV::BranchOrBacktrack(Label* to,
1258 Condition condition,
1259 Register rs,
1260 const Operand& rt) {
1261 if (condition == al) { // Unconditional.
1262 if (to == nullptr) {
1263 Backtrack();
1264 return;
1265 }
1266 __ jmp(to);
1267 return;
1268 }
1269 if (to == nullptr) {
1270 __ Branch(&backtrack_label_, condition, rs, rt);
1271 return;
1272 }
1273 __ Branch(to, condition, rs, rt);
1274 }
1275
// Emits a conditional branch-and-link to |to|, taken when |cond| holds for
// |rs| and |rt|. The labels called this way in this file are bound with
// SafeCallTarget and returned from with SafeReturn.
void RegExpMacroAssemblerRISCV::SafeCall(Label* to, Condition cond, Register rs,
                                         const Operand& rt) {
  __ BranchAndLink(to, cond, rs, rt);
}
1280
SafeReturn()1281 void RegExpMacroAssemblerRISCV::SafeReturn() {
1282 __ pop(ra);
1283 __ Add64(t1, ra, Operand(masm_->CodeObject()));
1284 __ Jump(t1);
1285 }
1286
SafeCallTarget(Label * name)1287 void RegExpMacroAssemblerRISCV::SafeCallTarget(Label* name) {
1288 __ bind(name);
1289 __ Sub64(ra, ra, Operand(masm_->CodeObject()));
1290 __ push(ra);
1291 }
1292
Push(Register source)1293 void RegExpMacroAssemblerRISCV::Push(Register source) {
1294 DCHECK(source != backtrack_stackpointer());
1295 __ Add64(backtrack_stackpointer(), backtrack_stackpointer(),
1296 Operand(-kIntSize));
1297 __ Sw(source, MemOperand(backtrack_stackpointer()));
1298 }
1299
Pop(Register target)1300 void RegExpMacroAssemblerRISCV::Pop(Register target) {
1301 DCHECK(target != backtrack_stackpointer());
1302 __ Lw(target, MemOperand(backtrack_stackpointer()));
1303 __ Add64(backtrack_stackpointer(), backtrack_stackpointer(), kIntSize);
1304 }
1305
CheckPreemption()1306 void RegExpMacroAssemblerRISCV::CheckPreemption() {
1307 // Check for preemption.
1308 ExternalReference stack_limit =
1309 ExternalReference::address_of_jslimit(masm_->isolate());
1310 __ li(a0, Operand(stack_limit));
1311 __ Ld(a0, MemOperand(a0));
1312 SafeCall(&check_preempt_label_, Uless_equal, sp, Operand(a0));
1313 }
1314
CheckStackLimit()1315 void RegExpMacroAssemblerRISCV::CheckStackLimit() {
1316 ExternalReference stack_limit =
1317 ExternalReference::address_of_regexp_stack_limit_address(
1318 masm_->isolate());
1319
1320 __ li(a0, Operand(stack_limit));
1321 __ Ld(a0, MemOperand(a0));
1322 SafeCall(&stack_overflow_label_, Uless_equal, backtrack_stackpointer(),
1323 Operand(a0));
1324 }
1325
LoadCurrentCharacterUnchecked(int cp_offset,int characters)1326 void RegExpMacroAssemblerRISCV::LoadCurrentCharacterUnchecked(int cp_offset,
1327 int characters) {
1328 Register offset = current_input_offset();
1329
1330 // If unaligned load/stores are not supported then this function must only
1331 // be used to load a single character at a time.
1332 if (!CanReadUnaligned()) {
1333 DCHECK_EQ(1, characters);
1334 }
1335 if (cp_offset != 0) {
1336 // t3 is not being used to store the capture start index at this point.
1337 __ Add64(t3, current_input_offset(), Operand(cp_offset * char_size()));
1338 offset = t3;
1339 }
1340
1341 if (mode_ == LATIN1) {
1342 if (characters == 4) {
1343 __ Add64(kScratchReg, end_of_input_address(), offset);
1344 __ Lwu(current_character(), MemOperand(kScratchReg));
1345 } else if (characters == 2) {
1346 __ Add64(kScratchReg, end_of_input_address(), offset);
1347 __ Lhu(current_character(), MemOperand(kScratchReg));
1348 } else {
1349 DCHECK_EQ(1, characters);
1350 __ Add64(kScratchReg, end_of_input_address(), offset);
1351 __ Lbu(current_character(), MemOperand(kScratchReg));
1352 }
1353 } else {
1354 DCHECK(mode_ == UC16);
1355 if (characters == 2) {
1356 __ Add64(kScratchReg, end_of_input_address(), offset);
1357 __ Lwu(current_character(), MemOperand(kScratchReg));
1358 } else {
1359 DCHECK_EQ(1, characters);
1360 __ Add64(kScratchReg, end_of_input_address(), offset);
1361 __ Lhu(current_character(), MemOperand(kScratchReg));
1362 }
1363 }
1364 }
1365
1366 #undef __
1367
1368 } // namespace internal
1369 } // namespace v8
1370
1371 #endif // V8_TARGET_ARCH_RISCV64
1372