1 // Copyright 2021 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <limits.h> // For LONG_MIN, LONG_MAX.
6
7 #if V8_TARGET_ARCH_RISCV64
8
9 #include "src/base/bits.h"
10 #include "src/base/division-by-constant.h"
11 #include "src/codegen/assembler-inl.h"
12 #include "src/codegen/callable.h"
13 #include "src/codegen/code-factory.h"
14 #include "src/codegen/external-reference-table.h"
15 #include "src/codegen/interface-descriptors-inl.h"
16 #include "src/codegen/macro-assembler.h"
17 #include "src/codegen/register-configuration.h"
18 #include "src/debug/debug.h"
19 #include "src/deoptimizer/deoptimizer.h"
20 #include "src/execution/frames-inl.h"
21 #include "src/heap/memory-chunk.h"
22 #include "src/init/bootstrapper.h"
23 #include "src/logging/counters.h"
24 #include "src/objects/heap-number.h"
25 #include "src/runtime/runtime.h"
26 #include "src/snapshot/snapshot.h"
27 #include "src/wasm/wasm-code-manager.h"
28
29 // Satisfy cpplint check, but don't include platform-specific header. It is
30 // included recursively via macro-assembler.h.
31 #if 0
32 #include "src/codegen/riscv64/macro-assembler-riscv64.h"
33 #endif
34
35 namespace v8 {
36 namespace internal {
37
38 static inline bool IsZero(const Operand& rt) {
39 if (rt.is_reg()) {
40 return rt.rm() == zero_reg;
41 } else {
42 return rt.immediate() == 0;
43 }
44 }
45
46 int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode,
47 Register exclusion1,
48 Register exclusion2,
49 Register exclusion3) const {
50 int bytes = 0;
51
52 RegList exclusions = {exclusion1, exclusion2, exclusion3};
53 RegList list = kJSCallerSaved - exclusions;
54 bytes += list.Count() * kSystemPointerSize;
55
56 if (fp_mode == SaveFPRegsMode::kSave) {
57 bytes += kCallerSavedFPU.Count() * kDoubleSize;
58 }
59
60 return bytes;
61 }
62
63 int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
64 Register exclusion2, Register exclusion3) {
65 int bytes = 0;
66
67 RegList exclusions = {exclusion1, exclusion2, exclusion3};
68 RegList list = kJSCallerSaved - exclusions;
69 MultiPush(list);
70 bytes += list.Count() * kSystemPointerSize;
71
72 if (fp_mode == SaveFPRegsMode::kSave) {
73 MultiPushFPU(kCallerSavedFPU);
74 bytes += kCallerSavedFPU.Count() * kDoubleSize;
75 }
76
77 return bytes;
78 }
79
80 int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
81 Register exclusion2, Register exclusion3) {
82 int bytes = 0;
83 if (fp_mode == SaveFPRegsMode::kSave) {
84 MultiPopFPU(kCallerSavedFPU);
85 bytes += kCallerSavedFPU.Count() * kDoubleSize;
86 }
87
88 RegList exclusions = {exclusion1, exclusion2, exclusion3};
89 RegList list = kJSCallerSaved - exclusions;
90 MultiPop(list);
91 bytes += list.Count() * kSystemPointerSize;
92
93 return bytes;
94 }
95
96 void TurboAssembler::LoadRoot(Register destination, RootIndex index) {
97 Ld(destination,
98 MemOperand(kRootRegister, RootRegisterOffsetForRootIndex(index)));
99 }
100
101 void TurboAssembler::LoadRoot(Register destination, RootIndex index,
102 Condition cond, Register src1,
103 const Operand& src2) {
104 Label skip;
105 BranchShort(&skip, NegateCondition(cond), src1, src2);
106 Ld(destination,
107 MemOperand(kRootRegister, RootRegisterOffsetForRootIndex(index)));
108 bind(&skip);
109 }
110
111 void TurboAssembler::PushCommonFrame(Register marker_reg) {
112 if (marker_reg.is_valid()) {
113 Push(ra, fp, marker_reg);
114 Add64(fp, sp, Operand(kSystemPointerSize));
115 } else {
116 Push(ra, fp);
117 Mv(fp, sp);
118 }
119 }
120
121 void TurboAssembler::PushStandardFrame(Register function_reg) {
122 int offset = -StandardFrameConstants::kContextOffset;
123 if (function_reg.is_valid()) {
124 Push(ra, fp, cp, function_reg, kJavaScriptCallArgCountRegister);
125 offset += 2 * kSystemPointerSize;
126 } else {
127 Push(ra, fp, cp, kJavaScriptCallArgCountRegister);
128 offset += kSystemPointerSize;
129 }
130 Add64(fp, sp, Operand(offset));
131 }
132
133 int MacroAssembler::SafepointRegisterStackIndex(int reg_code) {
134 // The registers are pushed starting with the highest encoding,
135 // which means that lowest encodings are closest to the stack pointer.
136 return kSafepointRegisterStackIndexMap[reg_code];
137 }
138
139 // Clobbers object, dst, value, and ra, if (ra_status == kRAHasBeenSaved)
140 // The register 'object' contains a heap object pointer. The heap object
141 // tag is shifted away.
142 void MacroAssembler::RecordWriteField(Register object, int offset,
143 Register value, RAStatus ra_status,
144 SaveFPRegsMode save_fp,
145 RememberedSetAction remembered_set_action,
146 SmiCheck smi_check) {
147 DCHECK(!AreAliased(object, value));
148 // First, check if a write barrier is even needed. The tests below
149 // catch stores of Smis.
150 Label done;
151
152 // Skip the barrier if writing a smi.
153 if (smi_check == SmiCheck::kInline) {
154 JumpIfSmi(value, &done);
155 }
156
157 // Although the object register is tagged, the offset is relative to the start
158 // of the object, so offset must be a multiple of kTaggedSize.
159 DCHECK(IsAligned(offset, kTaggedSize));
160
161 if (FLAG_debug_code) {
162 Label ok;
163 UseScratchRegisterScope temps(this);
164 Register scratch = temps.Acquire();
165 DCHECK(!AreAliased(object, value, scratch));
166 Add64(scratch, object, offset - kHeapObjectTag);
167 And(scratch, scratch, Operand(kTaggedSize - 1));
168 BranchShort(&ok, eq, scratch, Operand(zero_reg));
169 Abort(AbortReason::kUnalignedCellInWriteBarrier);
170 bind(&ok);
171 }
172
173 RecordWrite(object, Operand(offset - kHeapObjectTag), value, ra_status,
174 save_fp, remembered_set_action, SmiCheck::kOmit);
175
176 bind(&done);
177 }
178
179 void TurboAssembler::MaybeSaveRegisters(RegList registers) {
180 if (registers.is_empty()) return;
181 MultiPush(registers);
182 }
183
184 void TurboAssembler::MaybeRestoreRegisters(RegList registers) {
185 if (registers.is_empty()) return;
186 MultiPop(registers);
187 }
188
189 void TurboAssembler::CallEphemeronKeyBarrier(Register object,
190 Register slot_address,
191 SaveFPRegsMode fp_mode) {
192 DCHECK(!AreAliased(object, slot_address));
193 RegList registers =
194 WriteBarrierDescriptor::ComputeSavedRegisters(object, slot_address);
195 MaybeSaveRegisters(registers);
196
197 Register object_parameter = WriteBarrierDescriptor::ObjectRegister();
198 Register slot_address_parameter =
199 WriteBarrierDescriptor::SlotAddressRegister();
200
201 Push(object);
202 Push(slot_address);
203 Pop(slot_address_parameter);
204 Pop(object_parameter);
205
206 Call(isolate()->builtins()->code_handle(
207 Builtins::GetEphemeronKeyBarrierStub(fp_mode)),
208 RelocInfo::CODE_TARGET);
209 MaybeRestoreRegisters(registers);
210 }
211
212 void TurboAssembler::CallRecordWriteStubSaveRegisters(
213 Register object, Register slot_address,
214 RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode,
215 StubCallMode mode) {
216 DCHECK(!AreAliased(object, slot_address));
217 RegList registers =
218 WriteBarrierDescriptor::ComputeSavedRegisters(object, slot_address);
219 MaybeSaveRegisters(registers);
220
221 Register object_parameter = WriteBarrierDescriptor::ObjectRegister();
222 Register slot_address_parameter =
223 WriteBarrierDescriptor::SlotAddressRegister();
224
225 Push(object);
226 Push(slot_address);
227 Pop(slot_address_parameter);
228 Pop(object_parameter);
229
230 CallRecordWriteStub(object_parameter, slot_address_parameter,
231 remembered_set_action, fp_mode, mode);
232
233 MaybeRestoreRegisters(registers);
234 }
235
236 void TurboAssembler::CallRecordWriteStub(
237 Register object, Register slot_address,
238 RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode,
239 StubCallMode mode) {
240 // Use CallRecordWriteStubSaveRegisters if the object and slot registers
241 // need to be caller saved.
242 DCHECK_EQ(WriteBarrierDescriptor::ObjectRegister(), object);
243 DCHECK_EQ(WriteBarrierDescriptor::SlotAddressRegister(), slot_address);
244 if (mode == StubCallMode::kCallWasmRuntimeStub) {
245 auto wasm_target =
246 wasm::WasmCode::GetRecordWriteStub(remembered_set_action, fp_mode);
247 Call(wasm_target, RelocInfo::WASM_STUB_CALL);
248 } else {
249 auto builtin = Builtins::GetRecordWriteStub(remembered_set_action, fp_mode);
250 if (options().inline_offheap_trampolines) {
251 // Inline the trampoline.
252 RecordCommentForOffHeapTrampoline(builtin);
253
254 UseScratchRegisterScope temps(this);
255 BlockTrampolinePoolScope block_trampoline_pool(this);
256 Register scratch = temps.Acquire();
257 li(scratch, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
258 Call(scratch);
259 RecordComment("]");
260 } else {
261 Handle<Code> code_target = isolate()->builtins()->code_handle(builtin);
262 Call(code_target, RelocInfo::CODE_TARGET);
263 }
264 }
265 }
266
267 // Clobbers object, address, value, and ra, if (ra_status == kRAHasBeenSaved)
268 // The register 'object' contains a heap object pointer. The heap object
269 // tag is shifted away.
270 void MacroAssembler::RecordWrite(Register object, Operand offset,
271 Register value, RAStatus ra_status,
272 SaveFPRegsMode fp_mode,
273 RememberedSetAction remembered_set_action,
274 SmiCheck smi_check) {
275 DCHECK(!AreAliased(object, value));
276
277 if (FLAG_debug_code) {
278 UseScratchRegisterScope temps(this);
279 Register temp = temps.Acquire();
280 DCHECK(!AreAliased(object, value, temp));
281 Add64(temp, object, offset);
282 LoadTaggedPointerField(temp, MemOperand(temp));
283 Assert(eq, AbortReason::kWrongAddressOrValuePassedToRecordWrite, temp,
284 Operand(value));
285 }
286
287 if ((remembered_set_action == RememberedSetAction::kOmit &&
288 !FLAG_incremental_marking) ||
289 FLAG_disable_write_barriers) {
290 return;
291 }
292
293 // First, check if a write barrier is even needed. The tests below
294 // catch stores of smis and stores into the young generation.
295 Label done;
296
297 if (smi_check == SmiCheck::kInline) {
298 DCHECK_EQ(0, kSmiTag);
299 JumpIfSmi(value, &done);
300 }
301
302 {
303 UseScratchRegisterScope temps(this);
304 Register temp = temps.Acquire();
305 CheckPageFlag(value,
306 temp, // Used as scratch.
307 MemoryChunk::kPointersToHereAreInterestingMask,
308 eq, // CheckPageFlag tests the masked flag bits against zero, so
309 // with eq it branches to done when no flag bits are set.
310 &done);
311
312 CheckPageFlag(object,
313 temp, // Used as scratch.
314 MemoryChunk::kPointersFromHereAreInterestingMask,
315 eq, // CheckPageFlag tests the masked flag bits against zero, so
316 // with eq it branches to done when no flag bits are set.
317 &done);
318 }
319 // Record the actual write.
320 if (ra_status == kRAHasNotBeenSaved) {
321 push(ra);
322 }
323 Register slot_address = WriteBarrierDescriptor::SlotAddressRegister();
324 DCHECK(!AreAliased(object, slot_address, value));
325 // TODO(cbruni): Turn offset into int.
326 DCHECK(offset.IsImmediate());
327 Add64(slot_address, object, offset);
328 CallRecordWriteStub(object, slot_address, remembered_set_action, fp_mode);
329 if (ra_status == kRAHasNotBeenSaved) {
330 pop(ra);
331 }
332 if (FLAG_debug_code) li(slot_address, Operand(kZapValue));
333
334 bind(&done);
335 }
336
337 // ---------------------------------------------------------------------------
338 // Instruction macros.
339
340 void TurboAssembler::Add32(Register rd, Register rs, const Operand& rt) {
341 if (rt.is_reg()) {
342 if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
343 ((rd.code() & 0b11000) == 0b01000) &&
344 ((rt.rm().code() & 0b11000) == 0b01000)) {
345 c_addw(rd, rt.rm());
346 } else {
347 addw(rd, rs, rt.rm());
348 }
349 } else {
350 if (FLAG_riscv_c_extension && is_int6(rt.immediate()) &&
351 (rd.code() == rs.code()) && (rd != zero_reg) &&
352 !MustUseReg(rt.rmode())) {
353 c_addiw(rd, static_cast<int8_t>(rt.immediate()));
354 } else if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
355 addiw(rd, rs, static_cast<int32_t>(rt.immediate()));
356 } else if ((-4096 <= rt.immediate() && rt.immediate() <= -2049) ||
357 (2048 <= rt.immediate() && rt.immediate() <= 4094)) {
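// The immediate is just outside the signed 12-bit range; split it into two
// halves that each fit addiw's 12-bit immediate.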
358 addiw(rd, rs, rt.immediate() / 2);
359 addiw(rd, rd, rt.immediate() - (rt.immediate() / 2));
360 } else {
361 // li handles the relocation.
362 UseScratchRegisterScope temps(this);
363 Register scratch = temps.Acquire();
364 Li(scratch, rt.immediate());
365 addw(rd, rs, scratch);
366 }
367 }
368 }
369
370 void TurboAssembler::Add64(Register rd, Register rs, const Operand& rt) {
371 if (rt.is_reg()) {
372 if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
373 (rt.rm() != zero_reg) && (rs != zero_reg)) {
374 c_add(rd, rt.rm());
375 } else {
376 add(rd, rs, rt.rm());
377 }
378 } else {
379 if (FLAG_riscv_c_extension && is_int6(rt.immediate()) &&
380 (rd.code() == rs.code()) && (rd != zero_reg) && (rt.immediate() != 0) &&
381 !MustUseReg(rt.rmode())) {
382 c_addi(rd, static_cast<int8_t>(rt.immediate()));
383 } else if (FLAG_riscv_c_extension && is_int10(rt.immediate()) &&
384 (rt.immediate() != 0) && ((rt.immediate() & 0xf) == 0) &&
385 (rd.code() == rs.code()) && (rd == sp) &&
386 !MustUseReg(rt.rmode())) {
387 c_addi16sp(static_cast<int16_t>(rt.immediate()));
388 } else if (FLAG_riscv_c_extension && ((rd.code() & 0b11000) == 0b01000) &&
389 (rs == sp) && is_uint10(rt.immediate()) &&
390 (rt.immediate() != 0) && !MustUseReg(rt.rmode())) {
391 c_addi4spn(rd, static_cast<uint16_t>(rt.immediate()));
392 } else if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
393 addi(rd, rs, static_cast<int32_t>(rt.immediate()));
394 } else if ((-4096 <= rt.immediate() && rt.immediate() <= -2049) ||
395 (2048 <= rt.immediate() && rt.immediate() <= 4094)) {
396 addi(rd, rs, rt.immediate() / 2);
397 addi(rd, rd, rt.immediate() - (rt.immediate() / 2));
398 } else {
399 // li handles the relocation.
400 UseScratchRegisterScope temps(this);
401 Register scratch = temps.Acquire();
402 BlockTrampolinePoolScope block_trampoline_pool(this);
403 Li(scratch, rt.immediate());
404 add(rd, rs, scratch);
405 }
406 }
407 }
408
409 void TurboAssembler::Sub32(Register rd, Register rs, const Operand& rt) {
410 if (rt.is_reg()) {
411 if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
412 ((rd.code() & 0b11000) == 0b01000) &&
413 ((rt.rm().code() & 0b11000) == 0b01000)) {
414 c_subw(rd, rt.rm());
415 } else {
416 subw(rd, rs, rt.rm());
417 }
418 } else {
419 DCHECK(is_int32(rt.immediate()));
420 if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
421 (rd != zero_reg) && is_int6(-rt.immediate()) &&
422 !MustUseReg(rt.rmode())) {
423 c_addiw(
424 rd,
425 static_cast<int8_t>(
426 -rt.immediate())); // No c_subiw instr, use c_addiw(x, y, -imm).
427 } else if (is_int12(-rt.immediate()) && !MustUseReg(rt.rmode())) {
428 addiw(rd, rs,
429 static_cast<int32_t>(
430 -rt.immediate())); // No subiw instr, use addiw(x, y, -imm).
431 } else if ((-4096 <= -rt.immediate() && -rt.immediate() <= -2049) ||
432 (2048 <= -rt.immediate() && -rt.immediate() <= 4094)) {
433 addiw(rd, rs, -rt.immediate() / 2);
434 addiw(rd, rd, -rt.immediate() - (-rt.immediate() / 2));
435 } else {
436 UseScratchRegisterScope temps(this);
437 Register scratch = temps.Acquire();
438 if (-rt.immediate() >> 12 == 0 && !MustUseReg(rt.rmode())) {
439 // Use load -imm and addw when loading -imm generates one instruction.
440 Li(scratch, -rt.immediate());
441 addw(rd, rs, scratch);
442 } else {
443 // li handles the relocation.
444 Li(scratch, rt.immediate());
445 subw(rd, rs, scratch);
446 }
447 }
448 }
449 }
450
451 void TurboAssembler::Sub64(Register rd, Register rs, const Operand& rt) {
452 if (rt.is_reg()) {
453 if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
454 ((rd.code() & 0b11000) == 0b01000) &&
455 ((rt.rm().code() & 0b11000) == 0b01000)) {
456 c_sub(rd, rt.rm());
457 } else {
458 sub(rd, rs, rt.rm());
459 }
460 } else if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
461 (rd != zero_reg) && is_int6(-rt.immediate()) &&
462 (rt.immediate() != 0) && !MustUseReg(rt.rmode())) {
463 c_addi(rd,
464 static_cast<int8_t>(
465 -rt.immediate())); // No c_subi instr, use c_addi(x, y, -imm).
466
467 } else if (FLAG_riscv_c_extension && is_int10(-rt.immediate()) &&
468 (rt.immediate() != 0) && ((rt.immediate() & 0xf) == 0) &&
469 (rd.code() == rs.code()) && (rd == sp) &&
470 !MustUseReg(rt.rmode())) {
471 c_addi16sp(static_cast<int16_t>(-rt.immediate()));
472 } else if (is_int12(-rt.immediate()) && !MustUseReg(rt.rmode())) {
473 addi(rd, rs,
474 static_cast<int32_t>(
475 -rt.immediate())); // No subi instr, use addi(x, y, -imm).
476 } else if ((-4096 <= -rt.immediate() && -rt.immediate() <= -2049) ||
477 (2048 <= -rt.immediate() && -rt.immediate() <= 4094)) {
478 addi(rd, rs, -rt.immediate() / 2);
479 addi(rd, rd, -rt.immediate() - (-rt.immediate() / 2));
480 } else {
481 int li_count = InstrCountForLi64Bit(rt.immediate());
482 int li_neg_count = InstrCountForLi64Bit(-rt.immediate());
483 if (li_neg_count < li_count && !MustUseReg(rt.rmode())) {
484 // Use load -imm and add when loading -imm generates one instruction.
485 DCHECK(rt.immediate() != std::numeric_limits<int32_t>::min());
486 UseScratchRegisterScope temps(this);
487 Register scratch = temps.Acquire();
488 Li(scratch, -rt.immediate());
489 add(rd, rs, scratch);
490 } else {
491 // li handles the relocation.
492 UseScratchRegisterScope temps(this);
493 Register scratch = temps.Acquire();
494 Li(scratch, rt.immediate());
495 sub(rd, rs, scratch);
496 }
497 }
498 }
499
500 void TurboAssembler::Mul32(Register rd, Register rs, const Operand& rt) {
501 if (rt.is_reg()) {
502 mulw(rd, rs, rt.rm());
503 } else {
504 // li handles the relocation.
505 UseScratchRegisterScope temps(this);
506 Register scratch = temps.Acquire();
507 Li(scratch, rt.immediate());
508 mulw(rd, rs, scratch);
509 }
510 }
511
512 void TurboAssembler::Mulh32(Register rd, Register rs, const Operand& rt) {
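// The 32-bit operands are held sign-extended in 64-bit registers, so a 64-bit
// mul followed by an arithmetic shift right by 32 yields the signed high word
// of the 32x32 product.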
513 if (rt.is_reg()) {
514 mul(rd, rs, rt.rm());
515 } else {
516 // li handles the relocation.
517 UseScratchRegisterScope temps(this);
518 Register scratch = temps.Acquire();
519 Li(scratch, rt.immediate());
520 mul(rd, rs, scratch);
521 }
522 srai(rd, rd, 32);
523 }
524
525 void TurboAssembler::Mulhu32(Register rd, Register rs, const Operand& rt,
526 Register rsz, Register rtz) {
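// Shifting each 32-bit operand into the upper word makes mulhu return the full
// 64-bit unsigned product; the final srai keeps its upper 32 bits,
// sign-extended into the destination register.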
527 slli(rsz, rs, 32);
528 if (rt.is_reg()) {
529 slli(rtz, rt.rm(), 32);
530 } else {
531 Li(rtz, rt.immediate() << 32);
532 }
533 mulhu(rd, rsz, rtz);
534 srai(rd, rd, 32);
535 }
536
537 void TurboAssembler::Mul64(Register rd, Register rs, const Operand& rt) {
538 if (rt.is_reg()) {
539 mul(rd, rs, rt.rm());
540 } else {
541 // li handles the relocation.
542 UseScratchRegisterScope temps(this);
543 Register scratch = temps.Acquire();
544 Li(scratch, rt.immediate());
545 mul(rd, rs, scratch);
546 }
547 }
548
549 void TurboAssembler::Mulh64(Register rd, Register rs, const Operand& rt) {
550 if (rt.is_reg()) {
551 mulh(rd, rs, rt.rm());
552 } else {
553 // li handles the relocation.
554 UseScratchRegisterScope temps(this);
555 Register scratch = temps.Acquire();
556 Li(scratch, rt.immediate());
557 mulh(rd, rs, scratch);
558 }
559 }
560
561 void TurboAssembler::Div32(Register res, Register rs, const Operand& rt) {
562 if (rt.is_reg()) {
563 divw(res, rs, rt.rm());
564 } else {
565 // li handles the relocation.
566 UseScratchRegisterScope temps(this);
567 Register scratch = temps.Acquire();
568 Li(scratch, rt.immediate());
569 divw(res, rs, scratch);
570 }
571 }
572
573 void TurboAssembler::Mod32(Register rd, Register rs, const Operand& rt) {
574 if (rt.is_reg()) {
575 remw(rd, rs, rt.rm());
576 } else {
577 // li handles the relocation.
578 UseScratchRegisterScope temps(this);
579 Register scratch = temps.Acquire();
580 Li(scratch, rt.immediate());
581 remw(rd, rs, scratch);
582 }
583 }
584
585 void TurboAssembler::Modu32(Register rd, Register rs, const Operand& rt) {
586 if (rt.is_reg()) {
587 remuw(rd, rs, rt.rm());
588 } else {
589 // li handles the relocation.
590 UseScratchRegisterScope temps(this);
591 Register scratch = temps.Acquire();
592 Li(scratch, rt.immediate());
593 remuw(rd, rs, scratch);
594 }
595 }
596
597 void TurboAssembler::Div64(Register rd, Register rs, const Operand& rt) {
598 if (rt.is_reg()) {
599 div(rd, rs, rt.rm());
600 } else {
601 // li handles the relocation.
602 UseScratchRegisterScope temps(this);
603 Register scratch = temps.Acquire();
604 Li(scratch, rt.immediate());
605 div(rd, rs, scratch);
606 }
607 }
608
609 void TurboAssembler::Divu32(Register res, Register rs, const Operand& rt) {
610 if (rt.is_reg()) {
611 divuw(res, rs, rt.rm());
612 } else {
613 // li handles the relocation.
614 UseScratchRegisterScope temps(this);
615 Register scratch = temps.Acquire();
616 Li(scratch, rt.immediate());
617 divuw(res, rs, scratch);
618 }
619 }
620
621 void TurboAssembler::Divu64(Register res, Register rs, const Operand& rt) {
622 if (rt.is_reg()) {
623 divu(res, rs, rt.rm());
624 } else {
625 // li handles the relocation.
626 UseScratchRegisterScope temps(this);
627 Register scratch = temps.Acquire();
628 Li(scratch, rt.immediate());
629 divu(res, rs, scratch);
630 }
631 }
632
633 void TurboAssembler::Mod64(Register rd, Register rs, const Operand& rt) {
634 if (rt.is_reg()) {
635 rem(rd, rs, rt.rm());
636 } else {
637 // li handles the relocation.
638 UseScratchRegisterScope temps(this);
639 Register scratch = temps.Acquire();
640 Li(scratch, rt.immediate());
641 rem(rd, rs, scratch);
642 }
643 }
644
645 void TurboAssembler::Modu64(Register rd, Register rs, const Operand& rt) {
646 if (rt.is_reg()) {
647 remu(rd, rs, rt.rm());
648 } else {
649 // li handles the relocation.
650 UseScratchRegisterScope temps(this);
651 Register scratch = temps.Acquire();
652 Li(scratch, rt.immediate());
653 remu(rd, rs, scratch);
654 }
655 }
656
657 void TurboAssembler::And(Register rd, Register rs, const Operand& rt) {
658 if (rt.is_reg()) {
659 if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
660 ((rd.code() & 0b11000) == 0b01000) &&
661 ((rt.rm().code() & 0b11000) == 0b01000)) {
662 c_and(rd, rt.rm());
663 } else {
664 and_(rd, rs, rt.rm());
665 }
666 } else {
667 if (FLAG_riscv_c_extension && is_int6(rt.immediate()) &&
668 !MustUseReg(rt.rmode()) && (rd.code() == rs.code()) &&
669 ((rd.code() & 0b11000) == 0b01000)) {
670 c_andi(rd, static_cast<int8_t>(rt.immediate()));
671 } else if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
672 andi(rd, rs, static_cast<int32_t>(rt.immediate()));
673 } else {
674 // li handles the relocation.
675 UseScratchRegisterScope temps(this);
676 Register scratch = temps.Acquire();
677 Li(scratch, rt.immediate());
678 and_(rd, rs, scratch);
679 }
680 }
681 }
682
683 void TurboAssembler::Or(Register rd, Register rs, const Operand& rt) {
684 if (rt.is_reg()) {
685 if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
686 ((rd.code() & 0b11000) == 0b01000) &&
687 ((rt.rm().code() & 0b11000) == 0b01000)) {
688 c_or(rd, rt.rm());
689 } else {
690 or_(rd, rs, rt.rm());
691 }
692 } else {
693 if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
694 ori(rd, rs, static_cast<int32_t>(rt.immediate()));
695 } else {
696 // li handles the relocation.
697 UseScratchRegisterScope temps(this);
698 Register scratch = temps.Acquire();
699 Li(scratch, rt.immediate());
700 or_(rd, rs, scratch);
701 }
702 }
703 }
704
705 void TurboAssembler::Xor(Register rd, Register rs, const Operand& rt) {
706 if (rt.is_reg()) {
707 if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
708 ((rd.code() & 0b11000) == 0b01000) &&
709 ((rt.rm().code() & 0b11000) == 0b01000)) {
710 c_xor(rd, rt.rm());
711 } else {
712 xor_(rd, rs, rt.rm());
713 }
714 } else {
715 if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
716 xori(rd, rs, static_cast<int32_t>(rt.immediate()));
717 } else {
718 // li handles the relocation.
719 UseScratchRegisterScope temps(this);
720 Register scratch = temps.Acquire();
721 Li(scratch, rt.immediate());
722 xor_(rd, rs, scratch);
723 }
724 }
725 }
726
727 void TurboAssembler::Nor(Register rd, Register rs, const Operand& rt) {
728 if (rt.is_reg()) {
729 or_(rd, rs, rt.rm());
730 not_(rd, rd);
731 } else {
732 Or(rd, rs, rt);
733 not_(rd, rd);
734 }
735 }
736
737 void TurboAssembler::Neg(Register rs, const Operand& rt) {
738 DCHECK(rt.is_reg());
739 neg(rs, rt.rm());
740 }
741
742 void TurboAssembler::Seqz(Register rd, const Operand& rt) {
743 if (rt.is_reg()) {
744 seqz(rd, rt.rm());
745 } else {
746 li(rd, rt.immediate() == 0);
747 }
748 }
749
750 void TurboAssembler::Snez(Register rd, const Operand& rt) {
751 if (rt.is_reg()) {
752 snez(rd, rt.rm());
753 } else {
754 li(rd, rt.immediate() != 0);
755 }
756 }
757
758 void TurboAssembler::Seq(Register rd, Register rs, const Operand& rt) {
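// rd = (rs == rt) ? 1 : 0.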
759 if (rs == zero_reg) {
760 Seqz(rd, rt);
761 } else if (IsZero(rt)) {
762 seqz(rd, rs);
763 } else {
764 Sub64(rd, rs, rt);
765 seqz(rd, rd);
766 }
767 }
768
769 void TurboAssembler::Sne(Register rd, Register rs, const Operand& rt) {
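// rd = (rs != rt) ? 1 : 0.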
770 if (rs == zero_reg) {
771 Snez(rd, rt);
772 } else if (IsZero(rt)) {
773 snez(rd, rs);
774 } else {
775 Sub64(rd, rs, rt);
776 snez(rd, rd);
777 }
778 }
779
780 void TurboAssembler::Slt(Register rd, Register rs, const Operand& rt) {
781 if (rt.is_reg()) {
782 slt(rd, rs, rt.rm());
783 } else {
784 if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
785 slti(rd, rs, static_cast<int32_t>(rt.immediate()));
786 } else {
787 // li handles the relocation.
788 UseScratchRegisterScope temps(this);
789 Register scratch = temps.Acquire();
790 BlockTrampolinePoolScope block_trampoline_pool(this);
791 Li(scratch, rt.immediate());
792 slt(rd, rs, scratch);
793 }
794 }
795 }
796
797 void TurboAssembler::Sltu(Register rd, Register rs, const Operand& rt) {
798 if (rt.is_reg()) {
799 sltu(rd, rs, rt.rm());
800 } else {
801 if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
802 sltiu(rd, rs, static_cast<int32_t>(rt.immediate()));
803 } else {
804 // li handles the relocation.
805 UseScratchRegisterScope temps(this);
806 Register scratch = temps.Acquire();
807 BlockTrampolinePoolScope block_trampoline_pool(this);
808 Li(scratch, rt.immediate());
809 sltu(rd, rs, scratch);
810 }
811 }
812 }
813
814 void TurboAssembler::Sle(Register rd, Register rs, const Operand& rt) {
815 if (rt.is_reg()) {
816 slt(rd, rt.rm(), rs);
817 } else {
818 // li handles the relocation.
819 UseScratchRegisterScope temps(this);
820 Register scratch = temps.Acquire();
821 BlockTrampolinePoolScope block_trampoline_pool(this);
822 Li(scratch, rt.immediate());
823 slt(rd, scratch, rs);
824 }
825 xori(rd, rd, 1);
826 }
827
828 void TurboAssembler::Sleu(Register rd, Register rs, const Operand& rt) {
829 if (rt.is_reg()) {
830 sltu(rd, rt.rm(), rs);
831 } else {
832 // li handles the relocation.
833 UseScratchRegisterScope temps(this);
834 Register scratch = temps.Acquire();
835 BlockTrampolinePoolScope block_trampoline_pool(this);
836 Li(scratch, rt.immediate());
837 sltu(rd, scratch, rs);
838 }
839 xori(rd, rd, 1);
840 }
841
842 void TurboAssembler::Sge(Register rd, Register rs, const Operand& rt) {
843 Slt(rd, rs, rt);
844 xori(rd, rd, 1);
845 }
846
847 void TurboAssembler::Sgeu(Register rd, Register rs, const Operand& rt) {
848 Sltu(rd, rs, rt);
849 xori(rd, rd, 1);
850 }
851
852 void TurboAssembler::Sgt(Register rd, Register rs, const Operand& rt) {
853 if (rt.is_reg()) {
854 slt(rd, rt.rm(), rs);
855 } else {
856 // li handles the relocation.
857 UseScratchRegisterScope temps(this);
858 Register scratch = temps.Acquire();
859 BlockTrampolinePoolScope block_trampoline_pool(this);
860 Li(scratch, rt.immediate());
861 slt(rd, scratch, rs);
862 }
863 }
864
865 void TurboAssembler::Sgtu(Register rd, Register rs, const Operand& rt) {
866 if (rt.is_reg()) {
867 sltu(rd, rt.rm(), rs);
868 } else {
869 // li handles the relocation.
870 UseScratchRegisterScope temps(this);
871 Register scratch = temps.Acquire();
872 BlockTrampolinePoolScope block_trampoline_pool(this);
873 Li(scratch, rt.immediate());
874 sltu(rd, scratch, rs);
875 }
876 }
877
878 void TurboAssembler::Sll32(Register rd, Register rs, const Operand& rt) {
879 if (rt.is_reg()) {
880 sllw(rd, rs, rt.rm());
881 } else {
882 uint8_t shamt = static_cast<uint8_t>(rt.immediate());
883 slliw(rd, rs, shamt);
884 }
885 }
886
887 void TurboAssembler::Sra32(Register rd, Register rs, const Operand& rt) {
888 if (rt.is_reg()) {
889 sraw(rd, rs, rt.rm());
890 } else {
891 uint8_t shamt = static_cast<uint8_t>(rt.immediate());
892 sraiw(rd, rs, shamt);
893 }
894 }
895
896 void TurboAssembler::Srl32(Register rd, Register rs, const Operand& rt) {
897 if (rt.is_reg()) {
898 srlw(rd, rs, rt.rm());
899 } else {
900 uint8_t shamt = static_cast<uint8_t>(rt.immediate());
901 srliw(rd, rs, shamt);
902 }
903 }
904
905 void TurboAssembler::Sra64(Register rd, Register rs, const Operand& rt) {
906 if (rt.is_reg()) {
907 sra(rd, rs, rt.rm());
908 } else if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
909 ((rd.code() & 0b11000) == 0b01000) && is_int6(rt.immediate())) {
910 uint8_t shamt = static_cast<uint8_t>(rt.immediate());
911 c_srai(rd, shamt);
912 } else {
913 uint8_t shamt = static_cast<uint8_t>(rt.immediate());
914 srai(rd, rs, shamt);
915 }
916 }
917
918 void TurboAssembler::Srl64(Register rd, Register rs, const Operand& rt) {
919 if (rt.is_reg()) {
920 srl(rd, rs, rt.rm());
921 } else if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
922 ((rd.code() & 0b11000) == 0b01000) && is_int6(rt.immediate())) {
923 uint8_t shamt = static_cast<uint8_t>(rt.immediate());
924 c_srli(rd, shamt);
925 } else {
926 uint8_t shamt = static_cast<uint8_t>(rt.immediate());
927 srli(rd, rs, shamt);
928 }
929 }
930
931 void TurboAssembler::Sll64(Register rd, Register rs, const Operand& rt) {
932 if (rt.is_reg()) {
933 sll(rd, rs, rt.rm());
934 } else {
935 uint8_t shamt = static_cast<uint8_t>(rt.immediate());
936 if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
937 (rd != zero_reg) && (shamt != 0) && is_uint6(shamt)) {
938 c_slli(rd, shamt);
939 } else {
940 slli(rd, rs, shamt);
941 }
942 }
943 }
944
945 void TurboAssembler::Li(Register rd, int64_t imm) {
946 if (FLAG_riscv_c_extension && (rd != zero_reg) && is_int6(imm)) {
947 c_li(rd, imm);
948 } else {
949 RV_li(rd, imm);
950 }
951 }
952
953 void TurboAssembler::Mv(Register rd, const Operand& rt) {
954 if (FLAG_riscv_c_extension && (rd != zero_reg) && (rt.rm() != zero_reg)) {
955 c_mv(rd, rt.rm());
956 } else {
957 mv(rd, rt.rm());
958 }
959 }
960
961 void TurboAssembler::Ror(Register rd, Register rs, const Operand& rt) {
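// 32-bit rotate right: rd = rotr32(rs, rt), with the result sign-extended to
// 64 bits.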
962 UseScratchRegisterScope temps(this);
963 Register scratch = temps.Acquire();
964 BlockTrampolinePoolScope block_trampoline_pool(this);
965 if (rt.is_reg()) {
966 negw(scratch, rt.rm());
967 sllw(scratch, rs, scratch);
968 srlw(rd, rs, rt.rm());
969 or_(rd, scratch, rd);
970 sext_w(rd, rd);
971 } else {
972 int64_t ror_value = rt.immediate() % 32;
973 if (ror_value == 0) {
974 Mv(rd, rs);
975 return;
976 } else if (ror_value < 0) {
977 ror_value += 32;
978 }
979 srliw(scratch, rs, ror_value);
980 slliw(rd, rs, 32 - ror_value);
981 or_(rd, scratch, rd);
982 sext_w(rd, rd);
983 }
984 }
985
986 void TurboAssembler::Dror(Register rd, Register rs, const Operand& rt) {
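// 64-bit rotate right: rd = rotr64(rs, rt).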
987 UseScratchRegisterScope temps(this);
988 Register scratch = temps.Acquire();
989 BlockTrampolinePoolScope block_trampoline_pool(this);
990 if (rt.is_reg()) {
991 negw(scratch, rt.rm());
992 sll(scratch, rs, scratch);
993 srl(rd, rs, rt.rm());
994 or_(rd, scratch, rd);
995 } else {
996 int64_t dror_value = rt.immediate() % 64;
997 if (dror_value == 0) {
998 Mv(rd, rs);
999 return;
1000 } else if (dror_value < 0) {
1001 dror_value += 64;
1002 }
1003 srli(scratch, rs, dror_value);
1004 slli(rd, rs, 64 - dror_value);
1005 or_(rd, scratch, rd);
1006 }
1007 }
1008
1009 void TurboAssembler::CalcScaledAddress(Register rd, Register rt, Register rs,
1010 uint8_t sa) {
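// rd = rt + (rs << sa); a scratch register holds the shifted value when rd
// aliases rt, so the shift does not clobber the addend.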
1011 DCHECK(sa >= 1 && sa <= 31);
1012 UseScratchRegisterScope temps(this);
1013 Register tmp = rd == rt ? temps.Acquire() : rd;
1014 DCHECK(tmp != rt);
1015 slli(tmp, rs, sa);
1016 Add64(rd, rt, tmp);
1017 }
1018
1019 // ------------Pseudo-instructions-------------
1020 // Change endianness
1021 void TurboAssembler::ByteSwap(Register rd, Register rs, int operand_size,
1022 Register scratch) {
1023 DCHECK_NE(scratch, rs);
1024 DCHECK_NE(scratch, rd);
1025 DCHECK(operand_size == 4 || operand_size == 8);
1026 if (operand_size == 4) {
1027 // uint32_t x1 = 0x00FF00FF;
1028 // x0 = (x0 << 16 | x0 >> 16);
1029 // x0 = (((x0 & x1) << 8) | ((x0 & (x1 << 8)) >> 8));
1030 UseScratchRegisterScope temps(this);
1031 BlockTrampolinePoolScope block_trampoline_pool(this);
1032 DCHECK((rd != t6) && (rs != t6));
1033 Register x0 = temps.Acquire();
1034 Register x1 = temps.Acquire();
1035 Register x2 = scratch;
1036 li(x1, 0x00FF00FF);
1037 slliw(x0, rs, 16);
1038 srliw(rd, rs, 16);
1039 or_(x0, rd, x0); // x0 <- x0 << 16 | x0 >> 16
1040 and_(x2, x0, x1); // x2 <- x0 & 0x00FF00FF
1041 slliw(x2, x2, 8); // x2 <- (x0 & x1) << 8
1042 slliw(x1, x1, 8); // x1 <- 0xFF00FF00
1043 and_(rd, x0, x1); // x0 & 0xFF00FF00
1044 srliw(rd, rd, 8);
1045 or_(rd, rd, x2); // (((x0 & x1) << 8) | ((x0 & (x1 << 8)) >> 8))
1046 } else {
1047 // uint64_t x1 = 0x0000FFFF0000FFFFl;
1048 // uint64_t x1 = 0x00FF00FF00FF00FFl;
1049 // x0 = (x0 << 32 | x0 >> 32);
1050 // x0 = (x0 & x1) << 16 | (x0 & (x1 << 16)) >> 16;
1051 // x0 = (x0 & x1) << 8 | (x0 & (x1 << 8)) >> 8;
1052 UseScratchRegisterScope temps(this);
1053 BlockTrampolinePoolScope block_trampoline_pool(this);
1054 DCHECK((rd != t6) && (rs != t6));
1055 Register x0 = temps.Acquire();
1056 Register x1 = temps.Acquire();
1057 Register x2 = scratch;
1058 li(x1, 0x0000FFFF0000FFFFl);
1059 slli(x0, rs, 32);
1060 srli(rd, rs, 32);
1061 or_(x0, rd, x0); // x0 <- x0 << 32 | x0 >> 32
1062 and_(x2, x0, x1); // x2 <- x0 & 0x0000FFFF0000FFFF
1063 slli(x2, x2, 16); // x2 <- (x0 & 0x0000FFFF0000FFFF) << 16
1064 slli(x1, x1, 16); // x1 <- 0xFFFF0000FFFF0000
1065 and_(rd, x0, x1); // rd <- x0 & 0xFFFF0000FFFF0000
1066 srli(rd, rd, 16); // rd <- x0 & (x1 << 16)) >> 16
1067 or_(x0, rd, x2); // (x0 & x1) << 16 | (x0 & (x1 << 16)) >> 16;
1068 li(x1, 0x00FF00FF00FF00FFl);
1069 and_(x2, x0, x1); // x2 <- x0 & 0x00FF00FF00FF00FF
1070 slli(x2, x2, 8); // x2 <- (x0 & x1) << 8
1071 slli(x1, x1, 8); // x1 <- 0xFF00FF00FF00FF00
1072 and_(rd, x0, x1);
1073 srli(rd, rd, 8); // rd <- (x0 & (x1 << 8)) >> 8
1074 or_(rd, rd, x2); // (((x0 & x1) << 8) | ((x0 & (x1 << 8)) >> 8))
1075 }
1076 }
1077
1078 template <int NBYTES, bool LOAD_SIGNED>
1079 void TurboAssembler::LoadNBytes(Register rd, const MemOperand& rs,
1080 Register scratch) {
1081 DCHECK(rd != rs.rm() && rd != scratch);
1082 DCHECK_LE(NBYTES, 8);
1083
1084 // load the most significant byte
1085 if (LOAD_SIGNED) {
1086 lb(rd, rs.rm(), rs.offset() + (NBYTES - 1));
1087 } else {
1088 lbu(rd, rs.rm(), rs.offset() + (NBYTES - 1));
1089 }
1090
1091 // load remaining (nbytes-1) bytes from higher to lower
1092 slli(rd, rd, 8 * (NBYTES - 1));
1093 for (int i = (NBYTES - 2); i >= 0; i--) {
1094 lbu(scratch, rs.rm(), rs.offset() + i);
1095 if (i) slli(scratch, scratch, i * 8);
1096 or_(rd, rd, scratch);
1097 }
1098 }
1099
1100 template <int NBYTES, bool LOAD_SIGNED>
1101 void TurboAssembler::LoadNBytesOverwritingBaseReg(const MemOperand& rs,
1102 Register scratch0,
1103 Register scratch1) {
1104 // Loads NBYTES from the memory operand rs into rs.rm(), overwriting the base register.
1105 DCHECK(rs.rm() != scratch0 && rs.rm() != scratch1 && scratch0 != scratch1);
1106 DCHECK_LE(NBYTES, 8);
1107
1108 // load the most significant byte
1109 if (LOAD_SIGNED) {
1110 lb(scratch0, rs.rm(), rs.offset() + (NBYTES - 1));
1111 } else {
1112 lbu(scratch0, rs.rm(), rs.offset() + (NBYTES - 1));
1113 }
1114
1115 // load remaining (nbytes-1) bytes from higher to lower
1116 slli(scratch0, scratch0, 8 * (NBYTES - 1));
1117 for (int i = (NBYTES - 2); i >= 0; i--) {
1118 lbu(scratch1, rs.rm(), rs.offset() + i);
1119 if (i) {
1120 slli(scratch1, scratch1, i * 8);
1121 or_(scratch0, scratch0, scratch1);
1122 } else {
1123 // write to rs.rm() when processing the last byte
1124 or_(rs.rm(), scratch0, scratch1);
1125 }
1126 }
1127 }
1128
1129 template <int NBYTES, bool IS_SIGNED>
1130 void TurboAssembler::UnalignedLoadHelper(Register rd, const MemOperand& rs) {
1131 BlockTrampolinePoolScope block_trampoline_pool(this);
1132 UseScratchRegisterScope temps(this);
1133
1134 if (NeedAdjustBaseAndOffset(rs, OffsetAccessType::TWO_ACCESSES, NBYTES - 1)) {
1135 // Adjust offset for two accesses and check if offset + NBYTES - 1 fits into int12.
1136 MemOperand source = rs;
1137 Register scratch_base = temps.Acquire();
1138 DCHECK(scratch_base != rs.rm());
1139 AdjustBaseAndOffset(&source, scratch_base, OffsetAccessType::TWO_ACCESSES,
1140 NBYTES - 1);
1141
1142 // Since source.rm() is scratch_base, assume rd != source.rm()
1143 DCHECK(rd != source.rm());
1144 Register scratch_other = temps.Acquire();
1145 LoadNBytes<NBYTES, IS_SIGNED>(rd, source, scratch_other);
1146 } else {
1147 // no need to adjust base-and-offset
1148 if (rd != rs.rm()) {
1149 Register scratch = temps.Acquire();
1150 LoadNBytes<NBYTES, IS_SIGNED>(rd, rs, scratch);
1151 } else { // rd == rs.rm()
1152 Register scratch = temps.Acquire();
1153 Register scratch2 = temps.Acquire();
1154 LoadNBytesOverwritingBaseReg<NBYTES, IS_SIGNED>(rs, scratch, scratch2);
1155 }
1156 }
1157 }
1158
1159 template <int NBYTES>
1160 void TurboAssembler::UnalignedFLoadHelper(FPURegister frd, const MemOperand& rs,
1161 Register scratch_base) {
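// Assemble the value byte-by-byte in an integer scratch register, then move
// the raw bit pattern into the FP register with fmv.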
1162 DCHECK(NBYTES == 4 || NBYTES == 8);
1163 DCHECK_NE(scratch_base, rs.rm());
1164 BlockTrampolinePoolScope block_trampoline_pool(this);
1165 MemOperand source = rs;
1166 if (NeedAdjustBaseAndOffset(rs, OffsetAccessType::TWO_ACCESSES, NBYTES - 1)) {
1167 // Adjust offset for two accesses and check if offset + NBYTES - 1 fits into int12.
1168 DCHECK(scratch_base != rs.rm());
1169 AdjustBaseAndOffset(&source, scratch_base, OffsetAccessType::TWO_ACCESSES,
1170 NBYTES - 1);
1171 }
1172 UseScratchRegisterScope temps(this);
1173 Register scratch_other = temps.Acquire();
1174 Register scratch = temps.Acquire();
1175 DCHECK(scratch != rs.rm() && scratch_other != scratch &&
1176 scratch_other != rs.rm());
1177 LoadNBytes<NBYTES, true>(scratch, source, scratch_other);
1178 if (NBYTES == 4)
1179 fmv_w_x(frd, scratch);
1180 else
1181 fmv_d_x(frd, scratch);
1182 }
1183
1184 template <int NBYTES>
1185 void TurboAssembler::UnalignedStoreHelper(Register rd, const MemOperand& rs,
1186 Register scratch_other) {
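// Store NBYTES of rd one byte at a time, least-significant byte first, so the
// access works for any alignment.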
1187 DCHECK(scratch_other != rs.rm());
1188 DCHECK_LE(NBYTES, 8);
1189 MemOperand source = rs;
1190 UseScratchRegisterScope temps(this);
1191 Register scratch_base = temps.Acquire();
1192 // Adjust offset for two accesses and check if offset + NBYTES - 1 fits into int12.
1193 if (NeedAdjustBaseAndOffset(rs, OffsetAccessType::TWO_ACCESSES, NBYTES - 1)) {
1194 DCHECK(scratch_base != rd && scratch_base != rs.rm());
1195 AdjustBaseAndOffset(&source, scratch_base, OffsetAccessType::TWO_ACCESSES,
1196 NBYTES - 1);
1197 }
1198
1199 BlockTrampolinePoolScope block_trampoline_pool(this);
1200 if (scratch_other == no_reg) {
1201 if (temps.hasAvailable()) {
1202 scratch_other = temps.Acquire();
1203 } else {
1204 push(t2);
1205 scratch_other = t2;
1206 }
1207 }
1208
1209 DCHECK(scratch_other != rd && scratch_other != rs.rm() &&
1210 scratch_other != source.rm());
1211
1212 sb(rd, source.rm(), source.offset());
1213 for (size_t i = 1; i <= (NBYTES - 1); i++) {
1214 srli(scratch_other, rd, i * 8);
1215 sb(scratch_other, source.rm(), source.offset() + i);
1216 }
1217 if (scratch_other == t2) {
1218 pop(t2);
1219 }
1220 }
1221
1222 template <int NBYTES>
1223 void TurboAssembler::UnalignedFStoreHelper(FPURegister frd,
1224 const MemOperand& rs,
1225 Register scratch) {
1226 DCHECK(NBYTES == 8 || NBYTES == 4);
1227 DCHECK_NE(scratch, rs.rm());
1228 if (NBYTES == 4) {
1229 fmv_x_w(scratch, frd);
1230 } else {
1231 fmv_x_d(scratch, frd);
1232 }
1233 UnalignedStoreHelper<NBYTES>(scratch, rs);
1234 }
1235
1236 template <typename Reg_T, typename Func>
1237 void TurboAssembler::AlignedLoadHelper(Reg_T target, const MemOperand& rs,
1238 Func generator) {
1239 MemOperand source = rs;
1240 UseScratchRegisterScope temps(this);
1241 BlockTrampolinePoolScope block_trampoline_pool(this);
1242 if (NeedAdjustBaseAndOffset(source)) {
1243 Register scratch = temps.Acquire();
1244 DCHECK(scratch != rs.rm());
1245 AdjustBaseAndOffset(&source, scratch);
1246 }
1247 generator(target, source);
1248 }
1249
1250 template <typename Reg_T, typename Func>
1251 void TurboAssembler::AlignedStoreHelper(Reg_T value, const MemOperand& rs,
1252 Func generator) {
1253 MemOperand source = rs;
1254 UseScratchRegisterScope temps(this);
1255 BlockTrampolinePoolScope block_trampoline_pool(this);
1256 if (NeedAdjustBaseAndOffset(source)) {
1257 Register scratch = temps.Acquire();
1258 // make sure scratch does not overwrite value
1259 if (std::is_same<Reg_T, Register>::value)
1260 DCHECK(scratch.code() != value.code());
1261 DCHECK(scratch != rs.rm());
1262 AdjustBaseAndOffset(&source, scratch);
1263 }
1264 generator(value, source);
1265 }
1266
1267 void TurboAssembler::Ulw(Register rd, const MemOperand& rs) {
1268 UnalignedLoadHelper<4, true>(rd, rs);
1269 }
1270
1271 void TurboAssembler::Ulwu(Register rd, const MemOperand& rs) {
1272 UnalignedLoadHelper<4, false>(rd, rs);
1273 }
1274
1275 void TurboAssembler::Usw(Register rd, const MemOperand& rs) {
1276 UnalignedStoreHelper<4>(rd, rs);
1277 }
1278
1279 void TurboAssembler::Ulh(Register rd, const MemOperand& rs) {
1280 UnalignedLoadHelper<2, true>(rd, rs);
1281 }
1282
1283 void TurboAssembler::Ulhu(Register rd, const MemOperand& rs) {
1284 UnalignedLoadHelper<2, false>(rd, rs);
1285 }
1286
1287 void TurboAssembler::Ush(Register rd, const MemOperand& rs) {
1288 UnalignedStoreHelper<2>(rd, rs);
1289 }
1290
1291 void TurboAssembler::Uld(Register rd, const MemOperand& rs) {
1292 UnalignedLoadHelper<8, true>(rd, rs);
1293 }
1294
1295 // Load a consecutive 32-bit word pair into a 64-bit reg, putting the first
1296 // word in the low bits and the second word in the high bits.
1298 void MacroAssembler::LoadWordPair(Register rd, const MemOperand& rs) {
1299 UseScratchRegisterScope temps(this);
1300 Register scratch = temps.Acquire();
1301 Lwu(rd, rs);
1302 Lw(scratch, MemOperand(rs.rm(), rs.offset() + kSystemPointerSize / 2));
1303 slli(scratch, scratch, 32);
1304 Add64(rd, rd, scratch);
1305 }
1306
1307 void TurboAssembler::Usd(Register rd, const MemOperand& rs) {
1308 UnalignedStoreHelper<8>(rd, rs);
1309 }
1310
1311 // Do a 64-bit store as two consecutive 32-bit stores to an unaligned address.
1312 void MacroAssembler::StoreWordPair(Register rd, const MemOperand& rs) {
1313 UseScratchRegisterScope temps(this);
1314 Register scratch = temps.Acquire();
1315 Sw(rd, rs);
1316 srai(scratch, rd, 32);
1317 Sw(scratch, MemOperand(rs.rm(), rs.offset() + kSystemPointerSize / 2));
1318 }
1319
1320 void TurboAssembler::ULoadFloat(FPURegister fd, const MemOperand& rs,
1321 Register scratch) {
1322 DCHECK_NE(scratch, rs.rm());
1323 UnalignedFLoadHelper<4>(fd, rs, scratch);
1324 }
1325
1326 void TurboAssembler::UStoreFloat(FPURegister fd, const MemOperand& rs,
1327 Register scratch) {
1328 DCHECK_NE(scratch, rs.rm());
1329 UnalignedFStoreHelper<4>(fd, rs, scratch);
1330 }
1331
1332 void TurboAssembler::ULoadDouble(FPURegister fd, const MemOperand& rs,
1333 Register scratch) {
1334 DCHECK_NE(scratch, rs.rm());
1335 UnalignedFLoadHelper<8>(fd, rs, scratch);
1336 }
1337
1338 void TurboAssembler::UStoreDouble(FPURegister fd, const MemOperand& rs,
1339 Register scratch) {
1340 DCHECK_NE(scratch, rs.rm());
1341 UnalignedFStoreHelper<8>(fd, rs, scratch);
1342 }
1343
1344 void TurboAssembler::Lb(Register rd, const MemOperand& rs) {
1345 auto fn = [this](Register target, const MemOperand& source) {
1346 this->lb(target, source.rm(), source.offset());
1347 };
1348 AlignedLoadHelper(rd, rs, fn);
1349 }
1350
1351 void TurboAssembler::Lbu(Register rd, const MemOperand& rs) {
1352 auto fn = [this](Register target, const MemOperand& source) {
1353 this->lbu(target, source.rm(), source.offset());
1354 };
1355 AlignedLoadHelper(rd, rs, fn);
1356 }
1357
1358 void TurboAssembler::Sb(Register rd, const MemOperand& rs) {
1359 auto fn = [this](Register value, const MemOperand& source) {
1360 this->sb(value, source.rm(), source.offset());
1361 };
1362 AlignedStoreHelper(rd, rs, fn);
1363 }
1364
1365 void TurboAssembler::Lh(Register rd, const MemOperand& rs) {
1366 auto fn = [this](Register target, const MemOperand& source) {
1367 this->lh(target, source.rm(), source.offset());
1368 };
1369 AlignedLoadHelper(rd, rs, fn);
1370 }
1371
1372 void TurboAssembler::Lhu(Register rd, const MemOperand& rs) {
1373 auto fn = [this](Register target, const MemOperand& source) {
1374 this->lhu(target, source.rm(), source.offset());
1375 };
1376 AlignedLoadHelper(rd, rs, fn);
1377 }
1378
1379 void TurboAssembler::Sh(Register rd, const MemOperand& rs) {
1380 auto fn = [this](Register value, const MemOperand& source) {
1381 this->sh(value, source.rm(), source.offset());
1382 };
1383 AlignedStoreHelper(rd, rs, fn);
1384 }
1385
1386 void TurboAssembler::Lw(Register rd, const MemOperand& rs) {
1387 auto fn = [this](Register target, const MemOperand& source) {
1388 if (FLAG_riscv_c_extension && ((target.code() & 0b11000) == 0b01000) &&
1389 ((source.rm().code() & 0b11000) == 0b01000) &&
1390 is_uint7(source.offset()) && ((source.offset() & 0x3) == 0)) {
1391 this->c_lw(target, source.rm(), source.offset());
1392 } else if (FLAG_riscv_c_extension && (target != zero_reg) &&
1393 is_uint8(source.offset()) && (source.rm() == sp) &&
1394 ((source.offset() & 0x3) == 0)) {
1395 this->c_lwsp(target, source.offset());
1396 } else {
1397 this->lw(target, source.rm(), source.offset());
1398 }
1399 };
1400 AlignedLoadHelper(rd, rs, fn);
1401 }
1402
1403 void TurboAssembler::Lwu(Register rd, const MemOperand& rs) {
1404 auto fn = [this](Register target, const MemOperand& source) {
1405 this->lwu(target, source.rm(), source.offset());
1406 };
1407 AlignedLoadHelper(rd, rs, fn);
1408 }
1409
1410 void TurboAssembler::Sw(Register rd, const MemOperand& rs) {
1411 auto fn = [this](Register value, const MemOperand& source) {
1412 if (FLAG_riscv_c_extension && ((value.code() & 0b11000) == 0b01000) &&
1413 ((source.rm().code() & 0b11000) == 0b01000) &&
1414 is_uint7(source.offset()) && ((source.offset() & 0x3) == 0)) {
1415 this->c_sw(value, source.rm(), source.offset());
1416 } else if (FLAG_riscv_c_extension && (source.rm() == sp) &&
1417 is_uint8(source.offset()) && (((source.offset() & 0x3) == 0))) {
1418 this->c_swsp(value, source.offset());
1419 } else {
1420 this->sw(value, source.rm(), source.offset());
1421 }
1422 };
1423 AlignedStoreHelper(rd, rs, fn);
1424 }
1425
1426 void TurboAssembler::Ld(Register rd, const MemOperand& rs) {
1427 auto fn = [this](Register target, const MemOperand& source) {
1428 if (FLAG_riscv_c_extension && ((target.code() & 0b11000) == 0b01000) &&
1429 ((source.rm().code() & 0b11000) == 0b01000) &&
1430 is_uint8(source.offset()) && ((source.offset() & 0x7) == 0)) {
1431 this->c_ld(target, source.rm(), source.offset());
1432 } else if (FLAG_riscv_c_extension && (target != zero_reg) &&
1433 is_uint9(source.offset()) && (source.rm() == sp) &&
1434 ((source.offset() & 0x7) == 0)) {
1435 this->c_ldsp(target, source.offset());
1436 } else {
1437 this->ld(target, source.rm(), source.offset());
1438 }
1439 };
1440 AlignedLoadHelper(rd, rs, fn);
1441 }
1442
1443 void TurboAssembler::Sd(Register rd, const MemOperand& rs) {
1444 auto fn = [this](Register value, const MemOperand& source) {
1445 if (FLAG_riscv_c_extension && ((value.code() & 0b11000) == 0b01000) &&
1446 ((source.rm().code() & 0b11000) == 0b01000) &&
1447 is_uint8(source.offset()) && ((source.offset() & 0x7) == 0)) {
1448 this->c_sd(value, source.rm(), source.offset());
1449 } else if (FLAG_riscv_c_extension && (source.rm() == sp) &&
1450 is_uint9(source.offset()) && ((source.offset() & 0x7) == 0)) {
1451 this->c_sdsp(value, source.offset());
1452 } else {
1453 this->sd(value, source.rm(), source.offset());
1454 }
1455 };
1456 AlignedStoreHelper(rd, rs, fn);
1457 }
1458
1459 void TurboAssembler::LoadFloat(FPURegister fd, const MemOperand& src) {
1460 auto fn = [this](FPURegister target, const MemOperand& source) {
1461 this->flw(target, source.rm(), source.offset());
1462 };
1463 AlignedLoadHelper(fd, src, fn);
1464 }
1465
1466 void TurboAssembler::StoreFloat(FPURegister fs, const MemOperand& src) {
1467 auto fn = [this](FPURegister value, const MemOperand& source) {
1468 this->fsw(value, source.rm(), source.offset());
1469 };
1470 AlignedStoreHelper(fs, src, fn);
1471 }
1472
1473 void TurboAssembler::LoadDouble(FPURegister fd, const MemOperand& src) {
1474 auto fn = [this](FPURegister target, const MemOperand& source) {
1475 if (FLAG_riscv_c_extension && ((target.code() & 0b11000) == 0b01000) &&
1476 ((source.rm().code() & 0b11000) == 0b01000) &&
1477 is_uint8(source.offset()) && ((source.offset() & 0x7) == 0)) {
1478 this->c_fld(target, source.rm(), source.offset());
1479 } else if (FLAG_riscv_c_extension && (source.rm() == sp) &&
1480 is_uint9(source.offset()) && ((source.offset() & 0x7) == 0)) {
1481 this->c_fldsp(target, source.offset());
1482 } else {
1483 this->fld(target, source.rm(), source.offset());
1484 }
1485 };
1486 AlignedLoadHelper(fd, src, fn);
1487 }
1488
1489 void TurboAssembler::StoreDouble(FPURegister fs, const MemOperand& src) {
1490 auto fn = [this](FPURegister value, const MemOperand& source) {
1491 if (FLAG_riscv_c_extension && ((value.code() & 0b11000) == 0b01000) &&
1492 ((source.rm().code() & 0b11000) == 0b01000) &&
1493 is_uint8(source.offset()) && ((source.offset() & 0x7) == 0)) {
1494 this->c_fsd(value, source.rm(), source.offset());
1495 } else if (FLAG_riscv_c_extension && (source.rm() == sp) &&
1496 is_uint9(source.offset()) && ((source.offset() & 0x7) == 0)) {
1497 this->c_fsdsp(value, source.offset());
1498 } else {
1499 this->fsd(value, source.rm(), source.offset());
1500 }
1501 };
1502 AlignedStoreHelper(fs, src, fn);
1503 }
1504
1505 void TurboAssembler::Ll(Register rd, const MemOperand& rs) {
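// Load-reserved word: begins an LR/SC atomic sequence; pair with Sc on the
// same address.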
1506 bool is_one_instruction = rs.offset() == 0;
1507 if (is_one_instruction) {
1508 lr_w(false, false, rd, rs.rm());
1509 } else {
1510 UseScratchRegisterScope temps(this);
1511 Register scratch = temps.Acquire();
1512 Add64(scratch, rs.rm(), rs.offset());
1513 lr_w(false, false, rd, scratch);
1514 }
1515 }
1516
1517 void TurboAssembler::Lld(Register rd, const MemOperand& rs) {
1518 bool is_one_instruction = rs.offset() == 0;
1519 if (is_one_instruction) {
1520 lr_d(false, false, rd, rs.rm());
1521 } else {
1522 UseScratchRegisterScope temps(this);
1523 Register scratch = temps.Acquire();
1524 Add64(scratch, rs.rm(), rs.offset());
1525 lr_d(false, false, rd, scratch);
1526 }
1527 }
1528
1529 void TurboAssembler::Sc(Register rd, const MemOperand& rs) {
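// Store-conditional word: completes an LR/SC sequence; rd receives 0 on
// success and a non-zero value on failure.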
1530 bool is_one_instruction = rs.offset() == 0;
1531 if (is_one_instruction) {
1532 sc_w(false, false, rd, rs.rm(), rd);
1533 } else {
1534 UseScratchRegisterScope temps(this);
1535 Register scratch = temps.Acquire();
1536 Add64(scratch, rs.rm(), rs.offset());
1537 sc_w(false, false, rd, scratch, rd);
1538 }
1539 }
1540
Scd(Register rd,const MemOperand & rs)1541 void TurboAssembler::Scd(Register rd, const MemOperand& rs) {
1542 bool is_one_instruction = rs.offset() == 0;
1543 if (is_one_instruction) {
1544 sc_d(false, false, rd, rs.rm(), rd);
1545 } else {
1546 UseScratchRegisterScope temps(this);
1547 Register scratch = temps.Acquire();
1548 Add64(scratch, rs.rm(), rs.offset());
1549 sc_d(false, false, rd, scratch, rd);
1550 }
1551 }
1552
li(Register dst,Handle<HeapObject> value,RelocInfo::Mode rmode)1553 void TurboAssembler::li(Register dst, Handle<HeapObject> value,
1554 RelocInfo::Mode rmode) {
1555 // TODO(jgruber,v8:8887): Also consider a root-relative load when generating
1556 // non-isolate-independent code. In many cases it might be cheaper than
1557 // embedding the relocatable value.
1558 if (root_array_available_ && options().isolate_independent_code) {
1559 IndirectLoadConstant(dst, value);
1560 return;
1561 } else if (RelocInfo::IsCompressedEmbeddedObject(rmode)) {
1562 EmbeddedObjectIndex index = AddEmbeddedObject(value);
1563 DCHECK(is_uint32(index));
1564 li(dst, Operand(index, rmode));
1565 } else {
1566 DCHECK(RelocInfo::IsFullEmbeddedObject(rmode));
1567 li(dst, Operand(value.address(), rmode));
1568 }
1569 }
1570
li(Register dst,ExternalReference value,LiFlags mode)1571 void TurboAssembler::li(Register dst, ExternalReference value, LiFlags mode) {
1572 // TODO(jgruber,v8:8887): Also consider a root-relative load when generating
1573 // non-isolate-independent code. In many cases it might be cheaper than
1574 // embedding the relocatable value.
1575 if (root_array_available_ && options().isolate_independent_code) {
1576 IndirectLoadExternalReference(dst, value);
1577 return;
1578 }
1579 li(dst, Operand(value), mode);
1580 }
1581
li(Register dst,const StringConstantBase * string,LiFlags mode)1582 void TurboAssembler::li(Register dst, const StringConstantBase* string,
1583 LiFlags mode) {
1584 li(dst, Operand::EmbeddedStringConstant(string), mode);
1585 }
1586
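// A 32-bit immediate is materialized as lui(Hi20) + addi(Lo12). Adding 0x800
// before extracting Hi20 compensates for the sign extension of the 12-bit addi
// immediate; if either half is zero, a single instruction suffices.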
InstrCountForLiLower32Bit(int64_t value)1587 static inline int InstrCountForLiLower32Bit(int64_t value) {
1588 int64_t Hi20 = ((value + 0x800) >> 12);
1589 int64_t Lo12 = value << 52 >> 52;
1590 if (Hi20 == 0 || Lo12 == 0) {
1591 return 1;
1592 }
1593 return 2;
1594 }
1595
InstrCountForLi64Bit(int64_t value)1596 int TurboAssembler::InstrCountForLi64Bit(int64_t value) {
1597 if (is_int32(value + 0x800)) {
1598 return InstrCountForLiLower32Bit(value);
1599 } else {
1600 return li_estimate(value);
1601 }
1602 UNREACHABLE();
1603 return INT_MAX;
1604 }
1605
li_optimized(Register rd,Operand j,LiFlags mode)1606 void TurboAssembler::li_optimized(Register rd, Operand j, LiFlags mode) {
1607 DCHECK(!j.is_reg());
1608 DCHECK(!MustUseReg(j.rmode()));
1609 DCHECK(mode == OPTIMIZE_SIZE);
1610 Li(rd, j.immediate());
1611 }
1612
li(Register rd,Operand j,LiFlags mode)1613 void TurboAssembler::li(Register rd, Operand j, LiFlags mode) {
1614 DCHECK(!j.is_reg());
1615 BlockTrampolinePoolScope block_trampoline_pool(this);
1616 if (!MustUseReg(j.rmode()) && mode == OPTIMIZE_SIZE) {
1617 UseScratchRegisterScope temps(this);
1618 int count = li_estimate(j.immediate(), temps.hasAvailable());
1619 int reverse_count = li_estimate(~j.immediate(), temps.hasAvailable());
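// When both the value and its bitwise complement would take 4 or more
// instructions to materialize, prefer a pc-relative load from the constant
// pool (if the constant pool is enabled).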
1620 if (FLAG_riscv_constant_pool && count >= 4 && reverse_count >= 4) {
1621 // Record the value into the constant pool.
1622 RecordEntry((uint64_t)j.immediate(), j.rmode());
1623 auipc(rd, 0);
1624 // Load it with a pc-relative auipc + ld pair that targets the pool entry.
1625 ld(rd, rd, 0);
1626 } else {
1627 if ((count - reverse_count) > 1) {
1628 Li(rd, ~j.immediate());
1629 not_(rd, rd);
1630 } else {
1631 Li(rd, j.immediate());
1632 }
1633 }
1634 } else if (MustUseReg(j.rmode())) {
1635 int64_t immediate;
1636 if (j.IsHeapObjectRequest()) {
1637 RequestHeapObject(j.heap_object_request());
1638 immediate = 0;
1639 } else {
1640 immediate = j.immediate();
1641 }
1642
1643 RecordRelocInfo(j.rmode(), immediate);
1644 li_ptr(rd, immediate);
1645 } else if (mode == ADDRESS_LOAD) {
1646 // We always emit the same number of instructions, since this code may later
1647 // be patched to load a different value that needs all 6 instructions.
1648 RecordRelocInfo(j.rmode());
1649 li_ptr(rd, j.immediate());
1650 } else { // Always emit the same 48-bit address load
1651 // instruction sequence.
1652 li_ptr(rd, j.immediate());
1653 }
1654 }
1655
1656 static RegList t_regs = {t0, t1, t2, t3, t4, t5, t6};
1657 static RegList a_regs = {a0, a1, a2, a3, a4, a5, a6, a7};
1658 static RegList s_regs = {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11};
1659
MultiPush(RegList regs)1660 void TurboAssembler::MultiPush(RegList regs) {
1661 int16_t num_to_push = regs.Count();
1662 int16_t stack_offset = num_to_push * kSystemPointerSize;
1663
1664 #define TEST_AND_PUSH_REG(reg) \
1665 if (regs.has(reg)) { \
1666 stack_offset -= kSystemPointerSize; \
1667 Sd(reg, MemOperand(sp, stack_offset)); \
1668 regs.clear(reg); \
1669 }
1670
1671 #define T_REGS(V) V(t6) V(t5) V(t4) V(t3) V(t2) V(t1) V(t0)
1672 #define A_REGS(V) V(a7) V(a6) V(a5) V(a4) V(a3) V(a2) V(a1) V(a0)
1673 #define S_REGS(V) \
1674 V(s11) V(s10) V(s9) V(s8) V(s7) V(s6) V(s5) V(s4) V(s3) V(s2) V(s1)
1675
1676 Sub64(sp, sp, Operand(stack_offset));
1677
1678 // Certain usages of MultiPush require that registers are pushed onto the
1679 // stack in a particular order: ra, fp, sp, gp, ... (basically in decreasing
1680 // order of register numbers, following the MIPS register numbering).
1681 TEST_AND_PUSH_REG(ra);
1682 TEST_AND_PUSH_REG(fp);
1683 TEST_AND_PUSH_REG(sp);
1684 TEST_AND_PUSH_REG(gp);
1685 TEST_AND_PUSH_REG(tp);
1686 if (!(regs & s_regs).is_empty()) {
1687 S_REGS(TEST_AND_PUSH_REG)
1688 }
1689 if (!(regs & a_regs).is_empty()) {
1690 A_REGS(TEST_AND_PUSH_REG)
1691 }
1692 if (!(regs & t_regs).is_empty()) {
1693 T_REGS(TEST_AND_PUSH_REG)
1694 }
1695
1696 DCHECK(regs.is_empty());
1697
1698 #undef TEST_AND_PUSH_REG
1699 #undef T_REGS
1700 #undef A_REGS
1701 #undef S_REGS
1702 }
1703
MultiPop(RegList regs)1704 void TurboAssembler::MultiPop(RegList regs) {
1705 int16_t stack_offset = 0;
1706
1707 #define TEST_AND_POP_REG(reg) \
1708 if (regs.has(reg)) { \
1709 Ld(reg, MemOperand(sp, stack_offset)); \
1710 stack_offset += kSystemPointerSize; \
1711 regs.clear(reg); \
1712 }
1713
1714 #define T_REGS(V) V(t0) V(t1) V(t2) V(t3) V(t4) V(t5) V(t6)
1715 #define A_REGS(V) V(a0) V(a1) V(a2) V(a3) V(a4) V(a5) V(a6) V(a7)
1716 #define S_REGS(V) \
1717 V(s1) V(s2) V(s3) V(s4) V(s5) V(s6) V(s7) V(s8) V(s9) V(s10) V(s11)
1718
1719 // MultiPop pops from the stack in the reverse order of MultiPush.
1720 if (!(regs & t_regs).is_empty()) {
1721 T_REGS(TEST_AND_POP_REG)
1722 }
1723 if (!(regs & a_regs).is_empty()) {
1724 A_REGS(TEST_AND_POP_REG)
1725 }
1726 if (!(regs & s_regs).is_empty()) {
1727 S_REGS(TEST_AND_POP_REG)
1728 }
1729 TEST_AND_POP_REG(tp);
1730 TEST_AND_POP_REG(gp);
1731 TEST_AND_POP_REG(sp);
1732 TEST_AND_POP_REG(fp);
1733 TEST_AND_POP_REG(ra);
1734
1735 DCHECK(regs.is_empty());
1736
1737 addi(sp, sp, stack_offset);
1738
1739 #undef TEST_AND_POP_REG
1740 #undef T_REGS
1741 #undef S_REGS
1742 #undef A_REGS
1743 }
1744
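// FPU registers are pushed in descending register-code order so that
// MultiPopFPU, which walks the codes in ascending order, restores them from
// the matching stack slots.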
MultiPushFPU(DoubleRegList regs)1745 void TurboAssembler::MultiPushFPU(DoubleRegList regs) {
1746 int16_t num_to_push = regs.Count();
1747 int16_t stack_offset = num_to_push * kDoubleSize;
1748
1749 Sub64(sp, sp, Operand(stack_offset));
1750 for (int16_t i = kNumRegisters - 1; i >= 0; i--) {
1751 if ((regs.bits() & (1 << i)) != 0) {
1752 stack_offset -= kDoubleSize;
1753 StoreDouble(FPURegister::from_code(i), MemOperand(sp, stack_offset));
1754 }
1755 }
1756 }
1757
MultiPopFPU(DoubleRegList regs)1758 void TurboAssembler::MultiPopFPU(DoubleRegList regs) {
1759 int16_t stack_offset = 0;
1760
1761 for (int16_t i = 0; i < kNumRegisters; i++) {
1762 if ((regs.bits() & (1 << i)) != 0) {
1763 LoadDouble(FPURegister::from_code(i), MemOperand(sp, stack_offset));
1764 stack_offset += kDoubleSize;
1765 }
1766 }
1767 addi(sp, sp, stack_offset);
1768 }
1769
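// Extracts the bit field rs[pos + size - 1 : pos] into rt by shifting the
// field to the top of the register and then shifting it back down; srai vs.
// srli selects sign or zero extension of the extracted field.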
ExtractBits(Register rt,Register rs,uint16_t pos,uint16_t size,bool sign_extend)1770 void TurboAssembler::ExtractBits(Register rt, Register rs, uint16_t pos,
1771 uint16_t size, bool sign_extend) {
1772 DCHECK(pos < 64 && 0 < size && size <= 64 && 0 < pos + size &&
1773 pos + size <= 64);
1774 slli(rt, rs, 64 - (pos + size));
1775 if (sign_extend) {
1776 srai(rt, rt, 64 - size);
1777 } else {
1778 srli(rt, rt, 64 - size);
1779 }
1780 }
1781
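// Inserts the low `size` bits of `source` into `dest` at bit position `pos`,
// i.e. dest[pos + size - 1 : pos] = source[size - 1 : 0]; the remaining bits
// of dest are preserved.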
InsertBits(Register dest,Register source,Register pos,int size)1782 void TurboAssembler::InsertBits(Register dest, Register source, Register pos,
1783 int size) {
1784 DCHECK_LT(size, 64);
1785 UseScratchRegisterScope temps(this);
1786 Register mask = temps.Acquire();
1787 BlockTrampolinePoolScope block_trampoline_pool(this);
1788 Register source_ = temps.Acquire();
1789 // Create a mask of the length=size.
1790 li(mask, 1);
1791 slli(mask, mask, size);
1792 addi(mask, mask, -1);
1793 and_(source_, mask, source);
1794 sll(source_, source_, pos);
1795 // Make a mask with 0s in the field [pos, pos + size - 1] and 1s elsewhere.
1796 sll(mask, mask, pos);
1797 not_(mask, mask);
1798 // cut area for insertion of source.
1799 and_(dest, mask, dest);
1800 // insert source
1801 or_(dest, dest, source_);
1802 }
1803
Neg_s(FPURegister fd,FPURegister fs)1804 void TurboAssembler::Neg_s(FPURegister fd, FPURegister fs) { fneg_s(fd, fs); }
1805
Neg_d(FPURegister fd,FPURegister fs)1806 void TurboAssembler::Neg_d(FPURegister fd, FPURegister fs) { fneg_d(fd, fs); }
1807
Cvt_d_uw(FPURegister fd,Register rs)1808 void TurboAssembler::Cvt_d_uw(FPURegister fd, Register rs) {
1809 // Convert rs to a FP value in fd.
1810 fcvt_d_wu(fd, rs);
1811 }
1812
Cvt_d_w(FPURegister fd,Register rs)1813 void TurboAssembler::Cvt_d_w(FPURegister fd, Register rs) {
1814 // Convert rs to a FP value in fd.
1815 fcvt_d_w(fd, rs);
1816 }
1817
Cvt_d_ul(FPURegister fd,Register rs)1818 void TurboAssembler::Cvt_d_ul(FPURegister fd, Register rs) {
1819 // Convert rs to a FP value in fd.
1820 fcvt_d_lu(fd, rs);
1821 }
1822
Cvt_s_uw(FPURegister fd,Register rs)1823 void TurboAssembler::Cvt_s_uw(FPURegister fd, Register rs) {
1824 // Convert rs to a FP value in fd.
1825 fcvt_s_wu(fd, rs);
1826 }
1827
Cvt_s_w(FPURegister fd,Register rs)1828 void TurboAssembler::Cvt_s_w(FPURegister fd, Register rs) {
1829 // Convert rs to a FP value in fd.
1830 fcvt_s_w(fd, rs);
1831 }
1832
Cvt_s_ul(FPURegister fd,Register rs)1833 void TurboAssembler::Cvt_s_ul(FPURegister fd, Register rs) {
1834 // Convert rs to a FP value in fd.
1835 fcvt_s_lu(fd, rs);
1836 }
1837
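// Performs the conversion in `fcvt_generator` and, when `result` is a valid
// register, reports success there: result is set to 1 if the conversion raised
// no Invalid Operation flag (input in range, not NaN) and to 0 otherwise.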
1838 template <typename CvtFunc>
RoundFloatingPointToInteger(Register rd,FPURegister fs,Register result,CvtFunc fcvt_generator)1839 void TurboAssembler::RoundFloatingPointToInteger(Register rd, FPURegister fs,
1840 Register result,
1841 CvtFunc fcvt_generator) {
1842 // Save csr_fflags to scratch & clear exception flags
1843 if (result.is_valid()) {
1844 BlockTrampolinePoolScope block_trampoline_pool(this);
1845 UseScratchRegisterScope temps(this);
1846 Register scratch = temps.Acquire();
1847
1848 int exception_flags = kInvalidOperation;
1849 csrrci(scratch, csr_fflags, exception_flags);
1850
1851 // actual conversion instruction
1852 fcvt_generator(this, rd, fs);
1853
1854 // check kInvalidOperation flag (out-of-range, NaN)
1855 // set result to 1 if normal, otherwise set result to 0 for abnormal
1856 frflags(result);
1857 andi(result, result, exception_flags);
1858 seqz(result, result); // result <-- 1 (normal), result <-- 0 (abnormal)
1859
1860 // restore csr_fflags
1861 csrw(csr_fflags, scratch);
1862 } else {
1863 // actual conversion instruction
1864 fcvt_generator(this, rd, fs);
1865 }
1866 }
1867
Clear_if_nan_d(Register rd,FPURegister fs)1868 void TurboAssembler::Clear_if_nan_d(Register rd, FPURegister fs) {
1869 Label no_nan;
1870 feq_d(kScratchReg, fs, fs);
1871 bnez(kScratchReg, &no_nan);
1872 Move(rd, zero_reg);
1873 bind(&no_nan);
1874 }
1875
Clear_if_nan_s(Register rd,FPURegister fs)1876 void TurboAssembler::Clear_if_nan_s(Register rd, FPURegister fs) {
1877 Label no_nan;
1878 feq_s(kScratchReg, fs, fs);
1879 bnez(kScratchReg, &no_nan);
1880 Move(rd, zero_reg);
1881 bind(&no_nan);
1882 }
1883
Trunc_uw_d(Register rd,FPURegister fs,Register result)1884 void TurboAssembler::Trunc_uw_d(Register rd, FPURegister fs, Register result) {
1885 RoundFloatingPointToInteger(
1886 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1887 tasm->fcvt_wu_d(dst, src, RTZ);
1888 });
1889 }
1890
Trunc_w_d(Register rd,FPURegister fs,Register result)1891 void TurboAssembler::Trunc_w_d(Register rd, FPURegister fs, Register result) {
1892 RoundFloatingPointToInteger(
1893 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1894 tasm->fcvt_w_d(dst, src, RTZ);
1895 });
1896 }
1897
Trunc_uw_s(Register rd,FPURegister fs,Register result)1898 void TurboAssembler::Trunc_uw_s(Register rd, FPURegister fs, Register result) {
1899 RoundFloatingPointToInteger(
1900 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1901 tasm->fcvt_wu_s(dst, src, RTZ);
1902 });
1903 }
1904
Trunc_w_s(Register rd,FPURegister fs,Register result)1905 void TurboAssembler::Trunc_w_s(Register rd, FPURegister fs, Register result) {
1906 RoundFloatingPointToInteger(
1907 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1908 tasm->fcvt_w_s(dst, src, RTZ);
1909 });
1910 }
1911
Trunc_ul_d(Register rd,FPURegister fs,Register result)1912 void TurboAssembler::Trunc_ul_d(Register rd, FPURegister fs, Register result) {
1913 RoundFloatingPointToInteger(
1914 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1915 tasm->fcvt_lu_d(dst, src, RTZ);
1916 });
1917 }
1918
Trunc_l_d(Register rd,FPURegister fs,Register result)1919 void TurboAssembler::Trunc_l_d(Register rd, FPURegister fs, Register result) {
1920 RoundFloatingPointToInteger(
1921 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1922 tasm->fcvt_l_d(dst, src, RTZ);
1923 });
1924 }
1925
Trunc_ul_s(Register rd,FPURegister fs,Register result)1926 void TurboAssembler::Trunc_ul_s(Register rd, FPURegister fs, Register result) {
1927 RoundFloatingPointToInteger(
1928 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1929 tasm->fcvt_lu_s(dst, src, RTZ);
1930 });
1931 }
1932
Trunc_l_s(Register rd,FPURegister fs,Register result)1933 void TurboAssembler::Trunc_l_s(Register rd, FPURegister fs, Register result) {
1934 RoundFloatingPointToInteger(
1935 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1936 tasm->fcvt_l_s(dst, src, RTZ);
1937 });
1938 }
1939
Round_w_s(Register rd,FPURegister fs,Register result)1940 void TurboAssembler::Round_w_s(Register rd, FPURegister fs, Register result) {
1941 RoundFloatingPointToInteger(
1942 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1943 tasm->fcvt_w_s(dst, src, RNE);
1944 });
1945 }
1946
Round_w_d(Register rd,FPURegister fs,Register result)1947 void TurboAssembler::Round_w_d(Register rd, FPURegister fs, Register result) {
1948 RoundFloatingPointToInteger(
1949 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1950 tasm->fcvt_w_d(dst, src, RNE);
1951 });
1952 }
1953
Ceil_w_s(Register rd,FPURegister fs,Register result)1954 void TurboAssembler::Ceil_w_s(Register rd, FPURegister fs, Register result) {
1955 RoundFloatingPointToInteger(
1956 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1957 tasm->fcvt_w_s(dst, src, RUP);
1958 });
1959 }
1960
Ceil_w_d(Register rd,FPURegister fs,Register result)1961 void TurboAssembler::Ceil_w_d(Register rd, FPURegister fs, Register result) {
1962 RoundFloatingPointToInteger(
1963 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1964 tasm->fcvt_w_d(dst, src, RUP);
1965 });
1966 }
1967
Floor_w_s(Register rd,FPURegister fs,Register result)1968 void TurboAssembler::Floor_w_s(Register rd, FPURegister fs, Register result) {
1969 RoundFloatingPointToInteger(
1970 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1971 tasm->fcvt_w_s(dst, src, RDN);
1972 });
1973 }
1974
Floor_w_d(Register rd,FPURegister fs,Register result)1975 void TurboAssembler::Floor_w_d(Register rd, FPURegister fs, Register result) {
1976 RoundFloatingPointToInteger(
1977 rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1978 tasm->fcvt_w_d(dst, src, RDN);
1979 });
1980 }
1981
1982 // According to JS ECMA specification, for floating-point round operations, if
1983 // the input is NaN, +/-infinity, or +/-0, the same input is returned as the
1984 // rounded result; this differs from the behavior of RISC-V fcvt instructions
1985 // (which round out-of-range values to the nearest max or min value), therefore
1986 // special handling is needed for NaN, +/-Infinity, and +/-0.
1987 template <typename F>
RoundHelper(FPURegister dst,FPURegister src,FPURegister fpu_scratch,RoundingMode frm)1988 void TurboAssembler::RoundHelper(FPURegister dst, FPURegister src,
1989 FPURegister fpu_scratch, RoundingMode frm) {
1990 BlockTrampolinePoolScope block_trampoline_pool(this);
1991 UseScratchRegisterScope temps(this);
1992 Register scratch2 = temps.Acquire();
1993
1994 DCHECK((std::is_same<float, F>::value) || (std::is_same<double, F>::value));
1995 // Need at least two FPRs, so check against dst == src == fpu_scratch
1996 DCHECK(!(dst == src && dst == fpu_scratch));
1997
1998 const int kFloatMantissaBits =
1999 sizeof(F) == 4 ? kFloat32MantissaBits : kFloat64MantissaBits;
2000 const int kFloatExponentBits =
2001 sizeof(F) == 4 ? kFloat32ExponentBits : kFloat64ExponentBits;
2002 const int kFloatExponentBias =
2003 sizeof(F) == 4 ? kFloat32ExponentBias : kFloat64ExponentBias;
2004 Label done;
2005
2006 {
2007 UseScratchRegisterScope temps2(this);
2008 Register scratch = temps2.Acquire();
2009 // extract exponent value of the source floating-point to scratch
2010 if (std::is_same<F, double>::value) {
2011 fmv_x_d(scratch, src);
2012 } else {
2013 fmv_x_w(scratch, src);
2014 }
2015 ExtractBits(scratch2, scratch, kFloatMantissaBits, kFloatExponentBits);
2016 }
2017
2018 // if src is NaN/+-Infinity/+-Zero or if the exponent is larger than # of bits
2019 // in mantissa, the result is the same as src, so move src to dest (to avoid
2020 // generating another branch)
2021 if (dst != src) {
2022 if (std::is_same<F, double>::value) {
2023 fmv_d(dst, src);
2024 } else {
2025 fmv_s(dst, src);
2026 }
2027 }
2028 {
2029 Label not_NaN;
2030 UseScratchRegisterScope temps2(this);
2031 Register scratch = temps2.Acquire();
2032 // According to the wasm spec
2033 // (https://webassembly.github.io/spec/core/exec/numerics.html#aux-nans)
2034 // if input is canonical NaN, then output is canonical NaN, and if input is
2035 // any other NaN, then output is any NaN with most significant bit of
2036 // payload is 1. In RISC-V, feq_d will set scratch to 0 if src is a NaN. If
2037 // src is not a NaN, branch to the label and do nothing, but if it is,
2038 // fmin_d will set dst to the canonical NaN.
2039 if (std::is_same<F, double>::value) {
2040 feq_d(scratch, src, src);
2041 bnez(scratch, &not_NaN);
2042 fmin_d(dst, src, src);
2043 } else {
2044 feq_s(scratch, src, src);
2045 bnez(scratch, &not_NaN);
2046 fmin_s(dst, src, src);
2047 }
2048 bind(&not_NaN);
2049 }
2050
2051 // If real exponent (i.e., scratch2 - kFloatExponentBias) is greater than
2052 // kFloat32MantissaBits, it means the floating-point value has no fractional
2053 // part, thus the input is already rounded, jump to done. Note that, NaN and
2054 // Infinity in floating-point representation sets maximal exponent value, so
2055 // they also satisfy (scratch2 - kFloatExponentBias >= kFloatMantissaBits),
2056 // and JS round semantics specify that rounding of NaN (Infinity) returns NaN
2057 // (Infinity), so NaN and Infinity are considered rounded value too.
2058 Branch(&done, greater_equal, scratch2,
2059 Operand(kFloatExponentBias + kFloatMantissaBits));
2060
2061 // Actual rounding is needed along this path
2062
2063 // old_src holds the original input, needed for the case of src == dst
2064 FPURegister old_src = src;
2065 if (src == dst) {
2066 DCHECK(fpu_scratch != dst);
2067 Move(fpu_scratch, src);
2068 old_src = fpu_scratch;
2069 }
2070
2071 // Since only input whose real exponent value is less than kMantissaBits
2072 // (i.e., 23 or 52-bits) falls into this path, the value range of the input
2073 // falls into that of 23- or 53-bit integers. So we round the input to integer
2074 // values, then convert them back to floating-point.
2075 {
2076 UseScratchRegisterScope temps(this);
2077 Register scratch = temps.Acquire();
2078 if (std::is_same<F, double>::value) {
2079 fcvt_l_d(scratch, src, frm);
2080 fcvt_d_l(dst, scratch, frm);
2081 } else {
2082 fcvt_w_s(scratch, src, frm);
2083 fcvt_s_w(dst, scratch, frm);
2084 }
2085 }
2086 // Special handling is needed if the input is a very small positive/negative
2087 // number that rounds to zero. JS semantics require that the rounded result
2088 // retains the sign of the input, so a very small positive (negative)
2089 // floating-point number should be rounded to positive (negative) 0.
2090 // Therefore, we use sign-bit injection to produce +/-0 correctly. Instead of
2091 // testing for zero w/ a branch, we just insert sign-bit for everyone on this
2092 // path (this is where old_src is needed)
2093 if (std::is_same<F, double>::value) {
2094 fsgnj_d(dst, dst, old_src);
2095 } else {
2096 fsgnj_s(dst, dst, old_src);
2097 }
2098
2099 bind(&done);
2100 }
2101
2102 // According to JS ECMA specification, for floating-point round operations, if
2103 // the input is NaN, +/-infinity, or +/-0, the same input is returned as the
2104 // rounded result; this differs from the behavior of RISC-V fcvt instructions
2105 // (which round out-of-range values to the nearest max or min value), therefore
2106 // special handling is needed for NaN, +/-Infinity, and +/-0.
2107 template <typename F>
RoundHelper(VRegister dst,VRegister src,Register scratch,VRegister v_scratch,RoundingMode frm)2108 void TurboAssembler::RoundHelper(VRegister dst, VRegister src, Register scratch,
2109 VRegister v_scratch, RoundingMode frm) {
2110 VU.set(scratch, std::is_same<F, float>::value ? E32 : E64, m1);
2111 // if src is NaN/+-Infinity/+-Zero or if the exponent is larger than # of bits
2112 // in mantissa, the result is the same as src, so move src to dest (to avoid
2113 // generating another branch)
2114
2115 // If real exponent (i.e., scratch2 - kFloatExponentBias) is greater than
2116 // kFloat32MantissaBits, it means the floating-point value has no fractional
2117 // part, thus the input is already rounded, jump to done. Note that, NaN and
2118 // Infinity in floating-point representation sets maximal exponent value, so
2119 // they also satisfy (scratch2 - kFloatExponentBias >= kFloatMantissaBits),
2120 // and JS round semantics specify that rounding of NaN (Infinity) returns NaN
2121 // (Infinity), so NaN and Infinity are considered rounded value too.
2122 const int kFloatMantissaBits =
2123 sizeof(F) == 4 ? kFloat32MantissaBits : kFloat64MantissaBits;
2124 const int kFloatExponentBits =
2125 sizeof(F) == 4 ? kFloat32ExponentBits : kFloat64ExponentBits;
2126 const int kFloatExponentBias =
2127 sizeof(F) == 4 ? kFloat32ExponentBias : kFloat64ExponentBias;
2128
2129 // Scalar equivalent (cf. ExtractBits): slli(rt, rs, 64 - (pos + size));
2130 // if (sign_extend) {
2131 //   srai(rt, rt, 64 - size);
2132 // } else {
2133 //   srli(rt, rt, 64 - size);
2134 // }
2135
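// Extract the biased exponent of every lane into v_scratch using the same
// shift trick, then set mask bits in v0 for lanes whose exponent is below
// kFloatExponentBias + kFloatMantissaBits, i.e. lanes that may still have a
// fractional part; only those lanes are converted below.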
2136 li(scratch, 64 - kFloatMantissaBits - kFloatExponentBits);
2137 vsll_vx(v_scratch, src, scratch);
2138 li(scratch, 64 - kFloatExponentBits);
2139 vsrl_vx(v_scratch, v_scratch, scratch);
2140 li(scratch, kFloatExponentBias + kFloatMantissaBits);
2141 vmslt_vx(v0, v_scratch, scratch);
2142
2143 VU.set(frm);
2144 vmv_vv(dst, src);
2145 if (dst == src) {
2146 vmv_vv(v_scratch, src);
2147 }
2148 vfcvt_x_f_v(dst, src, MaskType::Mask);
2149 vfcvt_f_x_v(dst, dst, MaskType::Mask);
2150
2151 // A special handling is needed if the input is a very small positive/negative
2152 // number that rounds to zero. JS semantics requires that the rounded result
2153 // retains the sign of the input, so a very small positive (negative)
2154 // floating-point number should be rounded to positive (negative) 0.
2155 if (dst == src) {
2156 vfsngj_vv(dst, dst, v_scratch);
2157 } else {
2158 vfsngj_vv(dst, dst, src);
2159 }
2160 }
2161
Ceil_f(VRegister vdst,VRegister vsrc,Register scratch,VRegister v_scratch)2162 void TurboAssembler::Ceil_f(VRegister vdst, VRegister vsrc, Register scratch,
2163 VRegister v_scratch) {
2164 RoundHelper<float>(vdst, vsrc, scratch, v_scratch, RUP);
2165 }
2166
Ceil_d(VRegister vdst,VRegister vsrc,Register scratch,VRegister v_scratch)2167 void TurboAssembler::Ceil_d(VRegister vdst, VRegister vsrc, Register scratch,
2168 VRegister v_scratch) {
2169 RoundHelper<double>(vdst, vsrc, scratch, v_scratch, RUP);
2170 }
2171
Floor_f(VRegister vdst,VRegister vsrc,Register scratch,VRegister v_scratch)2172 void TurboAssembler::Floor_f(VRegister vdst, VRegister vsrc, Register scratch,
2173 VRegister v_scratch) {
2174 RoundHelper<float>(vdst, vsrc, scratch, v_scratch, RDN);
2175 }
2176
Floor_d(VRegister vdst,VRegister vsrc,Register scratch,VRegister v_scratch)2177 void TurboAssembler::Floor_d(VRegister vdst, VRegister vsrc, Register scratch,
2178 VRegister v_scratch) {
2179 RoundHelper<double>(vdst, vsrc, scratch, v_scratch, RDN);
2180 }
2181
Trunc_d(VRegister vdst,VRegister vsrc,Register scratch,VRegister v_scratch)2182 void TurboAssembler::Trunc_d(VRegister vdst, VRegister vsrc, Register scratch,
2183 VRegister v_scratch) {
2184 RoundHelper<double>(vdst, vsrc, scratch, v_scratch, RTZ);
2185 }
2186
Trunc_f(VRegister vdst,VRegister vsrc,Register scratch,VRegister v_scratch)2187 void TurboAssembler::Trunc_f(VRegister vdst, VRegister vsrc, Register scratch,
2188 VRegister v_scratch) {
2189 RoundHelper<float>(vdst, vsrc, scratch, v_scratch, RTZ);
2190 }
2191
Round_f(VRegister vdst,VRegister vsrc,Register scratch,VRegister v_scratch)2192 void TurboAssembler::Round_f(VRegister vdst, VRegister vsrc, Register scratch,
2193 VRegister v_scratch) {
2194 RoundHelper<float>(vdst, vsrc, scratch, v_scratch, RNE);
2195 }
2196
Round_d(VRegister vdst,VRegister vsrc,Register scratch,VRegister v_scratch)2197 void TurboAssembler::Round_d(VRegister vdst, VRegister vsrc, Register scratch,
2198 VRegister v_scratch) {
2199 RoundHelper<double>(vdst, vsrc, scratch, v_scratch, RNE);
2200 }
2201
Floor_d_d(FPURegister dst,FPURegister src,FPURegister fpu_scratch)2202 void TurboAssembler::Floor_d_d(FPURegister dst, FPURegister src,
2203 FPURegister fpu_scratch) {
2204 RoundHelper<double>(dst, src, fpu_scratch, RDN);
2205 }
2206
Ceil_d_d(FPURegister dst,FPURegister src,FPURegister fpu_scratch)2207 void TurboAssembler::Ceil_d_d(FPURegister dst, FPURegister src,
2208 FPURegister fpu_scratch) {
2209 RoundHelper<double>(dst, src, fpu_scratch, RUP);
2210 }
2211
Trunc_d_d(FPURegister dst,FPURegister src,FPURegister fpu_scratch)2212 void TurboAssembler::Trunc_d_d(FPURegister dst, FPURegister src,
2213 FPURegister fpu_scratch) {
2214 RoundHelper<double>(dst, src, fpu_scratch, RTZ);
2215 }
2216
Round_d_d(FPURegister dst,FPURegister src,FPURegister fpu_scratch)2217 void TurboAssembler::Round_d_d(FPURegister dst, FPURegister src,
2218 FPURegister fpu_scratch) {
2219 RoundHelper<double>(dst, src, fpu_scratch, RNE);
2220 }
2221
Floor_s_s(FPURegister dst,FPURegister src,FPURegister fpu_scratch)2222 void TurboAssembler::Floor_s_s(FPURegister dst, FPURegister src,
2223 FPURegister fpu_scratch) {
2224 RoundHelper<float>(dst, src, fpu_scratch, RDN);
2225 }
2226
Ceil_s_s(FPURegister dst,FPURegister src,FPURegister fpu_scratch)2227 void TurboAssembler::Ceil_s_s(FPURegister dst, FPURegister src,
2228 FPURegister fpu_scratch) {
2229 RoundHelper<float>(dst, src, fpu_scratch, RUP);
2230 }
2231
Trunc_s_s(FPURegister dst,FPURegister src,FPURegister fpu_scratch)2232 void TurboAssembler::Trunc_s_s(FPURegister dst, FPURegister src,
2233 FPURegister fpu_scratch) {
2234 RoundHelper<float>(dst, src, fpu_scratch, RTZ);
2235 }
2236
Round_s_s(FPURegister dst,FPURegister src,FPURegister fpu_scratch)2237 void TurboAssembler::Round_s_s(FPURegister dst, FPURegister src,
2238 FPURegister fpu_scratch) {
2239 RoundHelper<float>(dst, src, fpu_scratch, RNE);
2240 }
2241
Madd_s(FPURegister fd,FPURegister fr,FPURegister fs,FPURegister ft)2242 void MacroAssembler::Madd_s(FPURegister fd, FPURegister fr, FPURegister fs,
2243 FPURegister ft) {
2244 fmadd_s(fd, fs, ft, fr);
2245 }
2246
Madd_d(FPURegister fd,FPURegister fr,FPURegister fs,FPURegister ft)2247 void MacroAssembler::Madd_d(FPURegister fd, FPURegister fr, FPURegister fs,
2248 FPURegister ft) {
2249 fmadd_d(fd, fs, ft, fr);
2250 }
2251
Msub_s(FPURegister fd,FPURegister fr,FPURegister fs,FPURegister ft)2252 void MacroAssembler::Msub_s(FPURegister fd, FPURegister fr, FPURegister fs,
2253 FPURegister ft) {
2254 fmsub_s(fd, fs, ft, fr);
2255 }
2256
Msub_d(FPURegister fd,FPURegister fr,FPURegister fs,FPURegister ft)2257 void MacroAssembler::Msub_d(FPURegister fd, FPURegister fr, FPURegister fs,
2258 FPURegister ft) {
2259 fmsub_d(fd, fs, ft, fr);
2260 }
2261
CompareF32(Register rd,FPUCondition cc,FPURegister cmp1,FPURegister cmp2)2262 void TurboAssembler::CompareF32(Register rd, FPUCondition cc, FPURegister cmp1,
2263 FPURegister cmp2) {
2264 switch (cc) {
2265 case EQ:
2266 feq_s(rd, cmp1, cmp2);
2267 break;
2268 case NE:
2269 feq_s(rd, cmp1, cmp2);
2270 NegateBool(rd, rd);
2271 break;
2272 case LT:
2273 flt_s(rd, cmp1, cmp2);
2274 break;
2275 case GE:
2276 fle_s(rd, cmp2, cmp1);
2277 break;
2278 case LE:
2279 fle_s(rd, cmp1, cmp2);
2280 break;
2281 case GT:
2282 flt_s(rd, cmp2, cmp1);
2283 break;
2284 default:
2285 UNREACHABLE();
2286 }
2287 }
2288
CompareF64(Register rd,FPUCondition cc,FPURegister cmp1,FPURegister cmp2)2289 void TurboAssembler::CompareF64(Register rd, FPUCondition cc, FPURegister cmp1,
2290 FPURegister cmp2) {
2291 switch (cc) {
2292 case EQ:
2293 feq_d(rd, cmp1, cmp2);
2294 break;
2295 case NE:
2296 feq_d(rd, cmp1, cmp2);
2297 NegateBool(rd, rd);
2298 break;
2299 case LT:
2300 flt_d(rd, cmp1, cmp2);
2301 break;
2302 case GE:
2303 fle_d(rd, cmp2, cmp1);
2304 break;
2305 case LE:
2306 fle_d(rd, cmp1, cmp2);
2307 break;
2308 case GT:
2309 flt_d(rd, cmp2, cmp1);
2310 break;
2311 default:
2312 UNREACHABLE();
2313 }
2314 }
2315
CompareIsNotNanF32(Register rd,FPURegister cmp1,FPURegister cmp2)2316 void TurboAssembler::CompareIsNotNanF32(Register rd, FPURegister cmp1,
2317 FPURegister cmp2) {
2318 UseScratchRegisterScope temps(this);
2319 BlockTrampolinePoolScope block_trampoline_pool(this);
2320 Register scratch = temps.Acquire();
2321
2322 feq_s(rd, cmp1, cmp1); // rd <- !isNan(cmp1)
2323 feq_s(scratch, cmp2, cmp2); // scratch <- !isNaN(cmp2)
2324 And(rd, rd, scratch); // rd <- !isNan(cmp1) && !isNan(cmp2)
2325 }
2326
CompareIsNotNanF64(Register rd,FPURegister cmp1,FPURegister cmp2)2327 void TurboAssembler::CompareIsNotNanF64(Register rd, FPURegister cmp1,
2328 FPURegister cmp2) {
2329 UseScratchRegisterScope temps(this);
2330 BlockTrampolinePoolScope block_trampoline_pool(this);
2331 Register scratch = temps.Acquire();
2332
2333 feq_d(rd, cmp1, cmp1); // rd <- !isNan(cmp1)
2334 feq_d(scratch, cmp2, cmp2); // scratch <- !isNaN(cmp2)
2335 And(rd, rd, scratch); // rd <- !isNan(cmp1) && !isNan(cmp2)
2336 }
2337
CompareIsNanF32(Register rd,FPURegister cmp1,FPURegister cmp2)2338 void TurboAssembler::CompareIsNanF32(Register rd, FPURegister cmp1,
2339 FPURegister cmp2) {
2340 CompareIsNotNanF32(rd, cmp1, cmp2); // rd <- !isNan(cmp1) && !isNan(cmp2)
2341 Xor(rd, rd, 1); // rd <- isNan(cmp1) || isNan(cmp2)
2342 }
2343
CompareIsNanF64(Register rd,FPURegister cmp1,FPURegister cmp2)2344 void TurboAssembler::CompareIsNanF64(Register rd, FPURegister cmp1,
2345 FPURegister cmp2) {
2346 CompareIsNotNanF64(rd, cmp1, cmp2); // rd <- !isNan(cmp1) && !isNan(cmp2)
2347 Xor(rd, rd, 1); // rd <- isNan(cmp1) || isNan(cmp2)
2348 }
2349
BranchTrueShortF(Register rs,Label * target)2350 void TurboAssembler::BranchTrueShortF(Register rs, Label* target) {
2351 Branch(target, not_equal, rs, Operand(zero_reg));
2352 }
2353
BranchFalseShortF(Register rs,Label * target)2354 void TurboAssembler::BranchFalseShortF(Register rs, Label* target) {
2355 Branch(target, equal, rs, Operand(zero_reg));
2356 }
2357
BranchTrueF(Register rs,Label * target)2358 void TurboAssembler::BranchTrueF(Register rs, Label* target) {
2359 bool long_branch =
2360 target->is_bound() ? !is_near(target) : is_trampoline_emitted();
2361 if (long_branch) {
2362 Label skip;
2363 BranchFalseShortF(rs, &skip);
2364 BranchLong(target);
2365 bind(&skip);
2366 } else {
2367 BranchTrueShortF(rs, target);
2368 }
2369 }
2370
BranchFalseF(Register rs,Label * target)2371 void TurboAssembler::BranchFalseF(Register rs, Label* target) {
2372 bool long_branch =
2373 target->is_bound() ? !is_near(target) : is_trampoline_emitted();
2374 if (long_branch) {
2375 Label skip;
2376 BranchTrueShortF(rs, &skip);
2377 BranchLong(target);
2378 bind(&skip);
2379 } else {
2380 BranchFalseShortF(rs, target);
2381 }
2382 }
2383
InsertHighWordF64(FPURegister dst,Register src_high)2384 void TurboAssembler::InsertHighWordF64(FPURegister dst, Register src_high) {
2385 UseScratchRegisterScope temps(this);
2386 Register scratch = temps.Acquire();
2387 Register scratch2 = temps.Acquire();
2388 BlockTrampolinePoolScope block_trampoline_pool(this);
2389
2390 DCHECK(src_high != scratch2 && src_high != scratch);
2391
2392 fmv_x_d(scratch, dst);
2393 slli(scratch2, src_high, 32);
2394 slli(scratch, scratch, 32);
2395 srli(scratch, scratch, 32);
2396 or_(scratch, scratch, scratch2);
2397 fmv_d_x(dst, scratch);
2398 }
2399
InsertLowWordF64(FPURegister dst,Register src_low)2400 void TurboAssembler::InsertLowWordF64(FPURegister dst, Register src_low) {
2401 UseScratchRegisterScope temps(this);
2402 Register scratch = temps.Acquire();
2403 Register scratch2 = temps.Acquire();
2404 BlockTrampolinePoolScope block_trampoline_pool(this);
2405
2406 DCHECK(src_low != scratch && src_low != scratch2);
2407 fmv_x_d(scratch, dst);
2408 slli(scratch2, src_low, 32);
2409 srli(scratch2, scratch2, 32);
2410 srli(scratch, scratch, 32);
2411 slli(scratch, scratch, 32);
2412 or_(scratch, scratch, scratch2);
2413 fmv_d_x(dst, scratch);
2414 }
2415
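// kDoubleRegZero caches a zero value; has_single_zero_reg_set_ /
// has_double_zero_reg_set_ record which width it currently holds, so loads of
// +/-0.0 can reuse it instead of materializing the bits again.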
LoadFPRImmediate(FPURegister dst,uint32_t src)2416 void TurboAssembler::LoadFPRImmediate(FPURegister dst, uint32_t src) {
2417 // Handle special values first.
2418 if (src == bit_cast<uint32_t>(0.0f) && has_single_zero_reg_set_) {
2419 if (dst != kDoubleRegZero) fmv_s(dst, kDoubleRegZero);
2420 } else if (src == bit_cast<uint32_t>(-0.0f) && has_single_zero_reg_set_) {
2421 Neg_s(dst, kDoubleRegZero);
2422 } else {
2423 if (dst == kDoubleRegZero) {
2424 DCHECK(src == bit_cast<uint32_t>(0.0f));
2425 fmv_w_x(dst, zero_reg);
2426 has_single_zero_reg_set_ = true;
2427 has_double_zero_reg_set_ = false;
2428 } else {
2429 UseScratchRegisterScope temps(this);
2430 Register scratch = temps.Acquire();
2431 li(scratch, Operand(static_cast<int32_t>(src)));
2432 fmv_w_x(dst, scratch);
2433 }
2434 }
2435 }
2436
LoadFPRImmediate(FPURegister dst,uint64_t src)2437 void TurboAssembler::LoadFPRImmediate(FPURegister dst, uint64_t src) {
2438 // Handle special values first.
2439 if (src == bit_cast<uint64_t>(0.0) && has_double_zero_reg_set_) {
2440 if (dst != kDoubleRegZero) fmv_d(dst, kDoubleRegZero);
2441 } else if (src == bit_cast<uint64_t>(-0.0) && has_double_zero_reg_set_) {
2442 Neg_d(dst, kDoubleRegZero);
2443 } else {
2444 if (dst == kDoubleRegZero) {
2445 DCHECK(src == bit_cast<uint64_t>(0.0));
2446 fmv_d_x(dst, zero_reg);
2447 has_double_zero_reg_set_ = true;
2448 has_single_zero_reg_set_ = false;
2449 } else {
2450 UseScratchRegisterScope temps(this);
2451 Register scratch = temps.Acquire();
2452 li(scratch, Operand(src));
2453 fmv_d_x(dst, scratch);
2454 }
2455 }
2456 }
2457
CompareI(Register rd,Register rs,const Operand & rt,Condition cond)2458 void TurboAssembler::CompareI(Register rd, Register rs, const Operand& rt,
2459 Condition cond) {
2460 switch (cond) {
2461 case eq:
2462 Seq(rd, rs, rt);
2463 break;
2464 case ne:
2465 Sne(rd, rs, rt);
2466 break;
2467
2468 // Signed comparison.
2469 case greater:
2470 Sgt(rd, rs, rt);
2471 break;
2472 case greater_equal:
2473 Sge(rd, rs, rt); // rs >= rt
2474 break;
2475 case less:
2476 Slt(rd, rs, rt); // rs < rt
2477 break;
2478 case less_equal:
2479 Sle(rd, rs, rt); // rs <= rt
2480 break;
2481
2482 // Unsigned comparison.
2483 case Ugreater:
2484 Sgtu(rd, rs, rt); // rs > rt
2485 break;
2486 case Ugreater_equal:
2487 Sgeu(rd, rs, rt); // rs >= rt
2488 break;
2489 case Uless:
2490 Sltu(rd, rs, rt); // rs < rt
2491 break;
2492 case Uless_equal:
2493 Sleu(rd, rs, rt); // rs <= rt
2494 break;
2495 case cc_always:
2496 UNREACHABLE();
2497 default:
2498 UNREACHABLE();
2499 }
2500 }
2501
2502 // dest <- (condition != 0 ? zero : dest)
LoadZeroIfConditionNotZero(Register dest,Register condition)2503 void TurboAssembler::LoadZeroIfConditionNotZero(Register dest,
2504 Register condition) {
2505 UseScratchRegisterScope temps(this);
2506 Register scratch = temps.Acquire();
2507 seqz(scratch, condition);
2508 // neg + and may be more efficient than mul(dest, dest, scratch)
2509 neg(scratch, scratch); // 0 is still 0, 1 becomes all 1s
2510 and_(dest, dest, scratch);
2511 }
2512
2513 // dest <- (condition == 0 ? 0 : dest)
LoadZeroIfConditionZero(Register dest,Register condition)2514 void TurboAssembler::LoadZeroIfConditionZero(Register dest,
2515 Register condition) {
2516 UseScratchRegisterScope temps(this);
2517 Register scratch = temps.Acquire();
2518 snez(scratch, condition);
2519 // neg + and may be more efficient than mul(dest, dest, scratch);
2520 neg(scratch, scratch); // 0 is still 0, 1 becomes all 1s
2521 and_(dest, dest, scratch);
2522 }
2523
Clz32(Register rd,Register xx)2524 void TurboAssembler::Clz32(Register rd, Register xx) {
2525 // 32 bit unsigned in lower word: count number of leading zeros.
2526 // int n = 32;
2527 // unsigned y;
2528
2529 // y = x >>16; if (y != 0) { n = n -16; x = y; }
2530 // y = x >> 8; if (y != 0) { n = n - 8; x = y; }
2531 // y = x >> 4; if (y != 0) { n = n - 4; x = y; }
2532 // y = x >> 2; if (y != 0) { n = n - 2; x = y; }
2533 // y = x >> 1; if (y != 0) {rd = n - 2; return;}
2534 // rd = n - x;
2535
2536 Label L0, L1, L2, L3, L4;
2537 UseScratchRegisterScope temps(this);
2538 BlockTrampolinePoolScope block_trampoline_pool(this);
2539 Register x = rd;
2540 Register y = temps.Acquire();
2541 Register n = temps.Acquire();
2542 DCHECK(xx != y && xx != n);
2543 Move(x, xx);
2544 li(n, Operand(32));
2545 srliw(y, x, 16);
2546 BranchShort(&L0, eq, y, Operand(zero_reg));
2547 Move(x, y);
2548 addiw(n, n, -16);
2549 bind(&L0);
2550 srliw(y, x, 8);
2551 BranchShort(&L1, eq, y, Operand(zero_reg));
2552 addiw(n, n, -8);
2553 Move(x, y);
2554 bind(&L1);
2555 srliw(y, x, 4);
2556 BranchShort(&L2, eq, y, Operand(zero_reg));
2557 addiw(n, n, -4);
2558 Move(x, y);
2559 bind(&L2);
2560 srliw(y, x, 2);
2561 BranchShort(&L3, eq, y, Operand(zero_reg));
2562 addiw(n, n, -2);
2563 Move(x, y);
2564 bind(&L3);
2565 srliw(y, x, 1);
2566 subw(rd, n, x);
2567 BranchShort(&L4, eq, y, Operand(zero_reg));
2568 addiw(rd, n, -2);
2569 bind(&L4);
2570 }
2571
Clz64(Register rd,Register xx)2572 void TurboAssembler::Clz64(Register rd, Register xx) {
2573 // 64 bit: count number of leading zeros.
2574 // int n = 64;
2575 // unsigned y;
2576
2577 // y = x >>32; if (y != 0) { n = n - 32; x = y; }
2578 // y = x >>16; if (y != 0) { n = n - 16; x = y; }
2579 // y = x >> 8; if (y != 0) { n = n - 8; x = y; }
2580 // y = x >> 4; if (y != 0) { n = n - 4; x = y; }
2581 // y = x >> 2; if (y != 0) { n = n - 2; x = y; }
2582 // y = x >> 1; if (y != 0) {rd = n - 2; return;}
2583 // rd = n - x;
2584
2585 Label L0, L1, L2, L3, L4, L5;
2586 UseScratchRegisterScope temps(this);
2587 BlockTrampolinePoolScope block_trampoline_pool(this);
2588 Register x = rd;
2589 Register y = temps.Acquire();
2590 Register n = temps.Acquire();
2591 DCHECK(xx != y && xx != n);
2592 Move(x, xx);
2593 li(n, Operand(64));
2594 srli(y, x, 32);
2595 BranchShort(&L0, eq, y, Operand(zero_reg));
2596 addiw(n, n, -32);
2597 Move(x, y);
2598 bind(&L0);
2599 srli(y, x, 16);
2600 BranchShort(&L1, eq, y, Operand(zero_reg));
2601 addiw(n, n, -16);
2602 Move(x, y);
2603 bind(&L1);
2604 srli(y, x, 8);
2605 BranchShort(&L2, eq, y, Operand(zero_reg));
2606 addiw(n, n, -8);
2607 Move(x, y);
2608 bind(&L2);
2609 srli(y, x, 4);
2610 BranchShort(&L3, eq, y, Operand(zero_reg));
2611 addiw(n, n, -4);
2612 Move(x, y);
2613 bind(&L3);
2614 srli(y, x, 2);
2615 BranchShort(&L4, eq, y, Operand(zero_reg));
2616 addiw(n, n, -2);
2617 Move(x, y);
2618 bind(&L4);
2619 srli(y, x, 1);
2620 subw(rd, n, x);
2621 BranchShort(&L5, eq, y, Operand(zero_reg));
2622 addiw(rd, n, -2);
2623 bind(&L5);
2624 }
2625
Ctz32(Register rd,Register rs)2626 void TurboAssembler::Ctz32(Register rd, Register rs) {
2627 // Convert trailing zeroes to trailing ones, and bits to their left
2628 // to zeroes.
2629
2630 BlockTrampolinePoolScope block_trampoline_pool(this);
2631 {
2632 UseScratchRegisterScope temps(this);
2633 Register scratch = temps.Acquire();
2634 Add64(scratch, rs, -1);
2635 Xor(rd, scratch, rs);
2636 And(rd, rd, scratch);
2637 // Count number of leading zeroes.
2638 }
2639 Clz32(rd, rd);
2640 {
2641 // Subtract number of leading zeroes from 32 to get number of trailing
2642 // ones. Remember that the trailing ones were formerly trailing zeroes.
2643 UseScratchRegisterScope temps(this);
2644 Register scratch = temps.Acquire();
2645 li(scratch, 32);
2646 Sub32(rd, scratch, rd);
2647 }
2648 }
2649
Ctz64(Register rd,Register rs)2650 void TurboAssembler::Ctz64(Register rd, Register rs) {
2651 // Convert trailing zeroes to trailing ones, and bits to their left
2652 // to zeroes.
2653
2654 BlockTrampolinePoolScope block_trampoline_pool(this);
2655 {
2656 UseScratchRegisterScope temps(this);
2657 Register scratch = temps.Acquire();
2658 Add64(scratch, rs, -1);
2659 Xor(rd, scratch, rs);
2660 And(rd, rd, scratch);
2661 // Count number of leading zeroes.
2662 }
2663 Clz64(rd, rd);
2664 {
2665 // Subtract number of leading zeroes from 64 to get number of trailing
2666 // ones. Remember that the trailing ones were formerly trailing zeroes.
2667 UseScratchRegisterScope temps(this);
2668 Register scratch = temps.Acquire();
2669 li(scratch, 64);
2670 Sub64(rd, scratch, rd);
2671 }
2672 }
2673
Popcnt32(Register rd,Register rs,Register scratch)2674 void TurboAssembler::Popcnt32(Register rd, Register rs, Register scratch) {
2675 DCHECK_NE(scratch, rs);
2676 DCHECK_NE(scratch, rd);
2677 // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
2678 //
2679 // A generalization of the best bit counting method to integers of
2680 // bit-widths up to 128 (parameterized by type T) is this:
2681 //
2682 // v = v - ((v >> 1) & (T)~(T)0/3); // temp
2683 // v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3); // temp
2684 // v = (v + (v >> 4)) & (T)~(T)0/255*15; // temp
2685 // c = (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * BITS_PER_BYTE; //count
2686 //
2687 // There are algorithms which are faster in the cases where very few
2688 // bits are set but the algorithm here attempts to minimize the total
2689 // number of instructions executed even when a large number of bits
2690 // are set.
2691 // The number of instructions is 20.
2692 // uint32_t B0 = 0x55555555; // (T)~(T)0/3
2693 // uint32_t B1 = 0x33333333; // (T)~(T)0/15*3
2694 // uint32_t B2 = 0x0F0F0F0F; // (T)~(T)0/255*15
2695 // uint32_t value = 0x01010101; // (T)~(T)0/255
2696
2697 uint32_t shift = 24;
2698 UseScratchRegisterScope temps(this);
2699 BlockTrampolinePoolScope block_trampoline_pool(this);
2700 Register scratch2 = temps.Acquire();
2701 Register value = temps.Acquire();
2702 DCHECK((rd != value) && (rs != value));
2703 li(value, 0x01010101); // value = 0x01010101;
2704 li(scratch2, 0x55555555); // B0 = 0x55555555;
2705 Srl32(scratch, rs, 1);
2706 And(scratch, scratch, scratch2);
2707 Sub32(scratch, rs, scratch);
2708 li(scratch2, 0x33333333); // B1 = 0x33333333;
2709 slli(rd, scratch2, 4);
2710 or_(scratch2, scratch2, rd);
2711 And(rd, scratch, scratch2);
2712 Srl32(scratch, scratch, 2);
2713 And(scratch, scratch, scratch2);
2714 Add32(scratch, rd, scratch);
2715 srliw(rd, scratch, 4);
2716 Add32(rd, rd, scratch);
2717 li(scratch2, 0xF);
2718 Mul32(scratch2, value, scratch2); // B2 = 0x0F0F0F0F;
2719 And(rd, rd, scratch2);
2720 Mul32(rd, rd, value);
2721 Srl32(rd, rd, shift);
2722 }
2723
Popcnt64(Register rd,Register rs,Register scratch)2724 void TurboAssembler::Popcnt64(Register rd, Register rs, Register scratch) {
2725 DCHECK_NE(scratch, rs);
2726 DCHECK_NE(scratch, rd);
2727 // uint64_t B0 = 0x5555555555555555l; // (T)~(T)0/3
2728 // uint64_t B1 = 0x3333333333333333l; // (T)~(T)0/15*3
2729 // uint64_t B2 = 0x0F0F0F0F0F0F0F0Fl; // (T)~(T)0/255*15
2730 // uint64_t value = 0x0101010101010101l; // (T)~(T)0/255
2731 // uint64_t shift = 24; // (sizeof(T) - 1) * BITS_PER_BYTE
2732
2733 uint64_t shift = 24;
2734 UseScratchRegisterScope temps(this);
2735 BlockTrampolinePoolScope block_trampoline_pool(this);
2736 Register scratch2 = temps.Acquire();
2737 Register value = temps.Acquire();
2738 DCHECK((rd != value) && (rs != value));
2739 li(value, 0x1111111111111111l); // value = 0x1111111111111111l;
2740 li(scratch2, 5);
2741 Mul64(scratch2, value, scratch2); // B0 = 0x5555555555555555l;
2742 Srl64(scratch, rs, 1);
2743 And(scratch, scratch, scratch2);
2744 Sub64(scratch, rs, scratch);
2745 li(scratch2, 3);
2746 Mul64(scratch2, value, scratch2); // B1 = 0x3333333333333333l;
2747 And(rd, scratch, scratch2);
2748 Srl64(scratch, scratch, 2);
2749 And(scratch, scratch, scratch2);
2750 Add64(scratch, rd, scratch);
2751 Srl64(rd, scratch, 4);
2752 Add64(rd, rd, scratch);
2753 li(scratch2, 0xF);
2754 li(value, 0x0101010101010101l); // value = 0x0101010101010101l;
2755 Mul64(scratch2, value, scratch2); // B2 = 0x0F0F0F0F0F0F0F0Fl;
2756 And(rd, rd, scratch2);
2757 Mul64(rd, rd, value);
2758 srli(rd, rd, 32 + shift);
2759 }
2760
TryInlineTruncateDoubleToI(Register result,DoubleRegister double_input,Label * done)2761 void TurboAssembler::TryInlineTruncateDoubleToI(Register result,
2762 DoubleRegister double_input,
2763 Label* done) {
2764 UseScratchRegisterScope temps(this);
2765 Register scratch = temps.Acquire();
2766 // scratch is set to 1 if the truncation raised no exception, 0 otherwise.
2767 Trunc_w_d(result, double_input, scratch);
2768 // If we had no exceptions (i.e., scratch==1) we are done.
2769 Branch(done, eq, scratch, Operand(1));
2770 }
2771
TruncateDoubleToI(Isolate * isolate,Zone * zone,Register result,DoubleRegister double_input,StubCallMode stub_mode)2772 void TurboAssembler::TruncateDoubleToI(Isolate* isolate, Zone* zone,
2773 Register result,
2774 DoubleRegister double_input,
2775 StubCallMode stub_mode) {
2776 Label done;
2777
2778 TryInlineTruncateDoubleToI(result, double_input, &done);
2779
2780 // If we fell through, the inline version didn't succeed, so call the stub
2781 // instead.
2782 push(ra);
2783 Sub64(sp, sp, Operand(kDoubleSize)); // Put input on stack.
2784 fsd(double_input, sp, 0);
2785
2786 if (stub_mode == StubCallMode::kCallWasmRuntimeStub) {
2787 Call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
2788 } else {
2789 Call(BUILTIN_CODE(isolate, DoubleToI), RelocInfo::CODE_TARGET);
2790 }
2791 ld(result, sp, 0);
2792
2793 Add64(sp, sp, Operand(kDoubleSize));
2794 pop(ra);
2795
2796 bind(&done);
2797 }
2798
2799 // BRANCH_ARGS_CHECK checks that conditional jump arguments are correct.
2800 #define BRANCH_ARGS_CHECK(cond, rs, rt) \
2801 DCHECK((cond == cc_always && rs == zero_reg && rt.rm() == zero_reg) || \
2802 (cond != cc_always && (rs != zero_reg || rt.rm() != zero_reg)))
2803
Branch(int32_t offset)2804 void TurboAssembler::Branch(int32_t offset) {
2805 DCHECK(is_int21(offset));
2806 BranchShort(offset);
2807 }
2808
Branch(int32_t offset,Condition cond,Register rs,const Operand & rt,Label::Distance near_jump)2809 void TurboAssembler::Branch(int32_t offset, Condition cond, Register rs,
2810 const Operand& rt, Label::Distance near_jump) {
2811 bool is_near = BranchShortCheck(offset, nullptr, cond, rs, rt);
2812 DCHECK(is_near);
2813 USE(is_near);
2814 }
2815
Branch(Label * L)2816 void TurboAssembler::Branch(Label* L) {
2817 if (L->is_bound()) {
2818 if (is_near(L)) {
2819 BranchShort(L);
2820 } else {
2821 BranchLong(L);
2822 }
2823 } else {
2824 if (is_trampoline_emitted()) {
2825 BranchLong(L);
2826 } else {
2827 BranchShort(L);
2828 }
2829 }
2830 }
2831
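// When the branch target is (or may end up) out of short-branch range, the
// condition is negated and used to skip over an unconditional long branch.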
Branch(Label * L,Condition cond,Register rs,const Operand & rt,Label::Distance near_jump)2832 void TurboAssembler::Branch(Label* L, Condition cond, Register rs,
2833 const Operand& rt, Label::Distance near_jump) {
2834 if (L->is_bound()) {
2835 if (!BranchShortCheck(0, L, cond, rs, rt)) {
2836 if (cond != cc_always) {
2837 Label skip;
2838 Condition neg_cond = NegateCondition(cond);
2839 BranchShort(&skip, neg_cond, rs, rt);
2840 BranchLong(L);
2841 bind(&skip);
2842 } else {
2843 BranchLong(L);
2844 EmitConstPoolWithJumpIfNeeded();
2845 }
2846 }
2847 } else {
2848 if (is_trampoline_emitted() && near_jump == Label::Distance::kFar) {
2849 if (cond != cc_always) {
2850 Label skip;
2851 Condition neg_cond = NegateCondition(cond);
2852 BranchShort(&skip, neg_cond, rs, rt);
2853 BranchLong(L);
2854 bind(&skip);
2855 } else {
2856 BranchLong(L);
2857 EmitConstPoolWithJumpIfNeeded();
2858 }
2859 } else {
2860 BranchShort(L, cond, rs, rt);
2861 }
2862 }
2863 }
2864
Branch(Label * L,Condition cond,Register rs,RootIndex index)2865 void TurboAssembler::Branch(Label* L, Condition cond, Register rs,
2866 RootIndex index) {
2867 UseScratchRegisterScope temps(this);
2868 Register scratch = temps.Acquire();
2869 LoadRoot(scratch, index);
2870 Branch(L, cond, rs, Operand(scratch));
2871 }
2872
BranchShortHelper(int32_t offset,Label * L)2873 void TurboAssembler::BranchShortHelper(int32_t offset, Label* L) {
2874 DCHECK(L == nullptr || offset == 0);
2875 offset = GetOffset(offset, L, OffsetSize::kOffset21);
2876 j(offset);
2877 }
2878
BranchShort(int32_t offset)2879 void TurboAssembler::BranchShort(int32_t offset) {
2880 DCHECK(is_int21(offset));
2881 BranchShortHelper(offset, nullptr);
2882 }
2883
BranchShort(Label * L)2884 void TurboAssembler::BranchShort(Label* L) { BranchShortHelper(0, L); }
2885
GetOffset(int32_t offset,Label * L,OffsetSize bits)2886 int32_t TurboAssembler::GetOffset(int32_t offset, Label* L, OffsetSize bits) {
2887 if (L) {
2888 offset = branch_offset_helper(L, bits);
2889 } else {
2890 DCHECK(is_intn(offset, bits));
2891 }
2892 return offset;
2893 }
2894
GetRtAsRegisterHelper(const Operand & rt,Register scratch)2895 Register TurboAssembler::GetRtAsRegisterHelper(const Operand& rt,
2896 Register scratch) {
2897 Register r2 = no_reg;
2898 if (rt.is_reg()) {
2899 r2 = rt.rm();
2900 } else {
2901 r2 = scratch;
2902 li(r2, rt);
2903 }
2904
2905 return r2;
2906 }
2907
CalculateOffset(Label * L,int32_t * offset,OffsetSize bits)2908 bool TurboAssembler::CalculateOffset(Label* L, int32_t* offset,
2909 OffsetSize bits) {
2910 if (!is_near(L, bits)) return false;
2911 *offset = GetOffset(*offset, L, bits);
2912 return true;
2913 }
2914
CalculateOffset(Label * L,int32_t * offset,OffsetSize bits,Register * scratch,const Operand & rt)2915 bool TurboAssembler::CalculateOffset(Label* L, int32_t* offset, OffsetSize bits,
2916 Register* scratch, const Operand& rt) {
2917 if (!is_near(L, bits)) return false;
2918 *scratch = GetRtAsRegisterHelper(rt, *scratch);
2919 *offset = GetOffset(*offset, L, bits);
2920 return true;
2921 }
2922
BranchShortHelper(int32_t offset,Label * L,Condition cond,Register rs,const Operand & rt)2923 bool TurboAssembler::BranchShortHelper(int32_t offset, Label* L, Condition cond,
2924 Register rs, const Operand& rt) {
2925 DCHECK(L == nullptr || offset == 0);
2926 UseScratchRegisterScope temps(this);
2927 BlockTrampolinePoolScope block_trampoline_pool(this);
2928 Register scratch = no_reg;
2929 if (!rt.is_reg()) {
2930 scratch = temps.Acquire();
2931 li(scratch, rt);
2932 } else {
2933 scratch = rt.rm();
2934 }
2935 {
2936 BlockTrampolinePoolScope block_trampoline_pool(this);
2937 switch (cond) {
2938 case cc_always:
2939 if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false;
2940 j(offset);
2941 EmitConstPoolWithJumpIfNeeded();
2942 break;
2943 case eq:
2944 // rs == rt
2945 if (rt.is_reg() && rs == rt.rm()) {
2946 if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false;
2947 j(offset);
2948 } else {
2949 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
2950 beq(rs, scratch, offset);
2951 }
2952 break;
2953 case ne:
2954 // rs != rt
2955 if (rt.is_reg() && rs == rt.rm()) {
2956 break; // No code needs to be emitted
2957 } else {
2958 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
2959 bne(rs, scratch, offset);
2960 }
2961 break;
2962
2963 // Signed comparison.
2964 case greater:
2965 // rs > rt
2966 if (rt.is_reg() && rs == rt.rm()) {
2967 break; // No code needs to be emitted.
2968 } else {
2969 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
2970 bgt(rs, scratch, offset);
2971 }
2972 break;
2973 case greater_equal:
2974 // rs >= rt
2975 if (rt.is_reg() && rs == rt.rm()) {
2976 if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false;
2977 j(offset);
2978 } else {
2979 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
2980 bge(rs, scratch, offset);
2981 }
2982 break;
2983 case less:
2984 // rs < rt
2985 if (rt.is_reg() && rs == rt.rm()) {
2986 break; // No code needs to be emitted.
2987 } else {
2988 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
2989 blt(rs, scratch, offset);
2990 }
2991 break;
2992 case less_equal:
2993 // rs <= rt
2994 if (rt.is_reg() && rs == rt.rm()) {
2995 if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false;
2996 j(offset);
2997 } else {
2998 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
2999 ble(rs, scratch, offset);
3000 }
3001 break;
3002
3003 // Unsigned comparison.
3004 case Ugreater:
3005 // rs > rt
3006 if (rt.is_reg() && rs == rt.rm()) {
3007 break; // No code needs to be emitted.
3008 } else {
3009 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
3010 bgtu(rs, scratch, offset);
3011 }
3012 break;
3013 case Ugreater_equal:
3014 // rs >= rt
3015 if (rt.is_reg() && rs == rt.rm()) {
3016 if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false;
3017 j(offset);
3018 } else {
3019 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
3020 bgeu(rs, scratch, offset);
3021 }
3022 break;
3023 case Uless:
3024 // rs < rt
3025 if (rt.is_reg() && rs == rt.rm()) {
3026 break; // No code needs to be emitted.
3027 } else {
3028 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
3029 bltu(rs, scratch, offset);
3030 }
3031 break;
3032 case Uless_equal:
3033 // rs <= rt
3034 if (rt.is_reg() && rs == rt.rm()) {
3035 if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false;
3036 j(offset);
3037 } else {
3038 if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
3039 bleu(rs, scratch, offset);
3040 }
3041 break;
3042 default:
3043 UNREACHABLE();
3044 }
3045 }
3046
3047 CheckTrampolinePoolQuick(1);
3048 return true;
3049 }
3050
BranchShortCheck(int32_t offset,Label * L,Condition cond,Register rs,const Operand & rt)3051 bool TurboAssembler::BranchShortCheck(int32_t offset, Label* L, Condition cond,
3052 Register rs, const Operand& rt) {
3053 BRANCH_ARGS_CHECK(cond, rs, rt);
3054
3055 if (!L) {
3056 DCHECK(is_int13(offset));
3057 return BranchShortHelper(offset, nullptr, cond, rs, rt);
3058 } else {
3059 DCHECK_EQ(offset, 0);
3060 return BranchShortHelper(0, L, cond, rs, rt);
3061 }
3062 }
3063
BranchShort(int32_t offset,Condition cond,Register rs,const Operand & rt)3064 void TurboAssembler::BranchShort(int32_t offset, Condition cond, Register rs,
3065 const Operand& rt) {
3066 BranchShortCheck(offset, nullptr, cond, rs, rt);
3067 }
3068
BranchShort(Label * L,Condition cond,Register rs,const Operand & rt)3069 void TurboAssembler::BranchShort(Label* L, Condition cond, Register rs,
3070 const Operand& rt) {
3071 BranchShortCheck(0, L, cond, rs, rt);
3072 }
3073
BranchAndLink(int32_t offset)3074 void TurboAssembler::BranchAndLink(int32_t offset) {
3075 BranchAndLinkShort(offset);
3076 }
3077
BranchAndLink(int32_t offset,Condition cond,Register rs,const Operand & rt)3078 void TurboAssembler::BranchAndLink(int32_t offset, Condition cond, Register rs,
3079 const Operand& rt) {
3080 bool is_near = BranchAndLinkShortCheck(offset, nullptr, cond, rs, rt);
3081 DCHECK(is_near);
3082 USE(is_near);
3083 }
3084
BranchAndLink(Label * L)3085 void TurboAssembler::BranchAndLink(Label* L) {
3086 if (L->is_bound()) {
3087 if (is_near(L)) {
3088 BranchAndLinkShort(L);
3089 } else {
3090 BranchAndLinkLong(L);
3091 }
3092 } else {
3093 if (is_trampoline_emitted()) {
3094 BranchAndLinkLong(L);
3095 } else {
3096 BranchAndLinkShort(L);
3097 }
3098 }
3099 }
3100
BranchAndLink(Label * L,Condition cond,Register rs,const Operand & rt)3101 void TurboAssembler::BranchAndLink(Label* L, Condition cond, Register rs,
3102 const Operand& rt) {
3103 if (L->is_bound()) {
3104 if (!BranchAndLinkShortCheck(0, L, cond, rs, rt)) {
3105 Label skip;
3106 Condition neg_cond = NegateCondition(cond);
3107 BranchShort(&skip, neg_cond, rs, rt);
3108 BranchAndLinkLong(L);
3109 bind(&skip);
3110 }
3111 } else {
3112 if (is_trampoline_emitted()) {
3113 Label skip;
3114 Condition neg_cond = NegateCondition(cond);
3115 BranchShort(&skip, neg_cond, rs, rt);
3116 BranchAndLinkLong(L);
3117 bind(&skip);
3118 } else {
3119 BranchAndLinkShortCheck(0, L, cond, rs, rt);
3120 }
3121 }
3122 }
3123
BranchAndLinkShortHelper(int32_t offset,Label * L)3124 void TurboAssembler::BranchAndLinkShortHelper(int32_t offset, Label* L) {
3125 DCHECK(L == nullptr || offset == 0);
3126 offset = GetOffset(offset, L, OffsetSize::kOffset21);
3127 jal(offset);
3128 }
3129
BranchAndLinkShort(int32_t offset)3130 void TurboAssembler::BranchAndLinkShort(int32_t offset) {
3131 DCHECK(is_int21(offset));
3132 BranchAndLinkShortHelper(offset, nullptr);
3133 }
3134
BranchAndLinkShort(Label * L)3135 void TurboAssembler::BranchAndLinkShort(Label* L) {
3136 BranchAndLinkShortHelper(0, L);
3137 }
3138
3139 // RISC-V has no conditional jump-and-link instruction, so the conditional
3140 // forms below emit a short branch over the jal using the negated condition,
3141 // followed by an unconditional jal to the target.
BranchAndLinkShortHelper(int32_t offset,Label * L,Condition cond,Register rs,const Operand & rt)3142 bool TurboAssembler::BranchAndLinkShortHelper(int32_t offset, Label* L,
3143 Condition cond, Register rs,
3144 const Operand& rt) {
3145 DCHECK(L == nullptr || offset == 0);
3146 if (!is_near(L, OffsetSize::kOffset21)) return false;
3147
3148 UseScratchRegisterScope temps(this);
3149 Register scratch = temps.Acquire();
3150 BlockTrampolinePoolScope block_trampoline_pool(this);
3151
3152 if (cond == cc_always) {
3153 offset = GetOffset(offset, L, OffsetSize::kOffset21);
3154 jal(offset);
3155 } else {
3156 Branch(kInstrSize * 2, NegateCondition(cond), rs,
3157 Operand(GetRtAsRegisterHelper(rt, scratch)));
3158 offset = GetOffset(offset, L, OffsetSize::kOffset21);
3159 jal(offset);
3160 }
3161
3162 return true;
3163 }
3164
BranchAndLinkShortCheck(int32_t offset,Label * L,Condition cond,Register rs,const Operand & rt)3165 bool TurboAssembler::BranchAndLinkShortCheck(int32_t offset, Label* L,
3166 Condition cond, Register rs,
3167 const Operand& rt) {
3168 BRANCH_ARGS_CHECK(cond, rs, rt);
3169
3170 if (!L) {
3171 DCHECK(is_int21(offset));
3172 return BranchAndLinkShortHelper(offset, nullptr, cond, rs, rt);
3173 } else {
3174 DCHECK_EQ(offset, 0);
3175 return BranchAndLinkShortHelper(0, L, cond, rs, rt);
3176 }
3177 }
3178
LoadFromConstantsTable(Register destination,int constant_index)3179 void TurboAssembler::LoadFromConstantsTable(Register destination,
3180 int constant_index) {
3181 DCHECK(RootsTable::IsImmortalImmovable(RootIndex::kBuiltinsConstantsTable));
3182 LoadRoot(destination, RootIndex::kBuiltinsConstantsTable);
3183 LoadTaggedPointerField(
3184 destination, FieldMemOperand(destination, FixedArray::OffsetOfElementAt(
3185 constant_index)));
3186 }
3187
LoadRootRelative(Register destination,int32_t offset)3188 void TurboAssembler::LoadRootRelative(Register destination, int32_t offset) {
3189 Ld(destination, MemOperand(kRootRegister, offset));
3190 }
3191
LoadRootRegisterOffset(Register destination,intptr_t offset)3192 void TurboAssembler::LoadRootRegisterOffset(Register destination,
3193 intptr_t offset) {
3194 if (offset == 0) {
3195 Move(destination, kRootRegister);
3196 } else {
3197 Add64(destination, kRootRegister, Operand(offset));
3198 }
3199 }
3200
Jump(Register target,Condition cond,Register rs,const Operand & rt)3201 void TurboAssembler::Jump(Register target, Condition cond, Register rs,
3202 const Operand& rt) {
3203 BlockTrampolinePoolScope block_trampoline_pool(this);
3204 if (cond == cc_always) {
3205 jr(target);
3206 ForceConstantPoolEmissionWithoutJump();
3207 } else {
3208 BRANCH_ARGS_CHECK(cond, rs, rt);
3209 Branch(kInstrSize * 2, NegateCondition(cond), rs, rt);
3210 jr(target);
3211 }
3212 }
3213
Jump(intptr_t target,RelocInfo::Mode rmode,Condition cond,Register rs,const Operand & rt)3214 void TurboAssembler::Jump(intptr_t target, RelocInfo::Mode rmode,
3215 Condition cond, Register rs, const Operand& rt) {
3216 Label skip;
3217 if (cond != cc_always) {
3218 Branch(&skip, NegateCondition(cond), rs, rt);
3219 }
3220 {
3221 BlockTrampolinePoolScope block_trampoline_pool(this);
3222 li(t6, Operand(target, rmode));
3223 Jump(t6, al, zero_reg, Operand(zero_reg));
3224 EmitConstPoolWithJumpIfNeeded();
3225 bind(&skip);
3226 }
3227 }
3228
Jump(Address target,RelocInfo::Mode rmode,Condition cond,Register rs,const Operand & rt)3229 void TurboAssembler::Jump(Address target, RelocInfo::Mode rmode, Condition cond,
3230 Register rs, const Operand& rt) {
3231 DCHECK(!RelocInfo::IsCodeTarget(rmode));
3232 Jump(static_cast<intptr_t>(target), rmode, cond, rs, rt);
3233 }
3234
Jump(Handle<Code> code,RelocInfo::Mode rmode,Condition cond,Register rs,const Operand & rt)3235 void TurboAssembler::Jump(Handle<Code> code, RelocInfo::Mode rmode,
3236 Condition cond, Register rs, const Operand& rt) {
3237 DCHECK(RelocInfo::IsCodeTarget(rmode));
3238
3239 BlockTrampolinePoolScope block_trampoline_pool(this);
3240 Builtin builtin = Builtin::kNoBuiltinId;
3241 bool target_is_isolate_independent_builtin =
3242 isolate()->builtins()->IsBuiltinHandle(code, &builtin) &&
3243 Builtins::IsIsolateIndependent(builtin);
3244 if (target_is_isolate_independent_builtin &&
3245 options().use_pc_relative_calls_and_jumps) {
3246 int32_t code_target_index = AddCodeTarget(code);
3247 Label skip;
3248 BlockTrampolinePoolScope block_trampoline_pool(this);
3249 if (cond != al) {
3250 Branch(&skip, NegateCondition(cond), rs, rt);
3251 }
3252 RecordRelocInfo(RelocInfo::RELATIVE_CODE_TARGET);
3253 GenPCRelativeJump(t6, code_target_index);
3254 bind(&skip);
3255 return;
3256 } else if (root_array_available_ && options().isolate_independent_code &&
3257 target_is_isolate_independent_builtin) {
3258 int offset = static_cast<int>(code->builtin_id()) * kSystemPointerSize +
3259 IsolateData::builtin_entry_table_offset();
3260 Ld(t6, MemOperand(kRootRegister, offset));
3261 Jump(t6, cond, rs, rt);
3262 return;
3263 } else if (options().inline_offheap_trampolines &&
3264 target_is_isolate_independent_builtin) {
3265 // Inline the trampoline.
3266 RecordCommentForOffHeapTrampoline(builtin);
3267 li(t6, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
3268 Jump(t6, cond, rs, rt);
3269 RecordComment("]");
3270 return;
3271 }
3272
3273 int32_t target_index = AddCodeTarget(code);
3274 Jump(static_cast<intptr_t>(target_index), rmode, cond, rs, rt);
3275 }
3276
Jump(const ExternalReference & reference)3277 void TurboAssembler::Jump(const ExternalReference& reference) {
3278 li(t6, reference);
3279 Jump(t6);
3280 }
3281
3282 // Note: To call gcc-compiled C code on riscv64, you must call through t6.
Call(Register target,Condition cond,Register rs,const Operand & rt)3283 void TurboAssembler::Call(Register target, Condition cond, Register rs,
3284 const Operand& rt) {
3285 BlockTrampolinePoolScope block_trampoline_pool(this);
3286 if (cond == cc_always) {
3287 jalr(ra, target, 0);
3288 } else {
3289 BRANCH_ARGS_CHECK(cond, rs, rt);
3290 Branch(kInstrSize * 2, NegateCondition(cond), rs, rt);
3291 jalr(ra, target, 0);
3292 }
3293 }
3294
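// JumpIfIsInRange (below) uses the usual unsigned range-check trick: in
// unsigned arithmetic, (lower <= value && value <= higher) is equivalent to
// (value - lower <= higher - lower), so one subtraction plus a single
// unsigned-less-or-equal branch suffices. Roughly, in C:
//   if (value - lower_limit <= higher_limit - lower_limit) goto on_in_range;
// The subtraction is skipped entirely when lower_limit is 0.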
JumpIfIsInRange(Register value,unsigned lower_limit,unsigned higher_limit,Label * on_in_range)3295 void MacroAssembler::JumpIfIsInRange(Register value, unsigned lower_limit,
3296 unsigned higher_limit,
3297 Label* on_in_range) {
3298 if (lower_limit != 0) {
3299 UseScratchRegisterScope temps(this);
3300 Register scratch = temps.Acquire();
3301 Sub64(scratch, value, Operand(lower_limit));
3302 Branch(on_in_range, Uless_equal, scratch,
3303 Operand(higher_limit - lower_limit));
3304 } else {
3305 Branch(on_in_range, Uless_equal, value,
3306 Operand(higher_limit - lower_limit));
3307 }
3308 }
3309
Call(Address target,RelocInfo::Mode rmode,Condition cond,Register rs,const Operand & rt)3310 void TurboAssembler::Call(Address target, RelocInfo::Mode rmode, Condition cond,
3311 Register rs, const Operand& rt) {
3312 li(t6, Operand(static_cast<int64_t>(target), rmode), ADDRESS_LOAD);
3313 Call(t6, cond, rs, rt);
3314 }
3315
Call(Handle<Code> code,RelocInfo::Mode rmode,Condition cond,Register rs,const Operand & rt)3316 void TurboAssembler::Call(Handle<Code> code, RelocInfo::Mode rmode,
3317 Condition cond, Register rs, const Operand& rt) {
3318 Builtin builtin = Builtin::kNoBuiltinId;
3319 bool target_is_isolate_independent_builtin =
3320 isolate()->builtins()->IsBuiltinHandle(code, &builtin) &&
3321 Builtins::IsIsolateIndependent(builtin);
3322 if (target_is_isolate_independent_builtin &&
3323 options().use_pc_relative_calls_and_jumps) {
3324 int32_t code_target_index = AddCodeTarget(code);
3325 Label skip;
3326 BlockTrampolinePoolScope block_trampoline_pool(this);
3327 RecordCommentForOffHeapTrampoline(builtin);
3328 if (cond != al) {
3329 Branch(&skip, NegateCondition(cond), rs, rt);
3330 }
3331 RecordRelocInfo(RelocInfo::RELATIVE_CODE_TARGET);
3332 GenPCRelativeJumpAndLink(t6, code_target_index);
3333 bind(&skip);
3334 RecordComment("]");
3335 return;
3336 } else if (root_array_available_ && options().isolate_independent_code &&
3337 target_is_isolate_independent_builtin) {
3338 int offset = static_cast<int>(code->builtin_id()) * kSystemPointerSize +
3339 IsolateData::builtin_entry_table_offset();
3340 LoadRootRelative(t6, offset);
3341 Call(t6, cond, rs, rt);
3342 return;
3343 } else if (options().inline_offheap_trampolines &&
3344 target_is_isolate_independent_builtin) {
3345 // Inline the trampoline.
3346 RecordCommentForOffHeapTrampoline(builtin);
3347 li(t6, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
3348 Call(t6, cond, rs, rt);
3349 RecordComment("]");
3350 return;
3351 }
3352
3353 DCHECK(RelocInfo::IsCodeTarget(rmode));
3354 DCHECK(code->IsExecutable());
3355 int32_t target_index = AddCodeTarget(code);
3356 Call(static_cast<Address>(target_index), rmode, cond, rs, rt);
3357 }
3358
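// In outline, the lookup below computes (with index = Smi-untagged builtin):
//   entry = Ld(kRootRegister + index * kSystemPointerSize
//                            + IsolateData::builtin_entry_table_offset())
// i.e. the builtin's entry point is read out of the isolate's builtin entry
// table, which is addressable off the root register.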
LoadEntryFromBuiltinIndex(Register builtin)3359 void TurboAssembler::LoadEntryFromBuiltinIndex(Register builtin) {
3360 STATIC_ASSERT(kSystemPointerSize == 8);
3361 STATIC_ASSERT(kSmiTagSize == 1);
3362 STATIC_ASSERT(kSmiTag == 0);
3363
3364 // The builtin register contains the builtin index as a Smi.
3365 SmiUntag(builtin, builtin);
3366 CalcScaledAddress(builtin, kRootRegister, builtin, kSystemPointerSizeLog2);
3367 Ld(builtin, MemOperand(builtin, IsolateData::builtin_entry_table_offset()));
3368 }
3369
CallBuiltinByIndex(Register builtin)3370 void TurboAssembler::CallBuiltinByIndex(Register builtin) {
3371 LoadEntryFromBuiltinIndex(builtin);
3372 Call(builtin);
3373 }
3374
CallBuiltin(Builtin builtin)3375 void TurboAssembler::CallBuiltin(Builtin builtin) {
3376 RecordCommentForOffHeapTrampoline(builtin);
3377 if (options().short_builtin_calls) {
3378 Call(BuiltinEntry(builtin), RelocInfo::RUNTIME_ENTRY);
3379 } else {
3380 Call(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET);
3381 }
3382 RecordComment("]");
3383 }
3384
TailCallBuiltin(Builtin builtin)3385 void TurboAssembler::TailCallBuiltin(Builtin builtin) {
3386 RecordCommentForOffHeapTrampoline(builtin);
3387 if (options().short_builtin_calls) {
3388 Jump(BuiltinEntry(builtin), RelocInfo::RUNTIME_ENTRY);
3389 } else {
3390 Jump(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET);
3391 }
3392 RecordComment("]");
3393 }
3394
LoadEntryFromBuiltin(Builtin builtin,Register destination)3395 void TurboAssembler::LoadEntryFromBuiltin(Builtin builtin,
3396 Register destination) {
3397 Ld(destination, EntryFromBuiltinAsOperand(builtin));
3398 }
3399
EntryFromBuiltinAsOperand(Builtin builtin)3400 MemOperand TurboAssembler::EntryFromBuiltinAsOperand(Builtin builtin) {
3401 DCHECK(root_array_available());
3402 return MemOperand(kRootRegister,
3403 IsolateData::BuiltinEntrySlotOffset(builtin));
3404 }
3405
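// PatchAndJump (below) emits a short PC-relative sequence followed by an
// inline 64-bit literal holding the jump target; the literal can later be
// patched in place. Roughly (assuming 4-byte instructions in this sequence):
//   auipc scratch, 0        ; scratch = address of this instruction
//   ld    t6, 16(scratch)   ; load the literal emitted after the code
//   jr    t6
//   nop                     ; keeps the 8-byte literal aligned
//   .dword target           ; patchable destination, alignment checked below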
PatchAndJump(Address target)3406 void TurboAssembler::PatchAndJump(Address target) {
3407 UseScratchRegisterScope temps(this);
3408 Register scratch = temps.Acquire();
3409 auipc(scratch, 0); // Load PC into scratch
3410 Ld(t6, MemOperand(scratch, kInstrSize * 4));
3411 jr(t6);
3412 nop(); // For alignment
3413 DCHECK_EQ(reinterpret_cast<uint64_t>(pc_) % 8, 0);
3414 *reinterpret_cast<uint64_t*>(pc_) = target; // pc_ should be aligned.
3415 pc_ += sizeof(uint64_t);
3416 }
3417
StoreReturnAddressAndCall(Register target)3418 void TurboAssembler::StoreReturnAddressAndCall(Register target) {
3419 // This generates the final instruction sequence for calls to C functions
3420 // once an exit frame has been constructed.
3421 //
3422 // Note that this assumes the caller code (i.e. the Code object currently
3423 // being generated) is immovable or that the callee function cannot trigger
3424 // GC, since the callee function will return to it.
3425 //
3426 // Compute the return address in ra so that it points to the instruction
3427 // just past the jalr(t6) below: auipc captures the current pc and the
3428 // following addi advances it past the rest of the call sequence.
3429 //
3430 Assembler::BlockTrampolinePoolScope block_trampoline_pool(this);
3431 int kNumInstructionsToJump = 5;
3432 if (FLAG_riscv_c_extension) kNumInstructionsToJump = 4;
3433 Label find_ra;
3434 // Adjust the value in ra to point to the correct return location, one
3435 // instruction past the real call into C code (the jalr(t6)), and push it.
3436 // This is the return address of the exit frame.
3437 auipc(ra, 0); // Set ra to the current PC
3438 bind(&find_ra);
3439 addi(ra, ra,
3440 (kNumInstructionsToJump + 1) *
3441 kInstrSize); // Set ra to insn after the call
3442
3443 // This spot was reserved in EnterExitFrame.
3444 Sd(ra, MemOperand(sp));
3445 addi(sp, sp, -kCArgsSlotsSize);
3446 // Stack is still aligned.
3447
3448 // Call the C routine.
3449 Mv(t6,
3450 target); // Function pointer to t6 to conform to ABI for PIC.
3451 jalr(t6);
3452 // Make sure the stored 'ra' points to this position.
3453 DCHECK_EQ(kNumInstructionsToJump, InstructionsGeneratedSince(&find_ra));
3454 }
3455
Ret(Condition cond,Register rs,const Operand & rt)3456 void TurboAssembler::Ret(Condition cond, Register rs, const Operand& rt) {
3457 Jump(ra, cond, rs, rt);
3458 if (cond == al) {
3459 ForceConstantPoolEmissionWithoutJump();
3460 }
3461 }
3462
3463
BranchLong(Label * L)3464 void TurboAssembler::BranchLong(Label* L) {
3465 // Generate position independent long branch.
3466 BlockTrampolinePoolScope block_trampoline_pool(this);
3467 int64_t imm64;
3468 imm64 = branch_long_offset(L);
3469 GenPCRelativeJump(t6, imm64);
3470 EmitConstPoolWithJumpIfNeeded();
3471 }
3472
BranchAndLinkLong(Label * L)3473 void TurboAssembler::BranchAndLinkLong(Label* L) {
3474 // Generate position independent long branch and link.
3475 BlockTrampolinePoolScope block_trampoline_pool(this);
3476 int64_t imm64;
3477 imm64 = branch_long_offset(L);
3478 GenPCRelativeJumpAndLink(t6, imm64);
3479 }
3480
DropAndRet(int drop)3481 void TurboAssembler::DropAndRet(int drop) {
3482 Add64(sp, sp, drop * kSystemPointerSize);
3483 Ret();
3484 }
3485
DropAndRet(int drop,Condition cond,Register r1,const Operand & r2)3486 void TurboAssembler::DropAndRet(int drop, Condition cond, Register r1,
3487 const Operand& r2) {
3488 // Both Drop and Ret need to be conditional.
3489 Label skip;
3490 if (cond != cc_always) {
3491 Branch(&skip, NegateCondition(cond), r1, r2);
3492 }
3493
3494 Drop(drop);
3495 Ret();
3496
3497 if (cond != cc_always) {
3498 bind(&skip);
3499 }
3500 }
3501
Drop(int count,Condition cond,Register reg,const Operand & op)3502 void TurboAssembler::Drop(int count, Condition cond, Register reg,
3503 const Operand& op) {
3504 if (count <= 0) {
3505 return;
3506 }
3507
3508 Label skip;
3509
3510 if (cond != al) {
3511 Branch(&skip, NegateCondition(cond), reg, op);
3512 }
3513
3514 Add64(sp, sp, Operand(count * kSystemPointerSize));
3515
3516 if (cond != al) {
3517 bind(&skip);
3518 }
3519 }
3520
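// Swap (below) falls back to the classic three-XOR exchange when no scratch
// register is available: reg1 ^= reg2; reg2 ^= reg1; reg1 ^= reg2. This needs
// no temporary, but would zero both registers if reg1 == reg2, so callers are
// presumably expected to pass distinct registers.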
Swap(Register reg1,Register reg2,Register scratch)3521 void MacroAssembler::Swap(Register reg1, Register reg2, Register scratch) {
3522 if (scratch == no_reg) {
3523 Xor(reg1, reg1, Operand(reg2));
3524 Xor(reg2, reg2, Operand(reg1));
3525 Xor(reg1, reg1, Operand(reg2));
3526 } else {
3527 Mv(scratch, reg1);
3528 Mv(reg1, reg2);
3529 Mv(reg2, scratch);
3530 }
3531 }
3532
Call(Label * target)3533 void TurboAssembler::Call(Label* target) { BranchAndLink(target); }
3534
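// LoadAddress (below) materializes a nearby label address with auipc+addi.
// The 32-bit offset is split into Hi20/Lo12 pieces; adding 0x800 before the
// shift compensates for addi sign-extending its 12-bit immediate. For
// example, for offset 0x1801: Lo12 = -0x7FF and Hi20 = 0x2, so
//   auipc dst, 0x2          ; dst = pc + 0x2000
//   addi  dst, dst, -0x7FF  ; dst = pc + 0x1801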
LoadAddress(Register dst,Label * target,RelocInfo::Mode rmode)3535 void TurboAssembler::LoadAddress(Register dst, Label* target,
3536 RelocInfo::Mode rmode) {
3537 int32_t offset;
3538 if (CalculateOffset(target, &offset, OffsetSize::kOffset32)) {
3539 CHECK(is_int32(offset + 0x800));
3540 int32_t Hi20 = (((int32_t)offset + 0x800) >> 12);
3541 int32_t Lo12 = (int32_t)offset << 20 >> 20;
3542 BlockTrampolinePoolScope block_trampoline_pool(this);
3543 auipc(dst, Hi20);
3544 addi(dst, dst, Lo12);
3545 } else {
3546 uint64_t address = jump_address(target);
3547 li(dst, Operand(address, rmode), ADDRESS_LOAD);
3548 }
3549 }
3550
Push(Smi smi)3551 void TurboAssembler::Push(Smi smi) {
3552 UseScratchRegisterScope temps(this);
3553 Register scratch = temps.Acquire();
3554 li(scratch, Operand(smi));
3555 push(scratch);
3556 }
3557
PushArray(Register array,Register size,PushArrayOrder order)3558 void TurboAssembler::PushArray(Register array, Register size,
3559 PushArrayOrder order) {
3560 UseScratchRegisterScope temps(this);
3561 Register scratch = temps.Acquire();
3562 Register scratch2 = temps.Acquire();
3563 Label loop, entry;
3564 if (order == PushArrayOrder::kReverse) {
3565 Mv(scratch, zero_reg);
3566 jmp(&entry);
3567 bind(&loop);
3568 CalcScaledAddress(scratch2, array, scratch, kSystemPointerSizeLog2);
3569 Ld(scratch2, MemOperand(scratch2));
3570 push(scratch2);
3571 Add64(scratch, scratch, Operand(1));
3572 bind(&entry);
3573 Branch(&loop, less, scratch, Operand(size));
3574 } else {
3575 Mv(scratch, size);
3576 jmp(&entry);
3577 bind(&loop);
3578 CalcScaledAddress(scratch2, array, scratch, kSystemPointerSizeLog2);
3579 Ld(scratch2, MemOperand(scratch2));
3580 push(scratch2);
3581 bind(&entry);
3582 Add64(scratch, scratch, Operand(-1));
3583 Branch(&loop, greater_equal, scratch, Operand(zero_reg));
3584 }
3585 }
3586
Push(Handle<HeapObject> handle)3587 void TurboAssembler::Push(Handle<HeapObject> handle) {
3588 UseScratchRegisterScope temps(this);
3589 Register scratch = temps.Acquire();
3590 li(scratch, Operand(handle));
3591 push(scratch);
3592 }
3593
3594 // ---------------------------------------------------------------------------
3595 // Exception handling.
3596
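// A stack handler occupies two words: the link to the next (older) handler at
// StackHandlerConstants::kNextOffset plus one word of padding. After the two
// pushes below, sp is the address of the new handler, which is then published
// as the current handler in the isolate's handler slot.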
PushStackHandler()3597 void MacroAssembler::PushStackHandler() {
3598 // Adjust this code if not the case.
3599 STATIC_ASSERT(StackHandlerConstants::kSize == 2 * kSystemPointerSize);
3600 STATIC_ASSERT(StackHandlerConstants::kNextOffset == 0 * kSystemPointerSize);
3601
3602 Push(Smi::zero()); // Padding.
3603
3604 // Link the current handler as the next handler.
3605 UseScratchRegisterScope temps(this);
3606 Register handler_address = temps.Acquire();
3607 li(handler_address,
3608 ExternalReference::Create(IsolateAddressId::kHandlerAddress, isolate()));
3609 Register handler = temps.Acquire();
3610 Ld(handler, MemOperand(handler_address));
3611 push(handler);
3612
3613 // Set this new handler as the current one.
3614 Sd(sp, MemOperand(handler_address));
3615 }
3616
PopStackHandler()3617 void MacroAssembler::PopStackHandler() {
3618 STATIC_ASSERT(StackHandlerConstants::kNextOffset == 0);
3619 pop(a1);
3620 Add64(sp, sp,
3621 Operand(static_cast<int64_t>(StackHandlerConstants::kSize -
3622 kSystemPointerSize)));
3623 UseScratchRegisterScope temps(this);
3624 Register scratch = temps.Acquire();
3625 li(scratch,
3626 ExternalReference::Create(IsolateAddressId::kHandlerAddress, isolate()));
3627 Sd(a1, MemOperand(scratch));
3628 }
3629
FPUCanonicalizeNaN(const DoubleRegister dst,const DoubleRegister src)3630 void TurboAssembler::FPUCanonicalizeNaN(const DoubleRegister dst,
3631 const DoubleRegister src) {
3632 // Subtracting 0.0 preserves all inputs except for signalling NaNs, which
3633 // become quiet NaNs. We use fsub rather than fadd because fsub preserves -0.0
3634 // inputs: -0.0 + 0.0 = 0.0, but -0.0 - 0.0 = -0.0.
3635 fsub_d(dst, src, kDoubleRegZero);
3636 }
3637
MovFromFloatResult(const DoubleRegister dst)3638 void TurboAssembler::MovFromFloatResult(const DoubleRegister dst) {
3639 Move(dst, fa0); // Reg fa0 is FP return value.
3640 }
3641
MovFromFloatParameter(const DoubleRegister dst)3642 void TurboAssembler::MovFromFloatParameter(const DoubleRegister dst) {
3643 Move(dst, fa0); // Reg fa0 is FP first argument value.
3644 }
3645
MovToFloatParameter(DoubleRegister src)3646 void TurboAssembler::MovToFloatParameter(DoubleRegister src) { Move(fa0, src); }
3647
MovToFloatResult(DoubleRegister src)3648 void TurboAssembler::MovToFloatResult(DoubleRegister src) { Move(fa0, src); }
3649
MovToFloatParameters(DoubleRegister src1,DoubleRegister src2)3650 void TurboAssembler::MovToFloatParameters(DoubleRegister src1,
3651 DoubleRegister src2) {
3652 const DoubleRegister fparg2 = fa1;
3653 if (src2 == fa0) {
3654 DCHECK(src1 != fparg2);
3655 Move(fparg2, src2);
3656 Move(fa0, src1);
3657 } else {
3658 Move(fa0, src1);
3659 Move(fparg2, src2);
3660 }
3661 }
3662
3663 // -----------------------------------------------------------------------------
3664 // JavaScript invokes.
3665
LoadStackLimit(Register destination,StackLimitKind kind)3666 void MacroAssembler::LoadStackLimit(Register destination, StackLimitKind kind) {
3667 DCHECK(root_array_available());
3668 Isolate* isolate = this->isolate();
3669 ExternalReference limit =
3670 kind == StackLimitKind::kRealStackLimit
3671 ? ExternalReference::address_of_real_jslimit(isolate)
3672 : ExternalReference::address_of_jslimit(isolate);
3673 DCHECK(TurboAssembler::IsAddressableThroughRootRegister(isolate, limit));
3674
3675 intptr_t offset =
3676 TurboAssembler::RootRegisterOffsetForExternalReference(isolate, limit);
3677 CHECK(is_int32(offset));
3678 Ld(destination, MemOperand(kRootRegister, static_cast<int32_t>(offset)));
3679 }
3680
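// In outline, StackOverflowCheck (below) computes
//   scratch1 = sp - real stack limit               // bytes still available
//   scratch2 = num_args << kSystemPointerSizeLog2  // bytes required
// and branches on a signed comparison, since scratch1 can already be negative
// if the stack has been overflowed.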
StackOverflowCheck(Register num_args,Register scratch1,Register scratch2,Label * stack_overflow,Label * done)3681 void MacroAssembler::StackOverflowCheck(Register num_args, Register scratch1,
3682 Register scratch2,
3683 Label* stack_overflow, Label* done) {
3684 // Check the stack for overflow. We are not trying to catch
3685 // interruptions (e.g. debug break and preemption) here, so the "real stack
3686 // limit" is checked.
3687 DCHECK(stack_overflow != nullptr || done != nullptr);
3688 LoadStackLimit(scratch1, StackLimitKind::kRealStackLimit);
3689 // Make scratch1 the space we have left. The stack might already be overflowed
3690 // here which will cause scratch1 to become negative.
3691 Sub64(scratch1, sp, scratch1);
3692 // Check if the arguments will overflow the stack.
3693 Sll64(scratch2, num_args, kSystemPointerSizeLog2);
3694 // Signed comparison.
3695 if (stack_overflow != nullptr) {
3696 Branch(stack_overflow, le, scratch1, Operand(scratch2));
3697 } else if (done != nullptr) {
3698 Branch(done, gt, scratch1, Operand(scratch2));
3699 } else {
3700 UNREACHABLE();
3701 }
3702 }
3703
InvokePrologue(Register expected_parameter_count,Register actual_parameter_count,Label * done,InvokeType type)3704 void MacroAssembler::InvokePrologue(Register expected_parameter_count,
3705 Register actual_parameter_count,
3706 Label* done, InvokeType type) {
3707 Label regular_invoke;
3708
3709 // a0: actual arguments count
3710 // a1: function (passed through to callee)
3711 // a2: expected arguments count
3712
3713 DCHECK_EQ(actual_parameter_count, a0);
3714 DCHECK_EQ(expected_parameter_count, a2);
3715
3716 // If the expected parameter count is equal to the adaptor sentinel, no need
3717 // to push undefined values as arguments.
3718 if (kDontAdaptArgumentsSentinel != 0) {
3719 Branch(&regular_invoke, eq, expected_parameter_count,
3720 Operand(kDontAdaptArgumentsSentinel));
3721 }
3722 // If overapplication or if the actual argument count is equal to the
3723 // formal parameter count, no need to push extra undefined values.
3724 Sub64(expected_parameter_count, expected_parameter_count,
3725 actual_parameter_count);
3726 Branch(&regular_invoke, le, expected_parameter_count, Operand(zero_reg));
3727
3728 Label stack_overflow;
3729 {
3730 UseScratchRegisterScope temps(this);
3731 StackOverflowCheck(expected_parameter_count, temps.Acquire(),
3732 temps.Acquire(), &stack_overflow);
3733 }
3734 // Underapplication. Move the arguments already in the stack, including the
3735 // receiver and the return address.
3736 {
3737 Label copy;
3738 Register src = a6, dest = a7;
3739 Move(src, sp);
3740 Sll64(t0, expected_parameter_count, kSystemPointerSizeLog2);
3741 Sub64(sp, sp, Operand(t0));
3742 // Update stack pointer.
3743 Move(dest, sp);
3744 Move(t0, actual_parameter_count);
3745 bind(&copy);
3746 Ld(t1, MemOperand(src, 0));
3747 Sd(t1, MemOperand(dest, 0));
3748 Sub64(t0, t0, Operand(1));
3749 Add64(src, src, Operand(kSystemPointerSize));
3750 Add64(dest, dest, Operand(kSystemPointerSize));
3751 Branch(&copy, gt, t0, Operand(zero_reg));
3752 }
3753
3754 // Fill remaining expected arguments with undefined values.
3755 LoadRoot(t0, RootIndex::kUndefinedValue);
3756 {
3757 Label loop;
3758 bind(&loop);
3759 Sd(t0, MemOperand(a7, 0));
3760 Sub64(expected_parameter_count, expected_parameter_count, Operand(1));
3761 Add64(a7, a7, Operand(kSystemPointerSize));
3762 Branch(&loop, gt, expected_parameter_count, Operand(zero_reg));
3763 }
3764 Branch(&regular_invoke);
3765
3766 bind(&stack_overflow);
3767 {
3768 FrameScope frame(
3769 this, has_frame() ? StackFrame::NO_FRAME_TYPE : StackFrame::INTERNAL);
3770 CallRuntime(Runtime::kThrowStackOverflow);
3771 break_(0xCC);
3772 }
3773 bind(&regular_invoke);
3774 }
3775
CheckDebugHook(Register fun,Register new_target,Register expected_parameter_count,Register actual_parameter_count)3776 void MacroAssembler::CheckDebugHook(Register fun, Register new_target,
3777 Register expected_parameter_count,
3778 Register actual_parameter_count) {
3779 Label skip_hook;
3780 {
3781 UseScratchRegisterScope temps(this);
3782 Register scratch = temps.Acquire();
3783 li(scratch,
3784 ExternalReference::debug_hook_on_function_call_address(isolate()));
3785 Lb(scratch, MemOperand(scratch));
3786 Branch(&skip_hook, eq, scratch, Operand(zero_reg));
3787 }
3788 {
3789 // Load receiver to pass it later to DebugOnFunctionCall hook.
3790 UseScratchRegisterScope temps(this);
3791 Register receiver = temps.Acquire();
3792 LoadReceiver(receiver, actual_parameter_count);
3793
3794 FrameScope frame(
3795 this, has_frame() ? StackFrame::NO_FRAME_TYPE : StackFrame::INTERNAL);
3796 SmiTag(expected_parameter_count);
3797 Push(expected_parameter_count);
3798
3799 SmiTag(actual_parameter_count);
3800 Push(actual_parameter_count);
3801
3802 if (new_target.is_valid()) {
3803 Push(new_target);
3804 }
3805 Push(fun);
3806 Push(fun);
3807 Push(receiver);
3808 CallRuntime(Runtime::kDebugOnFunctionCall);
3809 Pop(fun);
3810 if (new_target.is_valid()) {
3811 Pop(new_target);
3812 }
3813
3814 Pop(actual_parameter_count);
3815 SmiUntag(actual_parameter_count);
3816
3817 Pop(expected_parameter_count);
3818 SmiUntag(expected_parameter_count);
3819 }
3820 bind(&skip_hook);
3821 }
3822
InvokeFunctionCode(Register function,Register new_target,Register expected_parameter_count,Register actual_parameter_count,InvokeType type)3823 void MacroAssembler::InvokeFunctionCode(Register function, Register new_target,
3824 Register expected_parameter_count,
3825 Register actual_parameter_count,
3826 InvokeType type) {
3827 // You can't call a function without a valid frame.
3828 DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());
3829 DCHECK_EQ(function, a1);
3830 DCHECK_IMPLIES(new_target.is_valid(), new_target == a3);
3831
3832 // On function call, call into the debugger if necessary.
3833 CheckDebugHook(function, new_target, expected_parameter_count,
3834 actual_parameter_count);
3835
3836 // Clear the new.target register if not given.
3837 if (!new_target.is_valid()) {
3838 LoadRoot(a3, RootIndex::kUndefinedValue);
3839 }
3840
3841 Label done;
3842 InvokePrologue(expected_parameter_count, actual_parameter_count, &done, type);
3843 // We call indirectly through the code field in the function to
3844 // allow recompilation to take effect without changing any of the
3845 // call sites.
3846 Register code = kJavaScriptCallCodeStartRegister;
3847 LoadTaggedPointerField(code,
3848 FieldMemOperand(function, JSFunction::kCodeOffset));
3849 switch (type) {
3850 case InvokeType::kCall:
3851 CallCodeObject(code);
3852 break;
3853 case InvokeType::kJump:
3854 JumpCodeObject(code);
3855 break;
3856 }
3857
3858 // Continue here if InvokePrologue does handle the invocation due to
3859 // mismatched parameter counts.
3860 bind(&done);
3861 }
3862
InvokeFunctionWithNewTarget(Register function,Register new_target,Register actual_parameter_count,InvokeType type)3863 void MacroAssembler::InvokeFunctionWithNewTarget(
3864 Register function, Register new_target, Register actual_parameter_count,
3865 InvokeType type) {
3866 // You can't call a function without a valid frame.
3867 DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());
3868
3869 // Contract with called JS functions requires that function is passed in a1.
3870 DCHECK_EQ(function, a1);
3871 Register expected_parameter_count = a2;
3872 {
3873 UseScratchRegisterScope temps(this);
3874 Register temp_reg = temps.Acquire();
3875 LoadTaggedPointerField(
3876 temp_reg,
3877 FieldMemOperand(function, JSFunction::kSharedFunctionInfoOffset));
3878 LoadTaggedPointerField(
3879 cp, FieldMemOperand(function, JSFunction::kContextOffset));
3880 // The argument count is stored as uint16_t
3881 Lhu(expected_parameter_count,
3882 FieldMemOperand(temp_reg,
3883 SharedFunctionInfo::kFormalParameterCountOffset));
3884 }
3885 InvokeFunctionCode(function, new_target, expected_parameter_count,
3886 actual_parameter_count, type);
3887 }
3888
InvokeFunction(Register function,Register expected_parameter_count,Register actual_parameter_count,InvokeType type)3889 void MacroAssembler::InvokeFunction(Register function,
3890 Register expected_parameter_count,
3891 Register actual_parameter_count,
3892 InvokeType type) {
3893 // You can't call a function without a valid frame.
3894 DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());
3895
3896 // Contract with called JS functions requires that function is passed in a1.
3897 DCHECK_EQ(function, a1);
3898
3899 // Get the function and setup the context.
3900 LoadTaggedPointerField(cp, FieldMemOperand(a1, JSFunction::kContextOffset));
3901
3902 InvokeFunctionCode(a1, no_reg, expected_parameter_count,
3903 actual_parameter_count, type);
3904 }
3905
3906 // ---------------------------------------------------------------------------
3907 // Support functions.
3908
GetObjectType(Register object,Register map,Register type_reg)3909 void MacroAssembler::GetObjectType(Register object, Register map,
3910 Register type_reg) {
3911 LoadMap(map, object);
3912 Lhu(type_reg, FieldMemOperand(map, Map::kInstanceTypeOffset));
3913 }
3914
GetInstanceTypeRange(Register map,Register type_reg,InstanceType lower_limit,Register range)3915 void MacroAssembler::GetInstanceTypeRange(Register map, Register type_reg,
3916 InstanceType lower_limit,
3917 Register range) {
3918 Lhu(type_reg, FieldMemOperand(map, Map::kInstanceTypeOffset));
3919 Sub64(range, type_reg, Operand(lower_limit));
3920 }
3921 //------------------------------------------------------------------------------
3922 // Wasm
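// The RVV comparison helpers below all follow the same pattern: set vtype for
// the requested SEW/LMUL, compare into the mask register v0, clear dst, then
// vmerge -1 into the active lanes, producing all-ones lanes where the
// comparison holds and all-zeros lanes elsewhere (the representation Wasm
// SIMD compare ops expect). The Ge/Gt variants swap their operands because
// the vector-vector compares only come in le/lt form (a >= b is emitted as
// b <= a).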
WasmRvvEq(VRegister dst,VRegister lhs,VRegister rhs,VSew sew,Vlmul lmul)3923 void TurboAssembler::WasmRvvEq(VRegister dst, VRegister lhs, VRegister rhs,
3924 VSew sew, Vlmul lmul) {
3925 VU.set(kScratchReg, sew, lmul);
3926 vmseq_vv(v0, lhs, rhs);
3927 li(kScratchReg, -1);
3928 vmv_vx(dst, zero_reg);
3929 vmerge_vx(dst, kScratchReg, dst);
3930 }
3931
WasmRvvNe(VRegister dst,VRegister lhs,VRegister rhs,VSew sew,Vlmul lmul)3932 void TurboAssembler::WasmRvvNe(VRegister dst, VRegister lhs, VRegister rhs,
3933 VSew sew, Vlmul lmul) {
3934 VU.set(kScratchReg, sew, lmul);
3935 vmsne_vv(v0, lhs, rhs);
3936 li(kScratchReg, -1);
3937 vmv_vx(dst, zero_reg);
3938 vmerge_vx(dst, kScratchReg, dst);
3939 }
3940
WasmRvvGeS(VRegister dst,VRegister lhs,VRegister rhs,VSew sew,Vlmul lmul)3941 void TurboAssembler::WasmRvvGeS(VRegister dst, VRegister lhs, VRegister rhs,
3942 VSew sew, Vlmul lmul) {
3943 VU.set(kScratchReg, sew, lmul);
3944 vmsle_vv(v0, rhs, lhs);
3945 li(kScratchReg, -1);
3946 vmv_vx(dst, zero_reg);
3947 vmerge_vx(dst, kScratchReg, dst);
3948 }
3949
WasmRvvGeU(VRegister dst,VRegister lhs,VRegister rhs,VSew sew,Vlmul lmul)3950 void TurboAssembler::WasmRvvGeU(VRegister dst, VRegister lhs, VRegister rhs,
3951 VSew sew, Vlmul lmul) {
3952 VU.set(kScratchReg, sew, lmul);
3953 vmsleu_vv(v0, rhs, lhs);
3954 li(kScratchReg, -1);
3955 vmv_vx(dst, zero_reg);
3956 vmerge_vx(dst, kScratchReg, dst);
3957 }
3958
WasmRvvGtS(VRegister dst,VRegister lhs,VRegister rhs,VSew sew,Vlmul lmul)3959 void TurboAssembler::WasmRvvGtS(VRegister dst, VRegister lhs, VRegister rhs,
3960 VSew sew, Vlmul lmul) {
3961 VU.set(kScratchReg, sew, lmul);
3962 vmslt_vv(v0, rhs, lhs);
3963 li(kScratchReg, -1);
3964 vmv_vx(dst, zero_reg);
3965 vmerge_vx(dst, kScratchReg, dst);
3966 }
3967
WasmRvvGtU(VRegister dst,VRegister lhs,VRegister rhs,VSew sew,Vlmul lmul)3968 void TurboAssembler::WasmRvvGtU(VRegister dst, VRegister lhs, VRegister rhs,
3969 VSew sew, Vlmul lmul) {
3970 VU.set(kScratchReg, sew, lmul);
3971 vmsltu_vv(v0, rhs, lhs);
3972 li(kScratchReg, -1);
3973 vmv_vx(dst, zero_reg);
3974 vmerge_vx(dst, kScratchReg, dst);
3975 }
3976
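// Sketch of the constant materialization below: with SEW=64 the mask v0 is
// set to 1 (lane 0 active), imm1 is merged into lane 0, the mask is shifted
// left to select lane 1, and imm2 is merged into lane 1. The two 64-bit
// halves are read from imms[16] in the target's little-endian byte order.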
WasmRvvS128const(VRegister dst,const uint8_t imms[16])3977 void TurboAssembler::WasmRvvS128const(VRegister dst, const uint8_t imms[16]) {
3978 uint64_t imm1 = *(reinterpret_cast<const uint64_t*>(imms));
3979 uint64_t imm2 = *((reinterpret_cast<const uint64_t*>(imms)) + 1);
3980 VU.set(kScratchReg, VSew::E64, Vlmul::m1);
3981 li(kScratchReg, 1);
3982 vmv_vx(v0, kScratchReg);
3983 li(kScratchReg, imm1);
3984 vmerge_vx(dst, kScratchReg, dst);
3985 li(kScratchReg, imm2);
3986 vsll_vi(v0, v0, 1);
3987 vmerge_vx(dst, kScratchReg, dst);
3988 }
3989
LoadLane(int ts,VRegister dst,uint8_t laneidx,MemOperand src)3990 void TurboAssembler::LoadLane(int ts, VRegister dst, uint8_t laneidx,
3991 MemOperand src) {
3992 if (ts == 8) {
3993 Lbu(kScratchReg2, src);
3994 VU.set(kScratchReg, E64, m1);
3995 li(kScratchReg, 0x1 << laneidx);
3996 vmv_sx(v0, kScratchReg);
3997 VU.set(kScratchReg, E8, m1);
3998 vmerge_vx(dst, kScratchReg2, dst);
3999 } else if (ts == 16) {
4000 Lhu(kScratchReg2, src);
4001 VU.set(kScratchReg, E16, m1);
4002 li(kScratchReg, 0x1 << laneidx);
4003 vmv_sx(v0, kScratchReg);
4004 vmerge_vx(dst, kScratchReg2, dst);
4005 } else if (ts == 32) {
4006 Lwu(kScratchReg2, src);
4007 VU.set(kScratchReg, E32, m1);
4008 li(kScratchReg, 0x1 << laneidx);
4009 vmv_sx(v0, kScratchReg);
4010 vmerge_vx(dst, kScratchReg2, dst);
4011 } else if (ts == 64) {
4012 Ld(kScratchReg2, src);
4013 VU.set(kScratchReg, E64, m1);
4014 li(kScratchReg, 0x1 << laneidx);
4015 vmv_sx(v0, kScratchReg);
4016 vmerge_vx(dst, kScratchReg2, dst);
4017 } else {
4018 UNREACHABLE();
4019 }
4020 }
4021
StoreLane(int sz,VRegister src,uint8_t laneidx,MemOperand dst)4022 void TurboAssembler::StoreLane(int sz, VRegister src, uint8_t laneidx,
4023 MemOperand dst) {
4024 if (sz == 8) {
4025 VU.set(kScratchReg, E8, m1);
4026 vslidedown_vi(kSimd128ScratchReg, src, laneidx);
4027 vmv_xs(kScratchReg, kSimd128ScratchReg);
4028 Sb(kScratchReg, dst);
4029 } else if (sz == 16) {
4030 VU.set(kScratchReg, E16, m1);
4031 vslidedown_vi(kSimd128ScratchReg, src, laneidx);
4032 vmv_xs(kScratchReg, kSimd128ScratchReg);
4033 Sh(kScratchReg, dst);
4034 } else if (sz == 32) {
4035 VU.set(kScratchReg, E32, m1);
4036 vslidedown_vi(kSimd128ScratchReg, src, laneidx);
4037 vmv_xs(kScratchReg, kSimd128ScratchReg);
4038 Sw(kScratchReg, dst);
4039 } else {
4040 DCHECK_EQ(sz, 64);
4041 VU.set(kScratchReg, E64, m1);
4042 vslidedown_vi(kSimd128ScratchReg, src, laneidx);
4043 vmv_xs(kScratchReg, kSimd128ScratchReg);
4044 Sd(kScratchReg, dst);
4045 }
4046 }
4047 // -----------------------------------------------------------------------------
4048 // Runtime calls.
4049
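// AddOverflow64/SubOverflow64 (below) detect signed overflow with the usual
// sign-bit identities; the overflow register ends up negative exactly when
// the operation overflowed:
//   add: overflow = (result ^ left) & (result ^ right)
//   sub: overflow = (left ^ result) & (left ^ right)
// When dst aliases an input, the raw result is first built in a second
// scratch register so the inputs are still available for the xors.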
AddOverflow64(Register dst,Register left,const Operand & right,Register overflow)4050 void TurboAssembler::AddOverflow64(Register dst, Register left,
4051 const Operand& right, Register overflow) {
4052 UseScratchRegisterScope temps(this);
4053 BlockTrampolinePoolScope block_trampoline_pool(this);
4054 Register right_reg = no_reg;
4055 Register scratch = temps.Acquire();
4056 Register scratch2 = temps.Acquire();
4057 if (!right.is_reg()) {
4058 li(scratch, Operand(right));
4059 right_reg = scratch;
4060 } else {
4061 right_reg = right.rm();
4062 }
4063 DCHECK(left != scratch2 && right_reg != scratch2 && dst != scratch2 &&
4064 overflow != scratch2);
4065 DCHECK(overflow != left && overflow != right_reg);
4066 if (dst == left || dst == right_reg) {
4067 add(scratch2, left, right_reg);
4068 xor_(overflow, scratch2, left);
4069 xor_(scratch, scratch2, right_reg);
4070 and_(overflow, overflow, scratch);
4071 Mv(dst, scratch2);
4072 } else {
4073 add(dst, left, right_reg);
4074 xor_(overflow, dst, left);
4075 xor_(scratch, dst, right_reg);
4076 and_(overflow, overflow, scratch);
4077 }
4078 }
4079
SubOverflow64(Register dst,Register left,const Operand & right,Register overflow)4080 void TurboAssembler::SubOverflow64(Register dst, Register left,
4081 const Operand& right, Register overflow) {
4082 UseScratchRegisterScope temps(this);
4083 BlockTrampolinePoolScope block_trampoline_pool(this);
4084 Register right_reg = no_reg;
4085 Register scratch = temps.Acquire();
4086 Register scratch2 = temps.Acquire();
4087 if (!right.is_reg()) {
4088 li(scratch, Operand(right));
4089 right_reg = scratch;
4090 } else {
4091 right_reg = right.rm();
4092 }
4093
4094 DCHECK(left != scratch2 && right_reg != scratch2 && dst != scratch2 &&
4095 overflow != scratch2);
4096 DCHECK(overflow != left && overflow != right_reg);
4097
4098 if (dst == left || dst == right_reg) {
4099 sub(scratch2, left, right_reg);
4100 xor_(overflow, left, scratch2);
4101 xor_(scratch, left, right_reg);
4102 and_(overflow, overflow, scratch);
4103 Mv(dst, scratch2);
4104 } else {
4105 sub(dst, left, right_reg);
4106 xor_(overflow, left, dst);
4107 xor_(scratch, left, right_reg);
4108 and_(overflow, overflow, scratch);
4109 }
4110 }
4111
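// MulOverflow32 (below) sign-extends both 32-bit inputs, performs a full
// 64-bit multiply, and xors the sign-extended low 32 bits of the product with
// the full product; the overflow register is non-zero exactly when the
// product does not fit in 32 bits.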
MulOverflow32(Register dst,Register left,const Operand & right,Register overflow)4112 void TurboAssembler::MulOverflow32(Register dst, Register left,
4113 const Operand& right, Register overflow) {
4114 ASM_CODE_COMMENT(this);
4115 UseScratchRegisterScope temps(this);
4116 BlockTrampolinePoolScope block_trampoline_pool(this);
4117 Register right_reg = no_reg;
4118 Register scratch = temps.Acquire();
4119 Register scratch2 = temps.Acquire();
4120 if (!right.is_reg()) {
4121 li(scratch, Operand(right));
4122 right_reg = scratch;
4123 } else {
4124 right_reg = right.rm();
4125 }
4126
4127 DCHECK(left != scratch2 && right_reg != scratch2 && dst != scratch2 &&
4128 overflow != scratch2);
4129 DCHECK(overflow != left && overflow != right_reg);
4130 sext_w(overflow, left);
4131 sext_w(scratch2, right_reg);
4132
4133 mul(overflow, overflow, scratch2);
4134 sext_w(dst, overflow);
4135 xor_(overflow, overflow, dst);
4136 }
4137
CallRuntime(const Runtime::Function * f,int num_arguments,SaveFPRegsMode save_doubles)4138 void MacroAssembler::CallRuntime(const Runtime::Function* f, int num_arguments,
4139 SaveFPRegsMode save_doubles) {
4140 ASM_CODE_COMMENT(this);
4141 // All parameters are on the stack. a0 has the return value after call.
4142
4143 // If the expected number of arguments of the runtime function is
4144 // constant, we check that the actual number of arguments match the
4145 // expectation.
4146 CHECK(f->nargs < 0 || f->nargs == num_arguments);
4147
4148 // TODO(1236192): Most runtime routines don't need the number of
4149 // arguments passed in because it is constant. At some point we
4150 // should remove this need and make the runtime routine entry code
4151 // smarter.
4152 PrepareCEntryArgs(num_arguments);
4153 PrepareCEntryFunction(ExternalReference::Create(f));
4154 Handle<Code> code =
4155 CodeFactory::CEntry(isolate(), f->result_size, save_doubles);
4156 Call(code, RelocInfo::CODE_TARGET);
4157 }
4158
TailCallRuntime(Runtime::FunctionId fid)4159 void MacroAssembler::TailCallRuntime(Runtime::FunctionId fid) {
4160 ASM_CODE_COMMENT(this);
4161 const Runtime::Function* function = Runtime::FunctionForId(fid);
4162 DCHECK_EQ(1, function->result_size);
4163 if (function->nargs >= 0) {
4164 PrepareCEntryArgs(function->nargs);
4165 }
4166 JumpToExternalReference(ExternalReference::Create(fid));
4167 }
4168
JumpToExternalReference(const ExternalReference & builtin,bool builtin_exit_frame)4169 void MacroAssembler::JumpToExternalReference(const ExternalReference& builtin,
4170 bool builtin_exit_frame) {
4171 ASM_CODE_COMMENT(this);
4172 PrepareCEntryFunction(builtin);
4173 Handle<Code> code = CodeFactory::CEntry(isolate(), 1, SaveFPRegsMode::kIgnore,
4174 ArgvMode::kStack, builtin_exit_frame);
4175 Jump(code, RelocInfo::CODE_TARGET, al, zero_reg, Operand(zero_reg));
4176 }
4177
JumpToOffHeapInstructionStream(Address entry)4178 void MacroAssembler::JumpToOffHeapInstructionStream(Address entry) {
4179 // Load the entry address either directly (via li) or, when the RISC-V
4180 // constant pool is enabled, from a constant-pool entry recorded below.
4181 ASM_CODE_COMMENT(this);
4182 if (!FLAG_riscv_constant_pool) {
4183 li(kOffHeapTrampolineRegister, Operand(entry, RelocInfo::OFF_HEAP_TARGET));
4184 } else {
4185 RecordEntry(entry, RelocInfo::OFF_HEAP_TARGET);
4186 RecordRelocInfo(RelocInfo::OFF_HEAP_TARGET, entry);
4187 auipc(kOffHeapTrampolineRegister, 0);
4188 ld(kOffHeapTrampolineRegister, kOffHeapTrampolineRegister, 0);
4189 }
4190 Jump(kOffHeapTrampolineRegister);
4191 }
4192
LoadWeakValue(Register out,Register in,Label * target_if_cleared)4193 void MacroAssembler::LoadWeakValue(Register out, Register in,
4194 Label* target_if_cleared) {
4195 ASM_CODE_COMMENT(this);
4196 Branch(target_if_cleared, eq, in, Operand(kClearedWeakHeapObjectLower32));
4197 And(out, in, Operand(~kWeakHeapObjectMask));
4198 }
4199
EmitIncrementCounter(StatsCounter * counter,int value,Register scratch1,Register scratch2)4200 void MacroAssembler::EmitIncrementCounter(StatsCounter* counter, int value,
4201 Register scratch1,
4202 Register scratch2) {
4203 DCHECK_GT(value, 0);
4204 if (FLAG_native_code_counters && counter->Enabled()) {
4205 ASM_CODE_COMMENT(this);
4206 // This operation has to be exactly 32-bit wide in case the external
4207 // reference table redirects the counter to a uint32_t
4208 // dummy_stats_counter_ field.
4209 li(scratch2, ExternalReference::Create(counter));
4210 Lw(scratch1, MemOperand(scratch2));
4211 Add32(scratch1, scratch1, Operand(value));
4212 Sw(scratch1, MemOperand(scratch2));
4213 }
4214 }
4215
EmitDecrementCounter(StatsCounter * counter,int value,Register scratch1,Register scratch2)4216 void MacroAssembler::EmitDecrementCounter(StatsCounter* counter, int value,
4217 Register scratch1,
4218 Register scratch2) {
4219 DCHECK_GT(value, 0);
4220 if (FLAG_native_code_counters && counter->Enabled()) {
4221 ASM_CODE_COMMENT(this);
4222 // This operation has to be exactly 32-bit wide in case the external
4223 // reference table redirects the counter to a uint32_t
4224 // dummy_stats_counter_ field.
4225 li(scratch2, ExternalReference::Create(counter));
4226 Lw(scratch1, MemOperand(scratch2));
4227 Sub32(scratch1, scratch1, Operand(value));
4228 Sw(scratch1, MemOperand(scratch2));
4229 }
4230 }
4231
4232 // -----------------------------------------------------------------------------
4233 // Debugging.
4234
Trap()4235 void TurboAssembler::Trap() { stop(); }
DebugBreak()4236 void TurboAssembler::DebugBreak() { stop(); }
4237
Assert(Condition cc,AbortReason reason,Register rs,Operand rt)4238 void TurboAssembler::Assert(Condition cc, AbortReason reason, Register rs,
4239 Operand rt) {
4240 if (FLAG_debug_code) Check(cc, reason, rs, rt);
4241 }
4242
Check(Condition cc,AbortReason reason,Register rs,Operand rt)4243 void TurboAssembler::Check(Condition cc, AbortReason reason, Register rs,
4244 Operand rt) {
4245 Label L;
4246 BranchShort(&L, cc, rs, rt);
4247 Abort(reason);
4248 // Will not return here.
4249 bind(&L);
4250 }
4251
Abort(AbortReason reason)4252 void TurboAssembler::Abort(AbortReason reason) {
4253 Label abort_start;
4254 bind(&abort_start);
4255 if (FLAG_code_comments) {
4256 const char* msg = GetAbortReason(reason);
4257 RecordComment("Abort message: ");
4258 RecordComment(msg);
4259 }
4260
4261 // Avoid emitting call to builtin if requested.
4262 if (trap_on_abort()) {
4263 ebreak();
4264 return;
4265 }
4266
4267 if (should_abort_hard()) {
4268 // We don't care if we constructed a frame. Just pretend we did.
4269 FrameScope assume_frame(this, StackFrame::NO_FRAME_TYPE);
4270 PrepareCallCFunction(0, a0);
4271 li(a0, Operand(static_cast<int64_t>(reason)));
4272 CallCFunction(ExternalReference::abort_with_reason(), 1);
4273 return;
4274 }
4275
4276 Move(a0, Smi::FromInt(static_cast<int>(reason)));
4277
4278 // Disable stub call restrictions to always allow calls to abort.
4279 if (!has_frame()) {
4280 // We don't actually want to generate a pile of code for this, so just
4281 // claim there is a stack frame, without generating one.
4282 FrameScope scope(this, StackFrame::NO_FRAME_TYPE);
4283 Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET);
4284 } else {
4285 Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET);
4286 }
4287 // Will not return here.
4288 if (is_trampoline_pool_blocked()) {
4289 // If the calling code cares about the exact number of
4290 // instructions generated, we insert padding here to keep the size
4291 // of the Abort macro constant.
4292 // Currently in debug mode with debug_code enabled the number of
4293 // generated instructions is 10, so we use this as a maximum value.
4294 static const int kExpectedAbortInstructions = 10;
4295 int abort_instructions = InstructionsGeneratedSince(&abort_start);
4296 DCHECK_LE(abort_instructions, kExpectedAbortInstructions);
4297 while (abort_instructions++ < kExpectedAbortInstructions) {
4298 nop();
4299 }
4300 }
4301 }
4302
LoadMap(Register destination,Register object)4303 void TurboAssembler::LoadMap(Register destination, Register object) {
4304 ASM_CODE_COMMENT(this);
4305 LoadTaggedPointerField(destination,
4306 FieldMemOperand(object, HeapObject::kMapOffset));
4307 }
4308
LoadNativeContextSlot(Register dst,int index)4309 void MacroAssembler::LoadNativeContextSlot(Register dst, int index) {
4310 ASM_CODE_COMMENT(this);
4311 LoadMap(dst, cp);
4312 LoadTaggedPointerField(
4313 dst, FieldMemOperand(
4314 dst, Map::kConstructorOrBackPointerOrNativeContextOffset));
4315 LoadTaggedPointerField(dst, MemOperand(dst, Context::SlotOffset(index)));
4316 }
4317
StubPrologue(StackFrame::Type type)4318 void TurboAssembler::StubPrologue(StackFrame::Type type) {
4319 ASM_CODE_COMMENT(this);
4320 UseScratchRegisterScope temps(this);
4321 Register scratch = temps.Acquire();
4322 li(scratch, Operand(StackFrame::TypeToMarker(type)));
4323 PushCommonFrame(scratch);
4324 }
4325
Prologue()4326 void TurboAssembler::Prologue() { PushStandardFrame(a1); }
4327
EnterFrame(StackFrame::Type type)4328 void TurboAssembler::EnterFrame(StackFrame::Type type) {
4329 ASM_CODE_COMMENT(this);
4330 UseScratchRegisterScope temps(this);
4331 Register scratch = temps.Acquire();
4332 BlockTrampolinePoolScope block_trampoline_pool(this);
4333 Push(ra, fp);
4334 Move(fp, sp);
4335 if (!StackFrame::IsJavaScript(type)) {
4336 li(scratch, Operand(StackFrame::TypeToMarker(type)));
4337 Push(scratch);
4338 }
4339 #if V8_ENABLE_WEBASSEMBLY
4340 if (type == StackFrame::WASM) Push(kWasmInstanceRegister);
4341 #endif // V8_ENABLE_WEBASSEMBLY
4342 }
4343
LeaveFrame(StackFrame::Type type)4344 void TurboAssembler::LeaveFrame(StackFrame::Type type) {
4345 ASM_CODE_COMMENT(this);
4346 addi(sp, fp, 2 * kSystemPointerSize);
4347 Ld(ra, MemOperand(fp, 1 * kSystemPointerSize));
4348 Ld(fp, MemOperand(fp, 0 * kSystemPointerSize));
4349 }
4350
EnterExitFrame(bool save_doubles,int stack_space,StackFrame::Type frame_type)4351 void MacroAssembler::EnterExitFrame(bool save_doubles, int stack_space,
4352 StackFrame::Type frame_type) {
4353 ASM_CODE_COMMENT(this);
4354 DCHECK(frame_type == StackFrame::EXIT ||
4355 frame_type == StackFrame::BUILTIN_EXIT);
4356
4357 // Set up the frame structure on the stack.
4358 STATIC_ASSERT(2 * kSystemPointerSize ==
4359 ExitFrameConstants::kCallerSPDisplacement);
4360 STATIC_ASSERT(1 * kSystemPointerSize == ExitFrameConstants::kCallerPCOffset);
4361 STATIC_ASSERT(0 * kSystemPointerSize == ExitFrameConstants::kCallerFPOffset);
4362
4363 // This is how the stack will look:
4364 // fp + 2 (==kCallerSPDisplacement) - old stack's end
4365 // [fp + 1 (==kCallerPCOffset)] - saved old ra
4366 // [fp + 0 (==kCallerFPOffset)] - saved old fp
4367 // [fp - 1 StackFrame::EXIT Smi
4368 // [fp - 2 (==kSPOffset)] - sp of the called function
4369 // fp - (2 + stack_space + alignment) == sp == [fp - kSPOffset] - top of the
4370 // new stack (will contain saved ra)
4371
4372 // Save registers and reserve room for saved entry sp.
4373 addi(sp, sp,
4374 -2 * kSystemPointerSize - ExitFrameConstants::kFixedFrameSizeFromFp);
4375 Sd(ra, MemOperand(sp, 3 * kSystemPointerSize));
4376 Sd(fp, MemOperand(sp, 2 * kSystemPointerSize));
4377 {
4378 UseScratchRegisterScope temps(this);
4379 Register scratch = temps.Acquire();
4380 li(scratch, Operand(StackFrame::TypeToMarker(frame_type)));
4381 Sd(scratch, MemOperand(sp, 1 * kSystemPointerSize));
4382 }
4383 // Set up new frame pointer.
4384 addi(fp, sp, ExitFrameConstants::kFixedFrameSizeFromFp);
4385
4386 if (FLAG_debug_code) {
4387 Sd(zero_reg, MemOperand(fp, ExitFrameConstants::kSPOffset));
4388 }
4389
4390 {
4391 UseScratchRegisterScope temps(this);
4392 Register scratch = temps.Acquire();
4393 BlockTrampolinePoolScope block_trampoline_pool(this);
4394 // Save the frame pointer and the context in top.
4395 li(scratch, ExternalReference::Create(IsolateAddressId::kCEntryFPAddress,
4396 isolate()));
4397 Sd(fp, MemOperand(scratch));
4398 li(scratch,
4399 ExternalReference::Create(IsolateAddressId::kContextAddress, isolate()));
4400 Sd(cp, MemOperand(scratch));
4401 }
4402
4403 const int frame_alignment = MacroAssembler::ActivationFrameAlignment();
4404 if (save_doubles) {
4405 // The stack is already aligned to 0 modulo 8 for double-precision stores.
4406 int space = kNumCallerSavedFPU * kDoubleSize;
4407 Sub64(sp, sp, Operand(space));
4408 int count = 0;
4409 for (int i = 0; i < kNumFPURegisters; i++) {
4410 if (kCallerSavedFPU.bits() & (1 << i)) {
4411 FPURegister reg = FPURegister::from_code(i);
4412 StoreDouble(reg, MemOperand(sp, count * kDoubleSize));
4413 count++;
4414 }
4415 }
4416 }
4417
4418 // Reserve space for the return address, stack space and an optional slot
4419 // (used by DirectCEntry to hold the return value if a struct is
4420 // returned) and align the frame preparing for calling the runtime function.
4421 DCHECK_GE(stack_space, 0);
4422 Sub64(sp, sp, Operand((stack_space + 2) * kSystemPointerSize));
4423 if (frame_alignment > 0) {
4424 DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
4425 And(sp, sp, Operand(-frame_alignment)); // Align stack.
4426 }
4427
4428 // Set the exit frame sp value to point just before the return address
4429 // location.
4430 UseScratchRegisterScope temps(this);
4431 Register scratch = temps.Acquire();
4432 addi(scratch, sp, kSystemPointerSize);
4433 Sd(scratch, MemOperand(fp, ExitFrameConstants::kSPOffset));
4434 }
4435
LeaveExitFrame(bool save_doubles,Register argument_count,bool do_return,bool argument_count_is_length)4436 void MacroAssembler::LeaveExitFrame(bool save_doubles, Register argument_count,
4437 bool do_return,
4438 bool argument_count_is_length) {
4439 ASM_CODE_COMMENT(this);
4440 UseScratchRegisterScope temps(this);
4441 Register scratch = temps.Acquire();
4442 BlockTrampolinePoolScope block_trampoline_pool(this);
4443 // Optionally restore all double registers.
4444 if (save_doubles) {
4445 // Remember: we only need to restore kCallerSavedFPU.
4446 Sub64(scratch, fp,
4447 Operand(ExitFrameConstants::kFixedFrameSizeFromFp +
4448 kNumCallerSavedFPU * kDoubleSize));
4449 int count = 0;
4450 for (int i = 0; i < kNumFPURegisters; i++) {
4451 if (kCallerSavedFPU.bits() & (1 << i)) {
4452 FPURegister reg = FPURegister::from_code(i);
4453 LoadDouble(reg, MemOperand(scratch, count * kDoubleSize));
4454 count++;
4455 }
4456 }
4457 }
4458
4459 // Clear top frame.
4460 li(scratch,
4461 ExternalReference::Create(IsolateAddressId::kCEntryFPAddress, isolate()));
4462 Sd(zero_reg, MemOperand(scratch));
4463
4464 // Restore current context from top and clear it in debug mode.
4465 li(scratch,
4466 ExternalReference::Create(IsolateAddressId::kContextAddress, isolate()));
4467 Ld(cp, MemOperand(scratch));
4468
4469 if (FLAG_debug_code) {
4470 UseScratchRegisterScope temp(this);
4471 Register scratch2 = temp.Acquire();
4472 li(scratch2, Operand(Context::kInvalidContext));
4473 Sd(scratch2, MemOperand(scratch));
4474 }
4475
4476 // Pop the arguments, restore registers, and return.
4477 Mv(sp, fp); // Respect ABI stack constraint.
4478 Ld(fp, MemOperand(sp, ExitFrameConstants::kCallerFPOffset));
4479 Ld(ra, MemOperand(sp, ExitFrameConstants::kCallerPCOffset));
4480
4481 if (argument_count.is_valid()) {
4482 if (argument_count_is_length) {
4483 add(sp, sp, argument_count);
4484 } else {
4485 CalcScaledAddress(sp, sp, argument_count, kSystemPointerSizeLog2);
4486 }
4487 }
4488
4489 addi(sp, sp, 2 * kSystemPointerSize);
4490
4491 if (do_return) {
4492 Ret();
4493 }
4494 }
4495
ActivationFrameAlignment()4496 int TurboAssembler::ActivationFrameAlignment() {
4497 #if V8_HOST_ARCH_RISCV64
4498 // Running on the real platform. Use the alignment as mandated by the local
4499 // environment.
4500 // Note: This will break if we ever start generating snapshots on one RISC-V
4501 // platform for another RISC-V platform with a different alignment.
4502 return base::OS::ActivationFrameAlignment();
4503 #else // V8_HOST_ARCH_RISCV64
4504 // If we are using the simulator then we should always align to the expected
4505 // alignment. As the simulator is used to generate snapshots we do not know
4506 // if the target platform will need alignment, so this is controlled from a
4507 // flag.
4508 return FLAG_sim_stack_alignment;
4509 #endif // V8_HOST_ARCH_RISCV64
4510 }
4511
AssertStackIsAligned()4512 void MacroAssembler::AssertStackIsAligned() {
4513 if (FLAG_debug_code) {
4514 ASM_CODE_COMMENT(this);
4515 const int frame_alignment = ActivationFrameAlignment();
4516 const int frame_alignment_mask = frame_alignment - 1;
4517
4518 if (frame_alignment > kSystemPointerSize) {
4519 Label alignment_as_expected;
4520 DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
4521 {
4522 UseScratchRegisterScope temps(this);
4523 Register scratch = temps.Acquire();
4524 andi(scratch, sp, frame_alignment_mask);
4525 BranchShort(&alignment_as_expected, eq, scratch, Operand(zero_reg));
4526 }
4527 // Don't use Check here, as it will call Runtime_Abort and re-enter here.
4528 ebreak();
4529 bind(&alignment_as_expected);
4530 }
4531 }
4532 }
4533
SmiUntag(Register dst,const MemOperand & src)4534 void TurboAssembler::SmiUntag(Register dst, const MemOperand& src) {
4535 ASM_CODE_COMMENT(this);
4536 if (SmiValuesAre32Bits()) {
4537 Lw(dst, MemOperand(src.rm(), SmiWordOffset(src.offset())));
4538 } else {
4539 DCHECK(SmiValuesAre31Bits());
4540 if (COMPRESS_POINTERS_BOOL) {
4541 Lw(dst, src);
4542 } else {
4543 Ld(dst, src);
4544 }
4545 SmiUntag(dst);
4546 }
4547 }
4548
SmiToInt32(Register smi)4549 void TurboAssembler::SmiToInt32(Register smi) {
4550 ASM_CODE_COMMENT(this);
4551 if (FLAG_enable_slow_asserts) {
4552 AssertSmi(smi);
4553 }
4554 DCHECK(SmiValuesAre32Bits() || SmiValuesAre31Bits());
4555 SmiUntag(smi);
4556 }
4557
JumpIfSmi(Register value,Label * smi_label)4558 void TurboAssembler::JumpIfSmi(Register value, Label* smi_label) {
4559 ASM_CODE_COMMENT(this);
4560 DCHECK_EQ(0, kSmiTag);
4561 UseScratchRegisterScope temps(this);
4562 Register scratch = temps.Acquire();
4563 andi(scratch, value, kSmiTagMask);
4564 Branch(smi_label, eq, scratch, Operand(zero_reg));
4565 }
4566
JumpIfNotSmi(Register value,Label * not_smi_label)4567 void MacroAssembler::JumpIfNotSmi(Register value, Label* not_smi_label) {
4568 ASM_CODE_COMMENT(this);
4569 UseScratchRegisterScope temps(this);
4570 Register scratch = temps.Acquire();
4571 DCHECK_EQ(0, kSmiTag);
4572 andi(scratch, value, kSmiTagMask);
4573 Branch(not_smi_label, ne, scratch, Operand(zero_reg));
4574 }
4575
AssertNotSmi(Register object,AbortReason reason)4576 void TurboAssembler::AssertNotSmi(Register object, AbortReason reason) {
4577 if (FLAG_debug_code) {
4578 ASM_CODE_COMMENT(this);
4579 STATIC_ASSERT(kSmiTag == 0);
4580 DCHECK(object != kScratchReg);
4581 andi(kScratchReg, object, kSmiTagMask);
4582 Check(ne, reason, kScratchReg, Operand(zero_reg));
4583 }
4584 }
4585
AssertSmi(Register object,AbortReason reason)4586 void TurboAssembler::AssertSmi(Register object, AbortReason reason) {
4587 if (FLAG_debug_code) {
4588 ASM_CODE_COMMENT(this);
4589 STATIC_ASSERT(kSmiTag == 0);
4590 DCHECK(object != kScratchReg);
4591 andi(kScratchReg, object, kSmiTagMask);
4592 Check(eq, reason, kScratchReg, Operand(zero_reg));
4593 }
4594 }
4595
AssertConstructor(Register object)4596 void MacroAssembler::AssertConstructor(Register object) {
4597 if (FLAG_debug_code) {
4598 ASM_CODE_COMMENT(this);
4599 DCHECK(object != kScratchReg);
4600 BlockTrampolinePoolScope block_trampoline_pool(this);
4601 STATIC_ASSERT(kSmiTag == 0);
4602 SmiTst(object, kScratchReg);
4603 Check(ne, AbortReason::kOperandIsASmiAndNotAConstructor, kScratchReg,
4604 Operand(zero_reg));
4605
4606 LoadMap(kScratchReg, object);
4607 Lbu(kScratchReg, FieldMemOperand(kScratchReg, Map::kBitFieldOffset));
4608 And(kScratchReg, kScratchReg, Operand(Map::Bits1::IsConstructorBit::kMask));
4609 Check(ne, AbortReason::kOperandIsNotAConstructor, kScratchReg,
4610 Operand(zero_reg));
4611 }
4612 }
4613
AssertFunction(Register object)4614 void MacroAssembler::AssertFunction(Register object) {
4615 if (FLAG_debug_code) {
4616 ASM_CODE_COMMENT(this);
4617 BlockTrampolinePoolScope block_trampoline_pool(this);
4618 STATIC_ASSERT(kSmiTag == 0);
4619 DCHECK(object != kScratchReg);
4620 SmiTst(object, kScratchReg);
4621 Check(ne, AbortReason::kOperandIsASmiAndNotAFunction, kScratchReg,
4622 Operand(zero_reg));
4623 push(object);
4624 LoadMap(object, object);
4625 UseScratchRegisterScope temps(this);
4626 Register range = temps.Acquire();
4627 GetInstanceTypeRange(object, object, FIRST_JS_FUNCTION_TYPE, range);
4628 Check(Uless_equal, AbortReason::kOperandIsNotAFunction, range,
4629 Operand(LAST_JS_FUNCTION_TYPE - FIRST_JS_FUNCTION_TYPE));
4630 pop(object);
4631 }
4632 }
4633
AssertCallableFunction(Register object)4634 void MacroAssembler::AssertCallableFunction(Register object) {
4635 if (!FLAG_debug_code) return;
4636 ASM_CODE_COMMENT(this);
4637 STATIC_ASSERT(kSmiTag == 0);
4638 AssertNotSmi(object, AbortReason::kOperandIsASmiAndNotAFunction);
4639 push(object);
4640 LoadMap(object, object);
4641 UseScratchRegisterScope temps(this);
4642 Register range = temps.Acquire();
4643 GetInstanceTypeRange(object, object, FIRST_CALLABLE_JS_FUNCTION_TYPE, range);
4644 Check(Uless_equal, AbortReason::kOperandIsNotACallableFunction, range,
4645 Operand(LAST_CALLABLE_JS_FUNCTION_TYPE -
4646 FIRST_CALLABLE_JS_FUNCTION_TYPE));
4647 pop(object);
4648 }
4649
AssertBoundFunction(Register object)4650 void MacroAssembler::AssertBoundFunction(Register object) {
4651 if (FLAG_debug_code) {
4652 ASM_CODE_COMMENT(this);
4653 BlockTrampolinePoolScope block_trampoline_pool(this);
4654 STATIC_ASSERT(kSmiTag == 0);
4655 DCHECK(object != kScratchReg);
4656 SmiTst(object, kScratchReg);
4657 Check(ne, AbortReason::kOperandIsASmiAndNotABoundFunction, kScratchReg,
4658 Operand(zero_reg));
4659 GetObjectType(object, kScratchReg, kScratchReg);
4660 Check(eq, AbortReason::kOperandIsNotABoundFunction, kScratchReg,
4661 Operand(JS_BOUND_FUNCTION_TYPE));
4662 }
4663 }
4664
AssertGeneratorObject(Register object)4665 void MacroAssembler::AssertGeneratorObject(Register object) {
4666 if (!FLAG_debug_code) return;
4667 ASM_CODE_COMMENT(this);
4668 BlockTrampolinePoolScope block_trampoline_pool(this);
4669 STATIC_ASSERT(kSmiTag == 0);
4670 DCHECK(object != kScratchReg);
4671 SmiTst(object, kScratchReg);
4672 Check(ne, AbortReason::kOperandIsASmiAndNotAGeneratorObject, kScratchReg,
4673 Operand(zero_reg));
4674
4675 GetObjectType(object, kScratchReg, kScratchReg);
4676
4677 Label done;
4678
4679 // Check if JSGeneratorObject
4680 BranchShort(&done, eq, kScratchReg, Operand(JS_GENERATOR_OBJECT_TYPE));
4681
4682 // Check if JSAsyncFunctionObject (See MacroAssembler::CompareInstanceType)
4683 BranchShort(&done, eq, kScratchReg, Operand(JS_ASYNC_FUNCTION_OBJECT_TYPE));
4684
4685 // Check if JSAsyncGeneratorObject
4686 BranchShort(&done, eq, kScratchReg, Operand(JS_ASYNC_GENERATOR_OBJECT_TYPE));
4687
4688 Abort(AbortReason::kOperandIsNotAGeneratorObject);
4689
4690 bind(&done);
4691 }
4692
AssertUndefinedOrAllocationSite(Register object,Register scratch)4693 void MacroAssembler::AssertUndefinedOrAllocationSite(Register object,
4694 Register scratch) {
4695 if (FLAG_debug_code) {
4696 ASM_CODE_COMMENT(this);
4697 Label done_checking;
4698 AssertNotSmi(object);
4699 LoadRoot(scratch, RootIndex::kUndefinedValue);
4700 BranchShort(&done_checking, eq, object, Operand(scratch));
4701 GetObjectType(object, scratch, scratch);
4702 Assert(eq, AbortReason::kExpectedUndefinedOrCell, scratch,
4703 Operand(ALLOCATION_SITE_TYPE));
4704 bind(&done_checking);
4705 }
4706 }
4707
4708 template <typename F_TYPE>
FloatMinMaxHelper(FPURegister dst,FPURegister src1,FPURegister src2,MaxMinKind kind)4709 void TurboAssembler::FloatMinMaxHelper(FPURegister dst, FPURegister src1,
4710 FPURegister src2, MaxMinKind kind) {
4711 DCHECK((std::is_same<F_TYPE, float>::value) ||
4712 (std::is_same<F_TYPE, double>::value));
4713
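  // If both operands are the same register, min/max is simply that value (a
  // NaN input is preserved by the move), so a plain register move suffices.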
4714 if (src1 == src2 && dst != src1) {
4715 if (std::is_same<float, F_TYPE>::value) {
4716 fmv_s(dst, src1);
4717 } else {
4718 fmv_d(dst, src1);
4719 }
4720 return;
4721 }
4722
4723 Label done, nan;
4724
4725   // For RISC-V, fmin_s/fmax_s return the other, non-NaN operand as the result
4726   // when only one operand is NaN; for JS, the result is NaN if any operand is
4727   // NaN. The following handles this discrepancy between the ISA and JS NaN
4728   // semantics.
4729 UseScratchRegisterScope temps(this);
4730 Register scratch = temps.Acquire();
4731 if (std::is_same<float, F_TYPE>::value) {
4732 CompareIsNotNanF32(scratch, src1, src2);
4733 } else {
4734 CompareIsNotNanF64(scratch, src1, src2);
4735 }
4736 BranchFalseF(scratch, &nan);
4737
4738 if (kind == MaxMinKind::kMax) {
4739 if (std::is_same<float, F_TYPE>::value) {
4740 fmax_s(dst, src1, src2);
4741 } else {
4742 fmax_d(dst, src1, src2);
4743 }
4744 } else {
4745 if (std::is_same<float, F_TYPE>::value) {
4746 fmin_s(dst, src1, src2);
4747 } else {
4748 fmin_d(dst, src1, src2);
4749 }
4750 }
4751 j(&done);
4752
4753 bind(&nan);
4754 // if any operand is NaN, return NaN (fadd returns NaN if any operand is NaN)
4755 if (std::is_same<float, F_TYPE>::value) {
4756 fadd_s(dst, src1, src2);
4757 } else {
4758 fadd_d(dst, src1, src2);
4759 }
4760
4761 bind(&done);
4762 }
4763
Float32Max(FPURegister dst,FPURegister src1,FPURegister src2)4764 void TurboAssembler::Float32Max(FPURegister dst, FPURegister src1,
4765 FPURegister src2) {
4766 ASM_CODE_COMMENT(this);
4767 FloatMinMaxHelper<float>(dst, src1, src2, MaxMinKind::kMax);
4768 }
4769
Float32Min(FPURegister dst,FPURegister src1,FPURegister src2)4770 void TurboAssembler::Float32Min(FPURegister dst, FPURegister src1,
4771 FPURegister src2) {
4772 ASM_CODE_COMMENT(this);
4773 FloatMinMaxHelper<float>(dst, src1, src2, MaxMinKind::kMin);
4774 }
4775
Float64Max(FPURegister dst,FPURegister src1,FPURegister src2)4776 void TurboAssembler::Float64Max(FPURegister dst, FPURegister src1,
4777 FPURegister src2) {
4778 ASM_CODE_COMMENT(this);
4779 FloatMinMaxHelper<double>(dst, src1, src2, MaxMinKind::kMax);
4780 }
4781
Float64Min(FPURegister dst,FPURegister src1,FPURegister src2)4782 void TurboAssembler::Float64Min(FPURegister dst, FPURegister src1,
4783 FPURegister src2) {
4784 ASM_CODE_COMMENT(this);
4785 FloatMinMaxHelper<double>(dst, src1, src2, MaxMinKind::kMin);
4786 }
4787
4788 static const int kRegisterPassedArguments = 8;
4789
CalculateStackPassedDWords(int num_gp_arguments,int num_fp_arguments)4790 int TurboAssembler::CalculateStackPassedDWords(int num_gp_arguments,
4791 int num_fp_arguments) {
4792 int stack_passed_dwords = 0;
4793
4794 // Up to eight integer arguments are passed in registers a0..a7 and
4795 // up to eight floating point arguments are passed in registers fa0..fa7
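  // For example, a hypothetical call taking 10 GP and 9 FP arguments would
  // spill (10 - 8) + (9 - 8) = 3 dwords to the stack, plus kCArgSlotCount.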
4796 if (num_gp_arguments > kRegisterPassedArguments) {
4797 stack_passed_dwords += num_gp_arguments - kRegisterPassedArguments;
4798 }
4799 if (num_fp_arguments > kRegisterPassedArguments) {
4800 stack_passed_dwords += num_fp_arguments - kRegisterPassedArguments;
4801 }
4802 stack_passed_dwords += kCArgSlotCount;
4803 return stack_passed_dwords;
4804 }
4805
PrepareCallCFunction(int num_reg_arguments,int num_double_arguments,Register scratch)4806 void TurboAssembler::PrepareCallCFunction(int num_reg_arguments,
4807 int num_double_arguments,
4808 Register scratch) {
4809 ASM_CODE_COMMENT(this);
4810 int frame_alignment = ActivationFrameAlignment();
4811
4812 // Up to eight simple arguments in a0..a7, fa0..fa7.
4813 // Remaining arguments are pushed on the stack (arg slot calculation handled
4814 // by CalculateStackPassedDWords()).
4815 int stack_passed_arguments =
4816 CalculateStackPassedDWords(num_reg_arguments, num_double_arguments);
4817 if (frame_alignment > kSystemPointerSize) {
4818 // Make stack end at alignment and make room for stack arguments and the
4819 // original value of sp.
4820 Mv(scratch, sp);
4821 Sub64(sp, sp, Operand((stack_passed_arguments + 1) * kSystemPointerSize));
4822 DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
4823 And(sp, sp, Operand(-frame_alignment));
4824 Sd(scratch, MemOperand(sp, stack_passed_arguments * kSystemPointerSize));
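    // The original sp saved here is reloaded by CallCFunctionHelper once the
    // call returns.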
4825 } else {
4826 Sub64(sp, sp, Operand(stack_passed_arguments * kSystemPointerSize));
4827 }
4828 }
4829
PrepareCallCFunction(int num_reg_arguments,Register scratch)4830 void TurboAssembler::PrepareCallCFunction(int num_reg_arguments,
4831 Register scratch) {
4832 PrepareCallCFunction(num_reg_arguments, 0, scratch);
4833 }
4834
CallCFunction(ExternalReference function,int num_reg_arguments,int num_double_arguments)4835 void TurboAssembler::CallCFunction(ExternalReference function,
4836 int num_reg_arguments,
4837 int num_double_arguments) {
4838 BlockTrampolinePoolScope block_trampoline_pool(this);
4839 li(t6, function);
4840 CallCFunctionHelper(t6, num_reg_arguments, num_double_arguments);
4841 }
4842
CallCFunction(Register function,int num_reg_arguments,int num_double_arguments)4843 void TurboAssembler::CallCFunction(Register function, int num_reg_arguments,
4844 int num_double_arguments) {
4845 CallCFunctionHelper(function, num_reg_arguments, num_double_arguments);
4846 }
4847
CallCFunction(ExternalReference function,int num_arguments)4848 void TurboAssembler::CallCFunction(ExternalReference function,
4849 int num_arguments) {
4850 CallCFunction(function, num_arguments, 0);
4851 }
4852
CallCFunction(Register function,int num_arguments)4853 void TurboAssembler::CallCFunction(Register function, int num_arguments) {
4854 CallCFunction(function, num_arguments, 0);
4855 }
4856
CallCFunctionHelper(Register function,int num_reg_arguments,int num_double_arguments)4857 void TurboAssembler::CallCFunctionHelper(Register function,
4858 int num_reg_arguments,
4859 int num_double_arguments) {
4860 DCHECK_LE(num_reg_arguments + num_double_arguments, kMaxCParameters);
4861 DCHECK(has_frame());
4862 ASM_CODE_COMMENT(this);
4863 // Make sure that the stack is aligned before calling a C function unless
4864 // running in the simulator. The simulator has its own alignment check which
4865 // provides more information.
4866   // The argument slots are presumed to have been set up by
4867 // PrepareCallCFunction.
4868
4869 #if V8_HOST_ARCH_RISCV64
4870 if (FLAG_debug_code) {
4871 int frame_alignment = base::OS::ActivationFrameAlignment();
4872 int frame_alignment_mask = frame_alignment - 1;
4873 if (frame_alignment > kSystemPointerSize) {
4874 DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
4875 Label alignment_as_expected;
4876 {
4877 UseScratchRegisterScope temps(this);
4878 Register scratch = temps.Acquire();
4879 And(scratch, sp, Operand(frame_alignment_mask));
4880 BranchShort(&alignment_as_expected, eq, scratch, Operand(zero_reg));
4881 }
4882 // Don't use Check here, as it will call Runtime_Abort possibly
4883 // re-entering here.
4884 ebreak();
4885 bind(&alignment_as_expected);
4886 }
4887 }
4888 #endif // V8_HOST_ARCH_RISCV64
4889
4890 // Just call directly. The function called cannot cause a GC, or
4891 // allow preemption, so the return address in the link register
4892 // stays correct.
4893 {
4894 if (function != t6) {
4895 Mv(t6, function);
4896 function = t6;
4897 }
4898
4899 // Save the frame pointer and PC so that the stack layout remains
4900 // iterable, even without an ExitFrame which normally exists between JS
4901 // and C frames.
4902     // 't' registers are caller-saved, so they are safe to use as scratch
4903     // registers here.
4903 Register pc_scratch = t1;
4904 Register scratch = t2;
4905
4906 auipc(pc_scratch, 0);
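    // pc_scratch now holds the address of the auipc above; auipc with a zero
    // immediate yields the current pc.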
4907 // See x64 code for reasoning about how to address the isolate data fields.
4908 if (root_array_available()) {
4909 Sd(pc_scratch, MemOperand(kRootRegister,
4910 IsolateData::fast_c_call_caller_pc_offset()));
4911 Sd(fp, MemOperand(kRootRegister,
4912 IsolateData::fast_c_call_caller_fp_offset()));
4913 } else {
4914 DCHECK_NOT_NULL(isolate());
4915 li(scratch, ExternalReference::fast_c_call_caller_pc_address(isolate()));
4916 Sd(pc_scratch, MemOperand(scratch));
4917 li(scratch, ExternalReference::fast_c_call_caller_fp_address(isolate()));
4918 Sd(fp, MemOperand(scratch));
4919 }
4920
4921 Call(function);
4922
4923 if (isolate() != nullptr) {
4924 // We don't unset the PC; the FP is the source of truth.
4925 UseScratchRegisterScope temps(this);
4926 Register scratch = temps.Acquire();
4927 li(scratch, ExternalReference::fast_c_call_caller_fp_address(isolate()));
4928 Sd(zero_reg, MemOperand(scratch));
4929 }
4930 }
4931
4932 int stack_passed_arguments =
4933 CalculateStackPassedDWords(num_reg_arguments, num_double_arguments);
4934
4935 if (base::OS::ActivationFrameAlignment() > kSystemPointerSize) {
4936 Ld(sp, MemOperand(sp, stack_passed_arguments * kSystemPointerSize));
4937 } else {
4938 Add64(sp, sp, Operand(stack_passed_arguments * kSystemPointerSize));
4939 }
4940 }
4941
4942 #undef BRANCH_ARGS_CHECK
4943
CheckPageFlag(Register object,Register scratch,int mask,Condition cc,Label * condition_met)4944 void TurboAssembler::CheckPageFlag(Register object, Register scratch, int mask,
4945 Condition cc, Label* condition_met) {
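  // Mask off the low page-offset bits to get the BasicMemoryChunk header,
  // then test its flags word against the given mask.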
4946 And(scratch, object, Operand(~kPageAlignmentMask));
4947 Ld(scratch, MemOperand(scratch, BasicMemoryChunk::kFlagsOffset));
4948 And(scratch, scratch, Operand(mask));
4949 Branch(condition_met, cc, scratch, Operand(zero_reg));
4950 }
4951
GetRegisterThatIsNotOneOf(Register reg1,Register reg2,Register reg3,Register reg4,Register reg5,Register reg6)4952 Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2, Register reg3,
4953 Register reg4, Register reg5,
4954 Register reg6) {
4955 RegList regs = {reg1, reg2, reg3, reg4, reg5, reg6};
4956
4957 const RegisterConfiguration* config = RegisterConfiguration::Default();
4958 for (int i = 0; i < config->num_allocatable_general_registers(); ++i) {
4959 int code = config->GetAllocatableGeneralCode(i);
4960 Register candidate = Register::from_code(code);
4961 if (regs.has(candidate)) continue;
4962 return candidate;
4963 }
4964 UNREACHABLE();
4965 }
4966
ComputeCodeStartAddress(Register dst)4967 void TurboAssembler::ComputeCodeStartAddress(Register dst) {
4968   // The push of ra here and the matching pop below preserve ra, which is
4969   // clobbered while computing the code start address.
4970 push(ra);
4971
4972 auipc(ra, 0);
4973 addi(ra, ra, kInstrSize * 2); // ra = address of li
4974 int pc = pc_offset();
4975 li(dst, Operand(pc));
4976 Sub64(dst, ra, dst);
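  // dst = (absolute address of the li sequence) - (its offset within the code
  // object), i.e. the code start address.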
4977
4978 pop(ra); // Restore ra
4979 }
4980
CallForDeoptimization(Builtin target,int,Label * exit,DeoptimizeKind kind,Label * ret,Label *)4981 void TurboAssembler::CallForDeoptimization(Builtin target, int, Label* exit,
4982 DeoptimizeKind kind, Label* ret,
4983 Label*) {
4984 ASM_CODE_COMMENT(this);
4985 BlockTrampolinePoolScope block_trampoline_pool(this);
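  // Load the target builtin's entry point from the builtin entry table and
  // call it.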
4986 Ld(t6,
4987 MemOperand(kRootRegister, IsolateData::BuiltinEntrySlotOffset(target)));
4988 Call(t6);
4989 DCHECK_EQ(SizeOfCodeGeneratedSince(exit),
4990 (kind == DeoptimizeKind::kLazy) ? Deoptimizer::kLazyDeoptExitSize
4991 : Deoptimizer::kEagerDeoptExitSize);
4992 }
4993
LoadCodeObjectEntry(Register destination,Register code_object)4994 void TurboAssembler::LoadCodeObjectEntry(Register destination,
4995 Register code_object) {
4996 // Code objects are called differently depending on whether we are generating
4997 // builtin code (which will later be embedded into the binary) or compiling
4998 // user JS code at runtime.
4999 // * Builtin code runs in --jitless mode and thus must not call into on-heap
5000 // Code targets. Instead, we dispatch through the builtins entry table.
5001 // * Codegen at runtime does not have this restriction and we can use the
5002 // shorter, branchless instruction sequence. The assumption here is that
5003 // targets are usually generated code and not builtin Code objects.
5004 ASM_CODE_COMMENT(this);
5005 if (options().isolate_independent_code) {
5006 DCHECK(root_array_available());
5007 Label if_code_is_off_heap, out;
5008
5009 UseScratchRegisterScope temps(this);
5010 Register scratch = temps.Acquire();
5011
5012 DCHECK(!AreAliased(destination, scratch));
5013 DCHECK(!AreAliased(code_object, scratch));
5014
5015 // Check whether the Code object is an off-heap trampoline. If so, call its
5016 // (off-heap) entry point directly without going through the (on-heap)
5017 // trampoline. Otherwise, just call the Code object as always.
5018
5019 Lw(scratch, FieldMemOperand(code_object, Code::kFlagsOffset));
5020 And(scratch, scratch, Operand(Code::IsOffHeapTrampoline::kMask));
5021 Branch(&if_code_is_off_heap, ne, scratch, Operand(zero_reg));
5022 // Not an off-heap trampoline object, the entry point is at
5023 // Code::raw_instruction_start().
5024 Add64(destination, code_object, Code::kHeaderSize - kHeapObjectTag);
5025 Branch(&out);
5026
5027 // An off-heap trampoline, the entry point is loaded from the builtin entry
5028 // table.
5029 bind(&if_code_is_off_heap);
5030 Lw(scratch, FieldMemOperand(code_object, Code::kBuiltinIndexOffset));
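    // destination = builtin_entry_table[builtin_index]: scale the index to a
    // pointer-sized slot and index off kRootRegister.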
5031 slli(destination, scratch, kSystemPointerSizeLog2);
5032 Add64(destination, destination, kRootRegister);
5033 Ld(destination,
5034 MemOperand(destination, IsolateData::builtin_entry_table_offset()));
5035
5036 bind(&out);
5037 } else {
5038 Add64(destination, code_object, Code::kHeaderSize - kHeapObjectTag);
5039 }
5040 }
5041
CallCodeObject(Register code_object)5042 void TurboAssembler::CallCodeObject(Register code_object) {
5043 ASM_CODE_COMMENT(this);
5044 LoadCodeObjectEntry(code_object, code_object);
5045 Call(code_object);
5046 }
5047
JumpCodeObject(Register code_object,JumpMode jump_mode)5048 void TurboAssembler::JumpCodeObject(Register code_object, JumpMode jump_mode) {
5049 ASM_CODE_COMMENT(this);
5050 DCHECK_EQ(JumpMode::kJump, jump_mode);
5051 LoadCodeObjectEntry(code_object, code_object);
5052 Jump(code_object);
5053 }
5054
LoadTaggedPointerField(const Register & destination,const MemOperand & field_operand)5055 void TurboAssembler::LoadTaggedPointerField(const Register& destination,
5056 const MemOperand& field_operand) {
5057 if (COMPRESS_POINTERS_BOOL) {
5058 DecompressTaggedPointer(destination, field_operand);
5059 } else {
5060 Ld(destination, field_operand);
5061 }
5062 }
5063
LoadAnyTaggedField(const Register & destination,const MemOperand & field_operand)5064 void TurboAssembler::LoadAnyTaggedField(const Register& destination,
5065 const MemOperand& field_operand) {
5066 if (COMPRESS_POINTERS_BOOL) {
5067 DecompressAnyTagged(destination, field_operand);
5068 } else {
5069 Ld(destination, field_operand);
5070 }
5071 }
5072
LoadTaggedSignedField(const Register & destination,const MemOperand & field_operand)5073 void TurboAssembler::LoadTaggedSignedField(const Register& destination,
5074 const MemOperand& field_operand) {
5075 if (COMPRESS_POINTERS_BOOL) {
5076 DecompressTaggedSigned(destination, field_operand);
5077 } else {
5078 Ld(destination, field_operand);
5079 }
5080 }
5081
SmiUntagField(Register dst,const MemOperand & src)5082 void TurboAssembler::SmiUntagField(Register dst, const MemOperand& src) {
5083 SmiUntag(dst, src);
5084 }
5085
StoreTaggedField(const Register & value,const MemOperand & dst_field_operand)5086 void TurboAssembler::StoreTaggedField(const Register& value,
5087 const MemOperand& dst_field_operand) {
5088 if (COMPRESS_POINTERS_BOOL) {
5089 Sw(value, dst_field_operand);
5090 } else {
5091 Sd(value, dst_field_operand);
5092 }
5093 }
5094
DecompressTaggedSigned(const Register & destination,const MemOperand & field_operand)5095 void TurboAssembler::DecompressTaggedSigned(const Register& destination,
5096 const MemOperand& field_operand) {
5097 ASM_CODE_COMMENT(this);
5098 Lwu(destination, field_operand);
5099 if (FLAG_debug_code) {
5100 // Corrupt the top 32 bits. Made up of 16 fixed bits and 16 pc offset bits.
5101 Add64(destination, destination,
5102 Operand(((kDebugZapValue << 16) | (pc_offset() & 0xffff)) << 32));
5103 }
5104 }
5105
DecompressTaggedPointer(const Register & destination,const MemOperand & field_operand)5106 void TurboAssembler::DecompressTaggedPointer(const Register& destination,
5107 const MemOperand& field_operand) {
5108 ASM_CODE_COMMENT(this);
5109 Lwu(destination, field_operand);
5110 Add64(destination, kPtrComprCageBaseRegister, destination);
5111 }
5112
DecompressTaggedPointer(const Register & destination,const Register & source)5113 void TurboAssembler::DecompressTaggedPointer(const Register& destination,
5114 const Register& source) {
5115 ASM_CODE_COMMENT(this);
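  // Keep only the lower 32 bits (the compressed tagged value) before adding
  // the cage base.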
5116 And(destination, source, Operand(0xFFFFFFFF));
5117 Add64(destination, kPtrComprCageBaseRegister, Operand(destination));
5118 }
5119
DecompressAnyTagged(const Register & destination,const MemOperand & field_operand)5120 void TurboAssembler::DecompressAnyTagged(const Register& destination,
5121 const MemOperand& field_operand) {
5122 ASM_CODE_COMMENT(this);
5123 Lwu(destination, field_operand);
5124 Add64(destination, kPtrComprCageBaseRegister, destination);
5125 }
5126
DropArguments(Register count,ArgumentsCountType type,ArgumentsCountMode mode,Register scratch)5127 void MacroAssembler::DropArguments(Register count, ArgumentsCountType type,
5128 ArgumentsCountMode mode, Register scratch) {
5129 switch (type) {
5130 case kCountIsInteger: {
5131 CalcScaledAddress(sp, sp, count, kPointerSizeLog2);
5132 break;
5133 }
5134 case kCountIsSmi: {
5135 STATIC_ASSERT(kSmiTagSize == 1 && kSmiTag == 0);
5136 DCHECK_NE(scratch, no_reg);
5137 SmiScale(scratch, count, kPointerSizeLog2);
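      // SmiScale converts the Smi-tagged count directly into a byte offset,
      // so no separate SmiUntag is needed.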
5138 Add64(sp, sp, scratch);
5139 break;
5140 }
5141 case kCountIsBytes: {
5142 Add64(sp, sp, count);
5143 break;
5144 }
5145 }
5146 if (mode == kCountExcludesReceiver) {
5147 Add64(sp, sp, kSystemPointerSize);
5148 }
5149 }
5150
DropArgumentsAndPushNewReceiver(Register argc,Register receiver,ArgumentsCountType type,ArgumentsCountMode mode,Register scratch)5151 void MacroAssembler::DropArgumentsAndPushNewReceiver(Register argc,
5152 Register receiver,
5153 ArgumentsCountType type,
5154 ArgumentsCountMode mode,
5155 Register scratch) {
5156 DCHECK(!AreAliased(argc, receiver));
5157 if (mode == kCountExcludesReceiver) {
5158 // Drop arguments without receiver and override old receiver.
5159 DropArguments(argc, type, kCountIncludesReceiver, scratch);
5160 Sd(receiver, MemOperand(sp));
5161 } else {
5162 DropArguments(argc, type, mode, scratch);
5163 push(receiver);
5164 }
5165 }
5166
5167 } // namespace internal
5168 } // namespace v8
5169
5170 #endif // V8_TARGET_ARCH_RISCV64
5171