1 // Copyright 2021 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <limits.h>  // For LONG_MIN, LONG_MAX.
6 
7 #if V8_TARGET_ARCH_RISCV64
8 
9 #include "src/base/bits.h"
10 #include "src/base/division-by-constant.h"
11 #include "src/codegen/assembler-inl.h"
12 #include "src/codegen/callable.h"
13 #include "src/codegen/code-factory.h"
14 #include "src/codegen/external-reference-table.h"
15 #include "src/codegen/interface-descriptors-inl.h"
16 #include "src/codegen/macro-assembler.h"
17 #include "src/codegen/register-configuration.h"
18 #include "src/debug/debug.h"
19 #include "src/deoptimizer/deoptimizer.h"
20 #include "src/execution/frames-inl.h"
21 #include "src/heap/memory-chunk.h"
22 #include "src/init/bootstrapper.h"
23 #include "src/logging/counters.h"
24 #include "src/objects/heap-number.h"
25 #include "src/runtime/runtime.h"
26 #include "src/snapshot/snapshot.h"
27 #include "src/wasm/wasm-code-manager.h"
28 
29 // Satisfy cpplint check, but don't include platform-specific header. It is
30 // included recursively via macro-assembler.h.
31 #if 0
32 #include "src/codegen/riscv64/macro-assembler-riscv64.h"
33 #endif
34 
35 namespace v8 {
36 namespace internal {
37 
38 static inline bool IsZero(const Operand& rt) {
39   if (rt.is_reg()) {
40     return rt.rm() == zero_reg;
41   } else {
42     return rt.immediate() == 0;
43   }
44 }
45 
46 int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode,
47                                                     Register exclusion1,
48                                                     Register exclusion2,
49                                                     Register exclusion3) const {
50   int bytes = 0;
51 
52   RegList exclusions = {exclusion1, exclusion2, exclusion3};
53   RegList list = kJSCallerSaved - exclusions;
54   bytes += list.Count() * kSystemPointerSize;
55 
56   if (fp_mode == SaveFPRegsMode::kSave) {
57     bytes += kCallerSavedFPU.Count() * kDoubleSize;
58   }
59 
60   return bytes;
61 }
62 
63 int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
64                                     Register exclusion2, Register exclusion3) {
65   int bytes = 0;
66 
67   RegList exclusions = {exclusion1, exclusion2, exclusion3};
68   RegList list = kJSCallerSaved - exclusions;
69   MultiPush(list);
70   bytes += list.Count() * kSystemPointerSize;
71 
72   if (fp_mode == SaveFPRegsMode::kSave) {
73     MultiPushFPU(kCallerSavedFPU);
74     bytes += kCallerSavedFPU.Count() * kDoubleSize;
75   }
76 
77   return bytes;
78 }
79 
80 int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
81                                    Register exclusion2, Register exclusion3) {
82   int bytes = 0;
83   if (fp_mode == SaveFPRegsMode::kSave) {
84     MultiPopFPU(kCallerSavedFPU);
85     bytes += kCallerSavedFPU.Count() * kDoubleSize;
86   }
87 
88   RegList exclusions = {exclusion1, exclusion2, exclusion3};
89   RegList list = kJSCallerSaved - exclusions;
90   MultiPop(list);
91   bytes += list.Count() * kSystemPointerSize;
92 
93   return bytes;
94 }
95 
96 void TurboAssembler::LoadRoot(Register destination, RootIndex index) {
97   Ld(destination,
98      MemOperand(kRootRegister, RootRegisterOffsetForRootIndex(index)));
99 }
100 
101 void TurboAssembler::LoadRoot(Register destination, RootIndex index,
102                               Condition cond, Register src1,
103                               const Operand& src2) {
104   Label skip;
105   BranchShort(&skip, NegateCondition(cond), src1, src2);
106   Ld(destination,
107      MemOperand(kRootRegister, RootRegisterOffsetForRootIndex(index)));
108   bind(&skip);
109 }
110 
111 void TurboAssembler::PushCommonFrame(Register marker_reg) {
112   if (marker_reg.is_valid()) {
113     Push(ra, fp, marker_reg);
114     Add64(fp, sp, Operand(kSystemPointerSize));
115   } else {
116     Push(ra, fp);
117     Mv(fp, sp);
118   }
119 }
120 
121 void TurboAssembler::PushStandardFrame(Register function_reg) {
122   int offset = -StandardFrameConstants::kContextOffset;
123   if (function_reg.is_valid()) {
124     Push(ra, fp, cp, function_reg, kJavaScriptCallArgCountRegister);
125     offset += 2 * kSystemPointerSize;
126   } else {
127     Push(ra, fp, cp, kJavaScriptCallArgCountRegister);
128     offset += kSystemPointerSize;
129   }
130   Add64(fp, sp, Operand(offset));
131 }
132 
133 int MacroAssembler::SafepointRegisterStackIndex(int reg_code) {
134   // The registers are pushed starting with the highest encoding,
135   // which means that lowest encodings are closest to the stack pointer.
136   return kSafepointRegisterStackIndexMap[reg_code];
137 }
138 
139 // Clobbers object, dst, value, and ra, if (ra_status == kRAHasBeenSaved)
140 // The register 'object' contains a heap object pointer.  The heap object
141 // tag is shifted away.
142 void MacroAssembler::RecordWriteField(Register object, int offset,
143                                       Register value, RAStatus ra_status,
144                                       SaveFPRegsMode save_fp,
145                                       RememberedSetAction remembered_set_action,
146                                       SmiCheck smi_check) {
147   DCHECK(!AreAliased(object, value));
148   // First, check if a write barrier is even needed. The tests below
149   // catch stores of Smis.
150   Label done;
151 
152   // Skip the barrier if writing a smi.
153   if (smi_check == SmiCheck::kInline) {
154     JumpIfSmi(value, &done);
155   }
156 
157   // Although the object register is tagged, the offset is relative to the start
158   // of the object, so offset must be a multiple of kTaggedSize.
159   DCHECK(IsAligned(offset, kTaggedSize));
160 
161   if (FLAG_debug_code) {
162     Label ok;
163     UseScratchRegisterScope temps(this);
164     Register scratch = temps.Acquire();
165     DCHECK(!AreAliased(object, value, scratch));
166     Add64(scratch, object, offset - kHeapObjectTag);
167     And(scratch, scratch, Operand(kTaggedSize - 1));
168     BranchShort(&ok, eq, scratch, Operand(zero_reg));
169     Abort(AbortReason::kUnalignedCellInWriteBarrier);
170     bind(&ok);
171   }
172 
173   RecordWrite(object, Operand(offset - kHeapObjectTag), value, ra_status,
174               save_fp, remembered_set_action, SmiCheck::kOmit);
175 
176   bind(&done);
177 }
178 
179 void TurboAssembler::MaybeSaveRegisters(RegList registers) {
180   if (registers.is_empty()) return;
181   MultiPush(registers);
182 }
183 
184 void TurboAssembler::MaybeRestoreRegisters(RegList registers) {
185   if (registers.is_empty()) return;
186   MultiPop(registers);
187 }
188 
189 void TurboAssembler::CallEphemeronKeyBarrier(Register object,
190                                              Register slot_address,
191                                              SaveFPRegsMode fp_mode) {
192   DCHECK(!AreAliased(object, slot_address));
193   RegList registers =
194       WriteBarrierDescriptor::ComputeSavedRegisters(object, slot_address);
195   MaybeSaveRegisters(registers);
196 
197   Register object_parameter = WriteBarrierDescriptor::ObjectRegister();
198   Register slot_address_parameter =
199       WriteBarrierDescriptor::SlotAddressRegister();
200 
201   Push(object);
202   Push(slot_address);
203   Pop(slot_address_parameter);
204   Pop(object_parameter);
205 
206   Call(isolate()->builtins()->code_handle(
207            Builtins::GetEphemeronKeyBarrierStub(fp_mode)),
208        RelocInfo::CODE_TARGET);
209   MaybeRestoreRegisters(registers);
210 }
211 
212 void TurboAssembler::CallRecordWriteStubSaveRegisters(
213     Register object, Register slot_address,
214     RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode,
215     StubCallMode mode) {
216   DCHECK(!AreAliased(object, slot_address));
217   RegList registers =
218       WriteBarrierDescriptor::ComputeSavedRegisters(object, slot_address);
219   MaybeSaveRegisters(registers);
220 
221   Register object_parameter = WriteBarrierDescriptor::ObjectRegister();
222   Register slot_address_parameter =
223       WriteBarrierDescriptor::SlotAddressRegister();
224 
225   Push(object);
226   Push(slot_address);
227   Pop(slot_address_parameter);
228   Pop(object_parameter);
229 
230   CallRecordWriteStub(object_parameter, slot_address_parameter,
231                       remembered_set_action, fp_mode, mode);
232 
233   MaybeRestoreRegisters(registers);
234 }
235 
236 void TurboAssembler::CallRecordWriteStub(
237     Register object, Register slot_address,
238     RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode,
239     StubCallMode mode) {
240   // Use CallRecordWriteStubSaveRegisters if the object and slot registers
241   // need to be caller saved.
242   DCHECK_EQ(WriteBarrierDescriptor::ObjectRegister(), object);
243   DCHECK_EQ(WriteBarrierDescriptor::SlotAddressRegister(), slot_address);
244   if (mode == StubCallMode::kCallWasmRuntimeStub) {
245     auto wasm_target =
246         wasm::WasmCode::GetRecordWriteStub(remembered_set_action, fp_mode);
247     Call(wasm_target, RelocInfo::WASM_STUB_CALL);
248   } else {
249     auto builtin = Builtins::GetRecordWriteStub(remembered_set_action, fp_mode);
250     if (options().inline_offheap_trampolines) {
251       // Inline the trampoline.
252       RecordCommentForOffHeapTrampoline(builtin);
253 
254       UseScratchRegisterScope temps(this);
255       BlockTrampolinePoolScope block_trampoline_pool(this);
256       Register scratch = temps.Acquire();
257       li(scratch, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
258       Call(scratch);
259       RecordComment("]");
260     } else {
261       Handle<Code> code_target = isolate()->builtins()->code_handle(builtin);
262       Call(code_target, RelocInfo::CODE_TARGET);
263     }
264   }
265 }
266 
267 // Clobbers object, address, value, and ra, if (ra_status == kRAHasBeenSaved)
268 // The register 'object' contains a heap object pointer.  The heap object
269 // tag is shifted away.
270 void MacroAssembler::RecordWrite(Register object, Operand offset,
271                                  Register value, RAStatus ra_status,
272                                  SaveFPRegsMode fp_mode,
273                                  RememberedSetAction remembered_set_action,
274                                  SmiCheck smi_check) {
275   DCHECK(!AreAliased(object, value));
276 
277   if (FLAG_debug_code) {
278     UseScratchRegisterScope temps(this);
279     Register temp = temps.Acquire();
280     DCHECK(!AreAliased(object, value, temp));
281     Add64(temp, object, offset);
282     LoadTaggedPointerField(temp, MemOperand(temp));
283     Assert(eq, AbortReason::kWrongAddressOrValuePassedToRecordWrite, temp,
284            Operand(value));
285   }
286 
287   if ((remembered_set_action == RememberedSetAction::kOmit &&
288        !FLAG_incremental_marking) ||
289       FLAG_disable_write_barriers) {
290     return;
291   }
292 
293   // First, check if a write barrier is even needed. The tests below
294   // catch stores of smis and stores into the young generation.
295   Label done;
296 
297   if (smi_check == SmiCheck::kInline) {
298     DCHECK_EQ(0, kSmiTag);
299     JumpIfSmi(value, &done);
300   }
301 
302   {
303     UseScratchRegisterScope temps(this);
304     Register temp = temps.Acquire();
305     CheckPageFlag(value,
306                   temp,  // Used as scratch.
307                   MemoryChunk::kPointersToHereAreInterestingMask,
308                   eq,  // On RISC-V, cc compares against zero, so if no
309                        // bits are set and cc is eq, it will branch to done
310                   &done);
311 
312     CheckPageFlag(object,
313                   temp,  // Used as scratch.
314                   MemoryChunk::kPointersFromHereAreInterestingMask,
315                   eq,  // On RISC-V, cc compares against zero, so if no
316                        // bits are set and cc is eq, it will branch to done
317                   &done);
318   }
319   // Record the actual write.
320   if (ra_status == kRAHasNotBeenSaved) {
321     push(ra);
322   }
323   Register slot_address = WriteBarrierDescriptor::SlotAddressRegister();
324   DCHECK(!AreAliased(object, slot_address, value));
325   // TODO(cbruni): Turn offset into int.
326   DCHECK(offset.IsImmediate());
327   Add64(slot_address, object, offset);
328   CallRecordWriteStub(object, slot_address, remembered_set_action, fp_mode);
329   if (ra_status == kRAHasNotBeenSaved) {
330     pop(ra);
331   }
332   if (FLAG_debug_code) li(slot_address, Operand(kZapValue));
333 
334   bind(&done);
335 }
336 
337 // ---------------------------------------------------------------------------
338 // Instruction macros.
339 
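// In general, each macro below accepts an Operand that is either a register
// or an immediate. Immediates that fit the 12-bit I-type form and need no
// relocation are encoded directly; larger or relocatable values are first
// materialized into a scratch register with Li.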
340 void TurboAssembler::Add32(Register rd, Register rs, const Operand& rt) {
341   if (rt.is_reg()) {
342     if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
343         ((rd.code() & 0b11000) == 0b01000) &&
344         ((rt.rm().code() & 0b11000) == 0b01000)) {
345       c_addw(rd, rt.rm());
346     } else {
347       addw(rd, rs, rt.rm());
348     }
349   } else {
350     if (FLAG_riscv_c_extension && is_int6(rt.immediate()) &&
351         (rd.code() == rs.code()) && (rd != zero_reg) &&
352         !MustUseReg(rt.rmode())) {
353       c_addiw(rd, static_cast<int8_t>(rt.immediate()));
354     } else if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
355       addiw(rd, rs, static_cast<int32_t>(rt.immediate()));
356     } else if ((-4096 <= rt.immediate() && rt.immediate() <= -2049) ||
357                (2048 <= rt.immediate() && rt.immediate() <= 4094)) {
358       addiw(rd, rs, rt.immediate() / 2);
359       addiw(rd, rd, rt.immediate() - (rt.immediate() / 2));
360     } else {
361       // li handles the relocation.
362       UseScratchRegisterScope temps(this);
363       Register scratch = temps.Acquire();
364       Li(scratch, rt.immediate());
365       addw(rd, rs, scratch);
366     }
367   }
368 }
369 
370 void TurboAssembler::Add64(Register rd, Register rs, const Operand& rt) {
371   if (rt.is_reg()) {
372     if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
373         (rt.rm() != zero_reg) && (rs != zero_reg)) {
374       c_add(rd, rt.rm());
375     } else {
376       add(rd, rs, rt.rm());
377     }
378   } else {
379     if (FLAG_riscv_c_extension && is_int6(rt.immediate()) &&
380         (rd.code() == rs.code()) && (rd != zero_reg) && (rt.immediate() != 0) &&
381         !MustUseReg(rt.rmode())) {
382       c_addi(rd, static_cast<int8_t>(rt.immediate()));
383     } else if (FLAG_riscv_c_extension && is_int10(rt.immediate()) &&
384                (rt.immediate() != 0) && ((rt.immediate() & 0xf) == 0) &&
385                (rd.code() == rs.code()) && (rd == sp) &&
386                !MustUseReg(rt.rmode())) {
387       c_addi16sp(static_cast<int16_t>(rt.immediate()));
388     } else if (FLAG_riscv_c_extension && ((rd.code() & 0b11000) == 0b01000) &&
389                (rs == sp) && is_uint10(rt.immediate()) &&
390                (rt.immediate() != 0) && !MustUseReg(rt.rmode())) {
391       c_addi4spn(rd, static_cast<uint16_t>(rt.immediate()));
392     } else if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
393       addi(rd, rs, static_cast<int32_t>(rt.immediate()));
394     } else if ((-4096 <= rt.immediate() && rt.immediate() <= -2049) ||
395                (2048 <= rt.immediate() && rt.immediate() <= 4094)) {
396       addi(rd, rs, rt.immediate() / 2);
397       addi(rd, rd, rt.immediate() - (rt.immediate() / 2));
398     } else {
399       // li handles the relocation.
400       UseScratchRegisterScope temps(this);
401       Register scratch = temps.Acquire();
402       BlockTrampolinePoolScope block_trampoline_pool(this);
403       Li(scratch, rt.immediate());
404       add(rd, rs, scratch);
405     }
406   }
407 }
408 
409 void TurboAssembler::Sub32(Register rd, Register rs, const Operand& rt) {
410   if (rt.is_reg()) {
411     if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
412         ((rd.code() & 0b11000) == 0b01000) &&
413         ((rt.rm().code() & 0b11000) == 0b01000)) {
414       c_subw(rd, rt.rm());
415     } else {
416       subw(rd, rs, rt.rm());
417     }
418   } else {
419     DCHECK(is_int32(rt.immediate()));
420     if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
421         (rd != zero_reg) && is_int6(-rt.immediate()) &&
422         !MustUseReg(rt.rmode())) {
423       c_addiw(
424           rd,
425           static_cast<int8_t>(
426               -rt.immediate()));  // No c_subiw instr, use c_addiw(x, y, -imm).
427     } else if (is_int12(-rt.immediate()) && !MustUseReg(rt.rmode())) {
428       addiw(rd, rs,
429             static_cast<int32_t>(
430                 -rt.immediate()));  // No subiw instr, use addiw(x, y, -imm).
431     } else if ((-4096 <= -rt.immediate() && -rt.immediate() <= -2049) ||
432                (2048 <= -rt.immediate() && -rt.immediate() <= 4094)) {
433       addiw(rd, rs, -rt.immediate() / 2);
434       addiw(rd, rd, -rt.immediate() - (-rt.immediate() / 2));
435     } else {
436       UseScratchRegisterScope temps(this);
437       Register scratch = temps.Acquire();
438       if (-rt.immediate() >> 12 == 0 && !MustUseReg(rt.rmode())) {
439         // Use load -imm and addw when loading -imm generates one instruction.
440         Li(scratch, -rt.immediate());
441         addw(rd, rs, scratch);
442       } else {
443         // li handles the relocation.
444         Li(scratch, rt.immediate());
445         subw(rd, rs, scratch);
446       }
447     }
448   }
449 }
450 
451 void TurboAssembler::Sub64(Register rd, Register rs, const Operand& rt) {
452   if (rt.is_reg()) {
453     if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
454         ((rd.code() & 0b11000) == 0b01000) &&
455         ((rt.rm().code() & 0b11000) == 0b01000)) {
456       c_sub(rd, rt.rm());
457     } else {
458       sub(rd, rs, rt.rm());
459     }
460   } else if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
461              (rd != zero_reg) && is_int6(-rt.immediate()) &&
462              (rt.immediate() != 0) && !MustUseReg(rt.rmode())) {
463     c_addi(rd,
464            static_cast<int8_t>(
465                -rt.immediate()));  // No c_subi instr, use c_addi(x, y, -imm).
466 
467   } else if (FLAG_riscv_c_extension && is_int10(-rt.immediate()) &&
468              (rt.immediate() != 0) && ((rt.immediate() & 0xf) == 0) &&
469              (rd.code() == rs.code()) && (rd == sp) &&
470              !MustUseReg(rt.rmode())) {
471     c_addi16sp(static_cast<int16_t>(-rt.immediate()));
472   } else if (is_int12(-rt.immediate()) && !MustUseReg(rt.rmode())) {
473     addi(rd, rs,
474          static_cast<int32_t>(
475              -rt.immediate()));  // No subi instr, use addi(x, y, -imm).
476   } else if ((-4096 <= -rt.immediate() && -rt.immediate() <= -2049) ||
477              (2048 <= -rt.immediate() && -rt.immediate() <= 4094)) {
478     addi(rd, rs, -rt.immediate() / 2);
479     addi(rd, rd, -rt.immediate() - (-rt.immediate() / 2));
480   } else {
481     int li_count = InstrCountForLi64Bit(rt.immediate());
482     int li_neg_count = InstrCountForLi64Bit(-rt.immediate());
483     if (li_neg_count < li_count && !MustUseReg(rt.rmode())) {
484       // Use load -imm and add when loading -imm generates one instruction.
485       DCHECK(rt.immediate() != std::numeric_limits<int32_t>::min());
486       UseScratchRegisterScope temps(this);
487       Register scratch = temps.Acquire();
488       Li(scratch, -rt.immediate());
489       add(rd, rs, scratch);
490     } else {
491       // li handles the relocation.
492       UseScratchRegisterScope temps(this);
493       Register scratch = temps.Acquire();
494       Li(scratch, rt.immediate());
495       sub(rd, rs, scratch);
496     }
497   }
498 }
499 
500 void TurboAssembler::Mul32(Register rd, Register rs, const Operand& rt) {
501   if (rt.is_reg()) {
502     mulw(rd, rs, rt.rm());
503   } else {
504     // li handles the relocation.
505     UseScratchRegisterScope temps(this);
506     Register scratch = temps.Acquire();
507     Li(scratch, rt.immediate());
508     mulw(rd, rs, scratch);
509   }
510 }
511 
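// Mulh32 assumes both operands are sign-extended 32-bit values: the full
// 64-bit product is formed with mul, then the upper 32 bits are extracted
// with an arithmetic right shift.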
512 void TurboAssembler::Mulh32(Register rd, Register rs, const Operand& rt) {
513   if (rt.is_reg()) {
514     mul(rd, rs, rt.rm());
515   } else {
516     // li handles the relocation.
517     UseScratchRegisterScope temps(this);
518     Register scratch = temps.Acquire();
519     Li(scratch, rt.immediate());
520     mul(rd, rs, scratch);
521   }
522   srai(rd, rd, 32);
523 }
524 
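// Mulhu32 shifts both operands into the upper word so that mulhu yields the
// full 64-bit unsigned product of the low 32 bits; the final arithmetic shift
// leaves the (sign-extended) high word of that product in rd.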
525 void TurboAssembler::Mulhu32(Register rd, Register rs, const Operand& rt,
526                              Register rsz, Register rtz) {
527   slli(rsz, rs, 32);
528   if (rt.is_reg()) {
529     slli(rtz, rt.rm(), 32);
530   } else {
531     Li(rtz, rt.immediate() << 32);
532   }
533   mulhu(rd, rsz, rtz);
534   srai(rd, rd, 32);
535 }
536 
537 void TurboAssembler::Mul64(Register rd, Register rs, const Operand& rt) {
538   if (rt.is_reg()) {
539     mul(rd, rs, rt.rm());
540   } else {
541     // li handles the relocation.
542     UseScratchRegisterScope temps(this);
543     Register scratch = temps.Acquire();
544     Li(scratch, rt.immediate());
545     mul(rd, rs, scratch);
546   }
547 }
548 
549 void TurboAssembler::Mulh64(Register rd, Register rs, const Operand& rt) {
550   if (rt.is_reg()) {
551     mulh(rd, rs, rt.rm());
552   } else {
553     // li handles the relocation.
554     UseScratchRegisterScope temps(this);
555     Register scratch = temps.Acquire();
556     Li(scratch, rt.immediate());
557     mulh(rd, rs, scratch);
558   }
559 }
560 
561 void TurboAssembler::Div32(Register res, Register rs, const Operand& rt) {
562   if (rt.is_reg()) {
563     divw(res, rs, rt.rm());
564   } else {
565     // li handles the relocation.
566     UseScratchRegisterScope temps(this);
567     Register scratch = temps.Acquire();
568     Li(scratch, rt.immediate());
569     divw(res, rs, scratch);
570   }
571 }
572 
573 void TurboAssembler::Mod32(Register rd, Register rs, const Operand& rt) {
574   if (rt.is_reg()) {
575     remw(rd, rs, rt.rm());
576   } else {
577     // li handles the relocation.
578     UseScratchRegisterScope temps(this);
579     Register scratch = temps.Acquire();
580     Li(scratch, rt.immediate());
581     remw(rd, rs, scratch);
582   }
583 }
584 
585 void TurboAssembler::Modu32(Register rd, Register rs, const Operand& rt) {
586   if (rt.is_reg()) {
587     remuw(rd, rs, rt.rm());
588   } else {
589     // li handles the relocation.
590     UseScratchRegisterScope temps(this);
591     Register scratch = temps.Acquire();
592     Li(scratch, rt.immediate());
593     remuw(rd, rs, scratch);
594   }
595 }
596 
597 void TurboAssembler::Div64(Register rd, Register rs, const Operand& rt) {
598   if (rt.is_reg()) {
599     div(rd, rs, rt.rm());
600   } else {
601     // li handles the relocation.
602     UseScratchRegisterScope temps(this);
603     Register scratch = temps.Acquire();
604     Li(scratch, rt.immediate());
605     div(rd, rs, scratch);
606   }
607 }
608 
609 void TurboAssembler::Divu32(Register res, Register rs, const Operand& rt) {
610   if (rt.is_reg()) {
611     divuw(res, rs, rt.rm());
612   } else {
613     // li handles the relocation.
614     UseScratchRegisterScope temps(this);
615     Register scratch = temps.Acquire();
616     Li(scratch, rt.immediate());
617     divuw(res, rs, scratch);
618   }
619 }
620 
621 void TurboAssembler::Divu64(Register res, Register rs, const Operand& rt) {
622   if (rt.is_reg()) {
623     divu(res, rs, rt.rm());
624   } else {
625     // li handles the relocation.
626     UseScratchRegisterScope temps(this);
627     Register scratch = temps.Acquire();
628     Li(scratch, rt.immediate());
629     divu(res, rs, scratch);
630   }
631 }
632 
633 void TurboAssembler::Mod64(Register rd, Register rs, const Operand& rt) {
634   if (rt.is_reg()) {
635     rem(rd, rs, rt.rm());
636   } else {
637     // li handles the relocation.
638     UseScratchRegisterScope temps(this);
639     Register scratch = temps.Acquire();
640     Li(scratch, rt.immediate());
641     rem(rd, rs, scratch);
642   }
643 }
644 
645 void TurboAssembler::Modu64(Register rd, Register rs, const Operand& rt) {
646   if (rt.is_reg()) {
647     remu(rd, rs, rt.rm());
648   } else {
649     // li handles the relocation.
650     UseScratchRegisterScope temps(this);
651     Register scratch = temps.Acquire();
652     Li(scratch, rt.immediate());
653     remu(rd, rs, scratch);
654   }
655 }
656 
657 void TurboAssembler::And(Register rd, Register rs, const Operand& rt) {
658   if (rt.is_reg()) {
659     if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
660         ((rd.code() & 0b11000) == 0b01000) &&
661         ((rt.rm().code() & 0b11000) == 0b01000)) {
662       c_and(rd, rt.rm());
663     } else {
664       and_(rd, rs, rt.rm());
665     }
666   } else {
667     if (FLAG_riscv_c_extension && is_int6(rt.immediate()) &&
668         !MustUseReg(rt.rmode()) && (rd.code() == rs.code()) &&
669         ((rd.code() & 0b11000) == 0b01000)) {
670       c_andi(rd, static_cast<int8_t>(rt.immediate()));
671     } else if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
672       andi(rd, rs, static_cast<int32_t>(rt.immediate()));
673     } else {
674       // li handles the relocation.
675       UseScratchRegisterScope temps(this);
676       Register scratch = temps.Acquire();
677       Li(scratch, rt.immediate());
678       and_(rd, rs, scratch);
679     }
680   }
681 }
682 
683 void TurboAssembler::Or(Register rd, Register rs, const Operand& rt) {
684   if (rt.is_reg()) {
685     if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
686         ((rd.code() & 0b11000) == 0b01000) &&
687         ((rt.rm().code() & 0b11000) == 0b01000)) {
688       c_or(rd, rt.rm());
689     } else {
690       or_(rd, rs, rt.rm());
691     }
692   } else {
693     if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
694       ori(rd, rs, static_cast<int32_t>(rt.immediate()));
695     } else {
696       // li handles the relocation.
697       UseScratchRegisterScope temps(this);
698       Register scratch = temps.Acquire();
699       Li(scratch, rt.immediate());
700       or_(rd, rs, scratch);
701     }
702   }
703 }
704 
705 void TurboAssembler::Xor(Register rd, Register rs, const Operand& rt) {
706   if (rt.is_reg()) {
707     if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
708         ((rd.code() & 0b11000) == 0b01000) &&
709         ((rt.rm().code() & 0b11000) == 0b01000)) {
710       c_xor(rd, rt.rm());
711     } else {
712       xor_(rd, rs, rt.rm());
713     }
714   } else {
715     if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
716       xori(rd, rs, static_cast<int32_t>(rt.immediate()));
717     } else {
718       // li handles the relocation.
719       UseScratchRegisterScope temps(this);
720       Register scratch = temps.Acquire();
721       Li(scratch, rt.immediate());
722       xor_(rd, rs, scratch);
723     }
724   }
725 }
726 
727 void TurboAssembler::Nor(Register rd, Register rs, const Operand& rt) {
728   if (rt.is_reg()) {
729     or_(rd, rs, rt.rm());
730     not_(rd, rd);
731   } else {
732     Or(rd, rs, rt);
733     not_(rd, rd);
734   }
735 }
736 
737 void TurboAssembler::Neg(Register rs, const Operand& rt) {
738   DCHECK(rt.is_reg());
739   neg(rs, rt.rm());
740 }
741 
742 void TurboAssembler::Seqz(Register rd, const Operand& rt) {
743   if (rt.is_reg()) {
744     seqz(rd, rt.rm());
745   } else {
746     li(rd, rt.immediate() == 0);
747   }
748 }
749 
750 void TurboAssembler::Snez(Register rd, const Operand& rt) {
751   if (rt.is_reg()) {
752     snez(rd, rt.rm());
753   } else {
754     li(rd, rt.immediate() != 0);
755   }
756 }
757 
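// Seq sets rd to 1 when rs equals rt and to 0 otherwise, using seqz directly
// when one side is zero and a subtract-then-test sequence in the general case.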
758 void TurboAssembler::Seq(Register rd, Register rs, const Operand& rt) {
759   if (rs == zero_reg) {
760     Seqz(rd, rt);
761   } else if (IsZero(rt)) {
762     seqz(rd, rs);
763   } else {
764     Sub64(rd, rs, rt);
765     seqz(rd, rd);
766   }
767 }
768 
769 void TurboAssembler::Sne(Register rd, Register rs, const Operand& rt) {
770   if (rs == zero_reg) {
771     Snez(rd, rt);
772   } else if (IsZero(rt)) {
773     snez(rd, rs);
774   } else {
775     Sub64(rd, rs, rt);
776     snez(rd, rd);
777   }
778 }
779 
780 void TurboAssembler::Slt(Register rd, Register rs, const Operand& rt) {
781   if (rt.is_reg()) {
782     slt(rd, rs, rt.rm());
783   } else {
784     if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
785       slti(rd, rs, static_cast<int32_t>(rt.immediate()));
786     } else {
787       // li handles the relocation.
788       UseScratchRegisterScope temps(this);
789       Register scratch = temps.Acquire();
790       BlockTrampolinePoolScope block_trampoline_pool(this);
791       Li(scratch, rt.immediate());
792       slt(rd, rs, scratch);
793     }
794   }
795 }
796 
797 void TurboAssembler::Sltu(Register rd, Register rs, const Operand& rt) {
798   if (rt.is_reg()) {
799     sltu(rd, rs, rt.rm());
800   } else {
801     if (is_int12(rt.immediate()) && !MustUseReg(rt.rmode())) {
802       sltiu(rd, rs, static_cast<int32_t>(rt.immediate()));
803     } else {
804       // li handles the relocation.
805       UseScratchRegisterScope temps(this);
806       Register scratch = temps.Acquire();
807       BlockTrampolinePoolScope block_trampoline_pool(this);
808       Li(scratch, rt.immediate());
809       sltu(rd, rs, scratch);
810     }
811   }
812 }
813 
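// Sle computes rs <= rt as the complement of rt < rs (slt followed by xori 1).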
814 void TurboAssembler::Sle(Register rd, Register rs, const Operand& rt) {
815   if (rt.is_reg()) {
816     slt(rd, rt.rm(), rs);
817   } else {
818     // li handles the relocation.
819     UseScratchRegisterScope temps(this);
820     Register scratch = temps.Acquire();
821     BlockTrampolinePoolScope block_trampoline_pool(this);
822     Li(scratch, rt.immediate());
823     slt(rd, scratch, rs);
824   }
825   xori(rd, rd, 1);
826 }
827 
828 void TurboAssembler::Sleu(Register rd, Register rs, const Operand& rt) {
829   if (rt.is_reg()) {
830     sltu(rd, rt.rm(), rs);
831   } else {
832     // li handles the relocation.
833     UseScratchRegisterScope temps(this);
834     Register scratch = temps.Acquire();
835     BlockTrampolinePoolScope block_trampoline_pool(this);
836     Li(scratch, rt.immediate());
837     sltu(rd, scratch, rs);
838   }
839   xori(rd, rd, 1);
840 }
841 
842 void TurboAssembler::Sge(Register rd, Register rs, const Operand& rt) {
843   Slt(rd, rs, rt);
844   xori(rd, rd, 1);
845 }
846 
847 void TurboAssembler::Sgeu(Register rd, Register rs, const Operand& rt) {
848   Sltu(rd, rs, rt);
849   xori(rd, rd, 1);
850 }
851 
852 void TurboAssembler::Sgt(Register rd, Register rs, const Operand& rt) {
853   if (rt.is_reg()) {
854     slt(rd, rt.rm(), rs);
855   } else {
856     // li handles the relocation.
857     UseScratchRegisterScope temps(this);
858     Register scratch = temps.Acquire();
859     BlockTrampolinePoolScope block_trampoline_pool(this);
860     Li(scratch, rt.immediate());
861     slt(rd, scratch, rs);
862   }
863 }
864 
865 void TurboAssembler::Sgtu(Register rd, Register rs, const Operand& rt) {
866   if (rt.is_reg()) {
867     sltu(rd, rt.rm(), rs);
868   } else {
869     // li handles the relocation.
870     UseScratchRegisterScope temps(this);
871     Register scratch = temps.Acquire();
872     BlockTrampolinePoolScope block_trampoline_pool(this);
873     Li(scratch, rt.immediate());
874     sltu(rd, scratch, rs);
875   }
876 }
877 
878 void TurboAssembler::Sll32(Register rd, Register rs, const Operand& rt) {
879   if (rt.is_reg()) {
880     sllw(rd, rs, rt.rm());
881   } else {
882     uint8_t shamt = static_cast<uint8_t>(rt.immediate());
883     slliw(rd, rs, shamt);
884   }
885 }
886 
887 void TurboAssembler::Sra32(Register rd, Register rs, const Operand& rt) {
888   if (rt.is_reg()) {
889     sraw(rd, rs, rt.rm());
890   } else {
891     uint8_t shamt = static_cast<uint8_t>(rt.immediate());
892     sraiw(rd, rs, shamt);
893   }
894 }
895 
896 void TurboAssembler::Srl32(Register rd, Register rs, const Operand& rt) {
897   if (rt.is_reg()) {
898     srlw(rd, rs, rt.rm());
899   } else {
900     uint8_t shamt = static_cast<uint8_t>(rt.immediate());
901     srliw(rd, rs, shamt);
902   }
903 }
904 
905 void TurboAssembler::Sra64(Register rd, Register rs, const Operand& rt) {
906   if (rt.is_reg()) {
907     sra(rd, rs, rt.rm());
908   } else if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
909              ((rd.code() & 0b11000) == 0b01000) && is_int6(rt.immediate())) {
910     uint8_t shamt = static_cast<uint8_t>(rt.immediate());
911     c_srai(rd, shamt);
912   } else {
913     uint8_t shamt = static_cast<uint8_t>(rt.immediate());
914     srai(rd, rs, shamt);
915   }
916 }
917 
918 void TurboAssembler::Srl64(Register rd, Register rs, const Operand& rt) {
919   if (rt.is_reg()) {
920     srl(rd, rs, rt.rm());
921   } else if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
922              ((rd.code() & 0b11000) == 0b01000) && is_int6(rt.immediate())) {
923     uint8_t shamt = static_cast<uint8_t>(rt.immediate());
924     c_srli(rd, shamt);
925   } else {
926     uint8_t shamt = static_cast<uint8_t>(rt.immediate());
927     srli(rd, rs, shamt);
928   }
929 }
930 
931 void TurboAssembler::Sll64(Register rd, Register rs, const Operand& rt) {
932   if (rt.is_reg()) {
933     sll(rd, rs, rt.rm());
934   } else {
935     uint8_t shamt = static_cast<uint8_t>(rt.immediate());
936     if (FLAG_riscv_c_extension && (rd.code() == rs.code()) &&
937         (rd != zero_reg) && (shamt != 0) && is_uint6(shamt)) {
938       c_slli(rd, shamt);
939     } else {
940       slli(rd, rs, shamt);
941     }
942   }
943 }
944 
945 void TurboAssembler::Li(Register rd, int64_t imm) {
946   if (FLAG_riscv_c_extension && (rd != zero_reg) && is_int6(imm)) {
947     c_li(rd, imm);
948   } else {
949     RV_li(rd, imm);
950   }
951 }
952 
953 void TurboAssembler::Mv(Register rd, const Operand& rt) {
954   if (FLAG_riscv_c_extension && (rd != zero_reg) && (rt.rm() != zero_reg)) {
955     c_mv(rd, rt.rm());
956   } else {
957     mv(rd, rt.rm());
958   }
959 }
960 
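// 32-bit rotate right composed from shifts: for a register amount, negw
// produces (32 - amount) mod 32 for the opposite-direction shift, and the
// result is sign-extended to keep the usual 32-bit-in-64-bit representation.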
961 void TurboAssembler::Ror(Register rd, Register rs, const Operand& rt) {
962   UseScratchRegisterScope temps(this);
963   Register scratch = temps.Acquire();
964   BlockTrampolinePoolScope block_trampoline_pool(this);
965   if (rt.is_reg()) {
966     negw(scratch, rt.rm());
967     sllw(scratch, rs, scratch);
968     srlw(rd, rs, rt.rm());
969     or_(rd, scratch, rd);
970     sext_w(rd, rd);
971   } else {
972     int64_t ror_value = rt.immediate() % 32;
973     if (ror_value == 0) {
974       Mv(rd, rs);
975       return;
976     } else if (ror_value < 0) {
977       ror_value += 32;
978     }
979     srliw(scratch, rs, ror_value);
980     slliw(rd, rs, 32 - ror_value);
981     or_(rd, scratch, rd);
982     sext_w(rd, rd);
983   }
984 }
985 
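// 64-bit rotate right, using the same shift-and-or scheme as Ror but without
// the final sign extension.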
986 void TurboAssembler::Dror(Register rd, Register rs, const Operand& rt) {
987   UseScratchRegisterScope temps(this);
988   Register scratch = temps.Acquire();
989   BlockTrampolinePoolScope block_trampoline_pool(this);
990   if (rt.is_reg()) {
991     negw(scratch, rt.rm());
992     sll(scratch, rs, scratch);
993     srl(rd, rs, rt.rm());
994     or_(rd, scratch, rd);
995   } else {
996     int64_t dror_value = rt.immediate() % 64;
997     if (dror_value == 0) {
998       Mv(rd, rs);
999       return;
1000     } else if (dror_value < 0) {
1001       dror_value += 64;
1002     }
1003     srli(scratch, rs, dror_value);
1004     slli(rd, rs, 64 - dror_value);
1005     or_(rd, scratch, rd);
1006   }
1007 }
1008 
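// Computes rd = rt + (rs << sa), using a scratch register for the shifted
// value when rd aliases rt.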
1009 void TurboAssembler::CalcScaledAddress(Register rd, Register rt, Register rs,
1010                                        uint8_t sa) {
1011   DCHECK(sa >= 1 && sa <= 31);
1012   UseScratchRegisterScope temps(this);
1013   Register tmp = rd == rt ? temps.Acquire() : rd;
1014   DCHECK(tmp != rt);
1015   slli(tmp, rs, sa);
1016   Add64(rd, rt, tmp);
1017 }
1018 
1019 // ------------Pseudo-instructions-------------
1020 // Change endianness
1021 void TurboAssembler::ByteSwap(Register rd, Register rs, int operand_size,
1022                               Register scratch) {
1023   DCHECK_NE(scratch, rs);
1024   DCHECK_NE(scratch, rd);
1025   DCHECK(operand_size == 4 || operand_size == 8);
1026   if (operand_size == 4) {
1027     // uint32_t x1 = 0x00FF00FF;
1028     // x0 = (x0 << 16 | x0 >> 16);
1029     // x0 = (((x0 & x1) << 8)  | ((x0 & (x1 << 8)) >> 8));
1030     UseScratchRegisterScope temps(this);
1031     BlockTrampolinePoolScope block_trampoline_pool(this);
1032     DCHECK((rd != t6) && (rs != t6));
1033     Register x0 = temps.Acquire();
1034     Register x1 = temps.Acquire();
1035     Register x2 = scratch;
1036     li(x1, 0x00FF00FF);
1037     slliw(x0, rs, 16);
1038     srliw(rd, rs, 16);
1039     or_(x0, rd, x0);   // x0 <- x0 << 16 | x0 >> 16
1040     and_(x2, x0, x1);  // x2 <- x0 & 0x00FF00FF
1041     slliw(x2, x2, 8);  // x2 <- (x0 & x1) << 8
1042     slliw(x1, x1, 8);  // x1 <- 0xFF00FF00
1043     and_(rd, x0, x1);  // x0 & 0xFF00FF00
1044     srliw(rd, rd, 8);
1045     or_(rd, rd, x2);  // (((x0 & x1) << 8)  | ((x0 & (x1 << 8)) >> 8))
1046   } else {
1047     // uint64_t x1 = 0x0000FFFF0000FFFFl;
1048     // uint64_t x1 = 0x00FF00FF00FF00FFl;
1049     // x0 = (x0 << 32 | x0 >> 32);
1050     // x0 = (x0 & x1) << 16 | (x0 & (x1 << 16)) >> 16;
1051     // x0 = (x0 & x1) << 8  | (x0 & (x1 << 8)) >> 8;
1052     UseScratchRegisterScope temps(this);
1053     BlockTrampolinePoolScope block_trampoline_pool(this);
1054     DCHECK((rd != t6) && (rs != t6));
1055     Register x0 = temps.Acquire();
1056     Register x1 = temps.Acquire();
1057     Register x2 = scratch;
1058     li(x1, 0x0000FFFF0000FFFFl);
1059     slli(x0, rs, 32);
1060     srli(rd, rs, 32);
1061     or_(x0, rd, x0);   // x0 <- x0 << 32 | x0 >> 32
1062     and_(x2, x0, x1);  // x2 <- x0 & 0x0000FFFF0000FFFF
1063     slli(x2, x2, 16);  // x2 <- (x0 & 0x0000FFFF0000FFFF) << 16
1064     slli(x1, x1, 16);  // x1 <- 0xFFFF0000FFFF0000
1065     and_(rd, x0, x1);  // rd <- x0 & 0xFFFF0000FFFF0000
1066     srli(rd, rd, 16);  // rd <- x0 & (x1 << 16)) >> 16
1067     or_(x0, rd, x2);   // (x0 & x1) << 16 | (x0 & (x1 << 16)) >> 16;
1068     li(x1, 0x00FF00FF00FF00FFl);
1069     and_(x2, x0, x1);  // x2 <- x0 & 0x00FF00FF00FF00FF
1070     slli(x2, x2, 8);   // x2 <- (x0 & x1) << 8
1071     slli(x1, x1, 8);   // x1 <- 0xFF00FF00FF00FF00
1072     and_(rd, x0, x1);
1073     srli(rd, rd, 8);  // rd <- (x0 & (x1 << 8)) >> 8
1074     or_(rd, rd, x2);  // (((x0 & x1) << 8)  | ((x0 & (x1 << 8)) >> 8))
1075   }
1076 }
1077 
1078 template <int NBYTES, bool LOAD_SIGNED>
1079 void TurboAssembler::LoadNBytes(Register rd, const MemOperand& rs,
1080                                 Register scratch) {
1081   DCHECK(rd != rs.rm() && rd != scratch);
1082   DCHECK_LE(NBYTES, 8);
1083 
1084   // load the most significant byte
1085   if (LOAD_SIGNED) {
1086     lb(rd, rs.rm(), rs.offset() + (NBYTES - 1));
1087   } else {
1088     lbu(rd, rs.rm(), rs.offset() + (NBYTES - 1));
1089   }
1090 
1091   // load remaining (nbytes-1) bytes from higher to lower
1092   slli(rd, rd, 8 * (NBYTES - 1));
1093   for (int i = (NBYTES - 2); i >= 0; i--) {
1094     lbu(scratch, rs.rm(), rs.offset() + i);
1095     if (i) slli(scratch, scratch, i * 8);
1096     or_(rd, rd, scratch);
1097   }
1098 }
1099 
1100 template <int NBYTES, bool LOAD_SIGNED>
1101 void TurboAssembler::LoadNBytesOverwritingBaseReg(const MemOperand& rs,
1102                                                   Register scratch0,
1103                                                   Register scratch1) {
1104   // This function loads NBYTES bytes from the memory specified by rs into rs.rm()
1105   DCHECK(rs.rm() != scratch0 && rs.rm() != scratch1 && scratch0 != scratch1);
1106   DCHECK_LE(NBYTES, 8);
1107 
1108   // load the most significant byte
1109   if (LOAD_SIGNED) {
1110     lb(scratch0, rs.rm(), rs.offset() + (NBYTES - 1));
1111   } else {
1112     lbu(scratch0, rs.rm(), rs.offset() + (NBYTES - 1));
1113   }
1114 
1115   // load remaining (nbytes-1) bytes from higher to lower
1116   slli(scratch0, scratch0, 8 * (NBYTES - 1));
1117   for (int i = (NBYTES - 2); i >= 0; i--) {
1118     lbu(scratch1, rs.rm(), rs.offset() + i);
1119     if (i) {
1120       slli(scratch1, scratch1, i * 8);
1121       or_(scratch0, scratch0, scratch1);
1122     } else {
1123       // write to rs.rm() when processing the last byte
1124       or_(rs.rm(), scratch0, scratch1);
1125     }
1126   }
1127 }
1128 
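// The helpers below synthesize unaligned accesses from individual byte
// loads/stores, assembling NBYTES little-endian bytes through scratch
// registers.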
1129 template <int NBYTES, bool IS_SIGNED>
1130 void TurboAssembler::UnalignedLoadHelper(Register rd, const MemOperand& rs) {
1131   BlockTrampolinePoolScope block_trampoline_pool(this);
1132   UseScratchRegisterScope temps(this);
1133 
1134   if (NeedAdjustBaseAndOffset(rs, OffsetAccessType::TWO_ACCESSES, NBYTES - 1)) {
1135     // Adjust offset for two accesses and check if offset + 3 fits into int12.
1136     MemOperand source = rs;
1137     Register scratch_base = temps.Acquire();
1138     DCHECK(scratch_base != rs.rm());
1139     AdjustBaseAndOffset(&source, scratch_base, OffsetAccessType::TWO_ACCESSES,
1140                         NBYTES - 1);
1141 
1142     // Since source.rm() is scratch_base, assume rd != source.rm()
1143     DCHECK(rd != source.rm());
1144     Register scratch_other = temps.Acquire();
1145     LoadNBytes<NBYTES, IS_SIGNED>(rd, source, scratch_other);
1146   } else {
1147     // no need to adjust base-and-offset
1148     if (rd != rs.rm()) {
1149       Register scratch = temps.Acquire();
1150       LoadNBytes<NBYTES, IS_SIGNED>(rd, rs, scratch);
1151     } else {  // rd == rs.rm()
1152       Register scratch = temps.Acquire();
1153       Register scratch2 = temps.Acquire();
1154       LoadNBytesOverwritingBaseReg<NBYTES, IS_SIGNED>(rs, scratch, scratch2);
1155     }
1156   }
1157 }
1158 
1159 template <int NBYTES>
1160 void TurboAssembler::UnalignedFLoadHelper(FPURegister frd, const MemOperand& rs,
1161                                           Register scratch_base) {
1162   DCHECK(NBYTES == 4 || NBYTES == 8);
1163   DCHECK_NE(scratch_base, rs.rm());
1164   BlockTrampolinePoolScope block_trampoline_pool(this);
1165   MemOperand source = rs;
1166   if (NeedAdjustBaseAndOffset(rs, OffsetAccessType::TWO_ACCESSES, NBYTES - 1)) {
1167     // Adjust offset for two accesses and check if offset + 3 fits into int12.
1168     DCHECK(scratch_base != rs.rm());
1169     AdjustBaseAndOffset(&source, scratch_base, OffsetAccessType::TWO_ACCESSES,
1170                         NBYTES - 1);
1171   }
1172   UseScratchRegisterScope temps(this);
1173   Register scratch_other = temps.Acquire();
1174   Register scratch = temps.Acquire();
1175   DCHECK(scratch != rs.rm() && scratch_other != scratch &&
1176          scratch_other != rs.rm());
1177   LoadNBytes<NBYTES, true>(scratch, source, scratch_other);
1178   if (NBYTES == 4)
1179     fmv_w_x(frd, scratch);
1180   else
1181     fmv_d_x(frd, scratch);
1182 }
1183 
1184 template <int NBYTES>
1185 void TurboAssembler::UnalignedStoreHelper(Register rd, const MemOperand& rs,
1186                                           Register scratch_other) {
1187   DCHECK(scratch_other != rs.rm());
1188   DCHECK_LE(NBYTES, 8);
1189   MemOperand source = rs;
1190   UseScratchRegisterScope temps(this);
1191   Register scratch_base = temps.Acquire();
1192   // Adjust offset for two accesses and check if offset + 3 fits into int12.
1193   if (NeedAdjustBaseAndOffset(rs, OffsetAccessType::TWO_ACCESSES, NBYTES - 1)) {
1194     DCHECK(scratch_base != rd && scratch_base != rs.rm());
1195     AdjustBaseAndOffset(&source, scratch_base, OffsetAccessType::TWO_ACCESSES,
1196                         NBYTES - 1);
1197   }
1198 
1199   BlockTrampolinePoolScope block_trampoline_pool(this);
1200   if (scratch_other == no_reg) {
1201     if (temps.hasAvailable()) {
1202       scratch_other = temps.Acquire();
1203     } else {
1204       push(t2);
1205       scratch_other = t2;
1206     }
1207   }
1208 
1209   DCHECK(scratch_other != rd && scratch_other != rs.rm() &&
1210          scratch_other != source.rm());
1211 
1212   sb(rd, source.rm(), source.offset());
1213   for (size_t i = 1; i <= (NBYTES - 1); i++) {
1214     srli(scratch_other, rd, i * 8);
1215     sb(scratch_other, source.rm(), source.offset() + i);
1216   }
1217   if (scratch_other == t2) {
1218     pop(t2);
1219   }
1220 }
1221 
1222 template <int NBYTES>
1223 void TurboAssembler::UnalignedFStoreHelper(FPURegister frd,
1224                                            const MemOperand& rs,
1225                                            Register scratch) {
1226   DCHECK(NBYTES == 8 || NBYTES == 4);
1227   DCHECK_NE(scratch, rs.rm());
1228   if (NBYTES == 4) {
1229     fmv_x_w(scratch, frd);
1230   } else {
1231     fmv_x_d(scratch, frd);
1232   }
1233   UnalignedStoreHelper<NBYTES>(scratch, rs);
1234 }
1235 
1236 template <typename Reg_T, typename Func>
1237 void TurboAssembler::AlignedLoadHelper(Reg_T target, const MemOperand& rs,
1238                                        Func generator) {
1239   MemOperand source = rs;
1240   UseScratchRegisterScope temps(this);
1241   BlockTrampolinePoolScope block_trampoline_pool(this);
1242   if (NeedAdjustBaseAndOffset(source)) {
1243     Register scratch = temps.Acquire();
1244     DCHECK(scratch != rs.rm());
1245     AdjustBaseAndOffset(&source, scratch);
1246   }
1247   generator(target, source);
1248 }
1249 
1250 template <typename Reg_T, typename Func>
1251 void TurboAssembler::AlignedStoreHelper(Reg_T value, const MemOperand& rs,
1252                                         Func generator) {
1253   MemOperand source = rs;
1254   UseScratchRegisterScope temps(this);
1255   BlockTrampolinePoolScope block_trampoline_pool(this);
1256   if (NeedAdjustBaseAndOffset(source)) {
1257     Register scratch = temps.Acquire();
1258     // make sure scratch does not overwrite value
1259     if (std::is_same<Reg_T, Register>::value)
1260       DCHECK(scratch.code() != value.code());
1261     DCHECK(scratch != rs.rm());
1262     AdjustBaseAndOffset(&source, scratch);
1263   }
1264   generator(value, source);
1265 }
1266 
1267 void TurboAssembler::Ulw(Register rd, const MemOperand& rs) {
1268   UnalignedLoadHelper<4, true>(rd, rs);
1269 }
1270 
1271 void TurboAssembler::Ulwu(Register rd, const MemOperand& rs) {
1272   UnalignedLoadHelper<4, false>(rd, rs);
1273 }
1274 
1275 void TurboAssembler::Usw(Register rd, const MemOperand& rs) {
1276   UnalignedStoreHelper<4>(rd, rs);
1277 }
1278 
1279 void TurboAssembler::Ulh(Register rd, const MemOperand& rs) {
1280   UnalignedLoadHelper<2, true>(rd, rs);
1281 }
1282 
1283 void TurboAssembler::Ulhu(Register rd, const MemOperand& rs) {
1284   UnalignedLoadHelper<2, false>(rd, rs);
1285 }
1286 
1287 void TurboAssembler::Ush(Register rd, const MemOperand& rs) {
1288   UnalignedStoreHelper<2>(rd, rs);
1289 }
1290 
1291 void TurboAssembler::Uld(Register rd, const MemOperand& rs) {
1292   UnalignedLoadHelper<8, true>(rd, rs);
1293 }
1294 
1295 // Load a consecutive 32-bit word pair into a 64-bit reg. and put the first
1296 // word in the low bits,
1297 // the second word in the high bits.
1298 void MacroAssembler::LoadWordPair(Register rd, const MemOperand& rs) {
1299   UseScratchRegisterScope temps(this);
1300   Register scratch = temps.Acquire();
1301   Lwu(rd, rs);
1302   Lw(scratch, MemOperand(rs.rm(), rs.offset() + kSystemPointerSize / 2));
1303   slli(scratch, scratch, 32);
1304   Add64(rd, rd, scratch);
1305 }
1306 
1307 void TurboAssembler::Usd(Register rd, const MemOperand& rs) {
1308   UnalignedStoreHelper<8>(rd, rs);
1309 }
1310 
1311 // Do a 64-bit store as two consecutive 32-bit stores to an unaligned address.
1312 void MacroAssembler::StoreWordPair(Register rd, const MemOperand& rs) {
1313   UseScratchRegisterScope temps(this);
1314   Register scratch = temps.Acquire();
1315   Sw(rd, rs);
1316   srai(scratch, rd, 32);
1317   Sw(scratch, MemOperand(rs.rm(), rs.offset() + kSystemPointerSize / 2));
1318 }
1319 
1320 void TurboAssembler::ULoadFloat(FPURegister fd, const MemOperand& rs,
1321                                 Register scratch) {
1322   DCHECK_NE(scratch, rs.rm());
1323   UnalignedFLoadHelper<4>(fd, rs, scratch);
1324 }
1325 
1326 void TurboAssembler::UStoreFloat(FPURegister fd, const MemOperand& rs,
1327                                  Register scratch) {
1328   DCHECK_NE(scratch, rs.rm());
1329   UnalignedFStoreHelper<4>(fd, rs, scratch);
1330 }
1331 
1332 void TurboAssembler::ULoadDouble(FPURegister fd, const MemOperand& rs,
1333                                  Register scratch) {
1334   DCHECK_NE(scratch, rs.rm());
1335   UnalignedFLoadHelper<8>(fd, rs, scratch);
1336 }
1337 
1338 void TurboAssembler::UStoreDouble(FPURegister fd, const MemOperand& rs,
1339                                   Register scratch) {
1340   DCHECK_NE(scratch, rs.rm());
1341   UnalignedFStoreHelper<8>(fd, rs, scratch);
1342 }
1343 
1344 void TurboAssembler::Lb(Register rd, const MemOperand& rs) {
1345   auto fn = [this](Register target, const MemOperand& source) {
1346     this->lb(target, source.rm(), source.offset());
1347   };
1348   AlignedLoadHelper(rd, rs, fn);
1349 }
1350 
1351 void TurboAssembler::Lbu(Register rd, const MemOperand& rs) {
1352   auto fn = [this](Register target, const MemOperand& source) {
1353     this->lbu(target, source.rm(), source.offset());
1354   };
1355   AlignedLoadHelper(rd, rs, fn);
1356 }
1357 
1358 void TurboAssembler::Sb(Register rd, const MemOperand& rs) {
1359   auto fn = [this](Register value, const MemOperand& source) {
1360     this->sb(value, source.rm(), source.offset());
1361   };
1362   AlignedStoreHelper(rd, rs, fn);
1363 }
1364 
1365 void TurboAssembler::Lh(Register rd, const MemOperand& rs) {
1366   auto fn = [this](Register target, const MemOperand& source) {
1367     this->lh(target, source.rm(), source.offset());
1368   };
1369   AlignedLoadHelper(rd, rs, fn);
1370 }
1371 
1372 void TurboAssembler::Lhu(Register rd, const MemOperand& rs) {
1373   auto fn = [this](Register target, const MemOperand& source) {
1374     this->lhu(target, source.rm(), source.offset());
1375   };
1376   AlignedLoadHelper(rd, rs, fn);
1377 }
1378 
1379 void TurboAssembler::Sh(Register rd, const MemOperand& rs) {
1380   auto fn = [this](Register value, const MemOperand& source) {
1381     this->sh(value, source.rm(), source.offset());
1382   };
1383   AlignedStoreHelper(rd, rs, fn);
1384 }
1385 
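// Lw prefers a compressed encoding (c.lw, or c.lwsp for sp-relative loads)
// when the C extension is enabled and the registers and offset satisfy the
// RVC constraints; otherwise it falls back to the full lw instruction.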
1386 void TurboAssembler::Lw(Register rd, const MemOperand& rs) {
1387   auto fn = [this](Register target, const MemOperand& source) {
1388     if (FLAG_riscv_c_extension && ((target.code() & 0b11000) == 0b01000) &&
1389         ((source.rm().code() & 0b11000) == 0b01000) &&
1390         is_uint7(source.offset()) && ((source.offset() & 0x3) == 0)) {
1391       this->c_lw(target, source.rm(), source.offset());
1392     } else if (FLAG_riscv_c_extension && (target != zero_reg) &&
1393                is_uint8(source.offset()) && (source.rm() == sp) &&
1394                ((source.offset() & 0x3) == 0)) {
1395       this->c_lwsp(target, source.offset());
1396     } else {
1397       this->lw(target, source.rm(), source.offset());
1398     }
1399   };
1400   AlignedLoadHelper(rd, rs, fn);
1401 }
1402 
1403 void TurboAssembler::Lwu(Register rd, const MemOperand& rs) {
1404   auto fn = [this](Register target, const MemOperand& source) {
1405     this->lwu(target, source.rm(), source.offset());
1406   };
1407   AlignedLoadHelper(rd, rs, fn);
1408 }
1409 
Sw(Register rd,const MemOperand & rs)1410 void TurboAssembler::Sw(Register rd, const MemOperand& rs) {
1411   auto fn = [this](Register value, const MemOperand& source) {
1412     if (FLAG_riscv_c_extension && ((value.code() & 0b11000) == 0b01000) &&
1413         ((source.rm().code() & 0b11000) == 0b01000) &&
1414         is_uint7(source.offset()) && ((source.offset() & 0x3) == 0)) {
1415       this->c_sw(value, source.rm(), source.offset());
1416     } else if (FLAG_riscv_c_extension && (source.rm() == sp) &&
1417                is_uint8(source.offset()) && (((source.offset() & 0x3) == 0))) {
1418       this->c_swsp(value, source.offset());
1419     } else {
1420       this->sw(value, source.rm(), source.offset());
1421     }
1422   };
1423   AlignedStoreHelper(rd, rs, fn);
1424 }
1425 
Ld(Register rd,const MemOperand & rs)1426 void TurboAssembler::Ld(Register rd, const MemOperand& rs) {
1427   auto fn = [this](Register target, const MemOperand& source) {
1428     if (FLAG_riscv_c_extension && ((target.code() & 0b11000) == 0b01000) &&
1429         ((source.rm().code() & 0b11000) == 0b01000) &&
1430         is_uint8(source.offset()) && ((source.offset() & 0x7) == 0)) {
1431       this->c_ld(target, source.rm(), source.offset());
1432     } else if (FLAG_riscv_c_extension && (target != zero_reg) &&
1433                is_uint9(source.offset()) && (source.rm() == sp) &&
1434                ((source.offset() & 0x7) == 0)) {
1435       this->c_ldsp(target, source.offset());
1436     } else {
1437       this->ld(target, source.rm(), source.offset());
1438     }
1439   };
1440   AlignedLoadHelper(rd, rs, fn);
1441 }
1442 
Sd(Register rd,const MemOperand & rs)1443 void TurboAssembler::Sd(Register rd, const MemOperand& rs) {
1444   auto fn = [this](Register value, const MemOperand& source) {
1445     if (FLAG_riscv_c_extension && ((value.code() & 0b11000) == 0b01000) &&
1446         ((source.rm().code() & 0b11000) == 0b01000) &&
1447         is_uint8(source.offset()) && ((source.offset() & 0x7) == 0)) {
1448       this->c_sd(value, source.rm(), source.offset());
1449     } else if (FLAG_riscv_c_extension && (source.rm() == sp) &&
1450                is_uint9(source.offset()) && ((source.offset() & 0x7) == 0)) {
1451       this->c_sdsp(value, source.offset());
1452     } else {
1453       this->sd(value, source.rm(), source.offset());
1454     }
1455   };
1456   AlignedStoreHelper(rd, rs, fn);
1457 }
1458 
LoadFloat(FPURegister fd,const MemOperand & src)1459 void TurboAssembler::LoadFloat(FPURegister fd, const MemOperand& src) {
1460   auto fn = [this](FPURegister target, const MemOperand& source) {
1461     this->flw(target, source.rm(), source.offset());
1462   };
1463   AlignedLoadHelper(fd, src, fn);
1464 }
1465 
StoreFloat(FPURegister fs,const MemOperand & src)1466 void TurboAssembler::StoreFloat(FPURegister fs, const MemOperand& src) {
1467   auto fn = [this](FPURegister value, const MemOperand& source) {
1468     this->fsw(value, source.rm(), source.offset());
1469   };
1470   AlignedStoreHelper(fs, src, fn);
1471 }
1472 
LoadDouble(FPURegister fd,const MemOperand & src)1473 void TurboAssembler::LoadDouble(FPURegister fd, const MemOperand& src) {
1474   auto fn = [this](FPURegister target, const MemOperand& source) {
1475     if (FLAG_riscv_c_extension && ((target.code() & 0b11000) == 0b01000) &&
1476         ((source.rm().code() & 0b11000) == 0b01000) &&
1477         is_uint8(source.offset()) && ((source.offset() & 0x7) == 0)) {
1478       this->c_fld(target, source.rm(), source.offset());
1479     } else if (FLAG_riscv_c_extension && (source.rm() == sp) &&
1480                is_uint9(source.offset()) && ((source.offset() & 0x7) == 0)) {
1481       this->c_fldsp(target, source.offset());
1482     } else {
1483       this->fld(target, source.rm(), source.offset());
1484     }
1485   };
1486   AlignedLoadHelper(fd, src, fn);
1487 }
1488 
StoreDouble(FPURegister fs,const MemOperand & src)1489 void TurboAssembler::StoreDouble(FPURegister fs, const MemOperand& src) {
1490   auto fn = [this](FPURegister value, const MemOperand& source) {
1491     if (FLAG_riscv_c_extension && ((value.code() & 0b11000) == 0b01000) &&
1492         ((source.rm().code() & 0b11000) == 0b01000) &&
1493         is_uint8(source.offset()) && ((source.offset() & 0x7) == 0)) {
1494       this->c_fsd(value, source.rm(), source.offset());
1495     } else if (FLAG_riscv_c_extension && (source.rm() == sp) &&
1496                is_uint9(source.offset()) && ((source.offset() & 0x7) == 0)) {
1497       this->c_fsdsp(value, source.offset());
1498     } else {
1499       this->fsd(value, source.rm(), source.offset());
1500     }
1501   };
1502   AlignedStoreHelper(fs, src, fn);
1503 }
1504 
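// Ll/Lld and Sc/Scd wrap the RISC-V LR/SC primitives. lr/sc take no immediate
// offset, so a non-zero offset is first folded into a scratch base register.
// Note that sc writes its status back into rd: 0 on success, non-zero on
// failure.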
Ll(Register rd,const MemOperand & rs)1505 void TurboAssembler::Ll(Register rd, const MemOperand& rs) {
1506   bool is_one_instruction = rs.offset() == 0;
1507   if (is_one_instruction) {
1508     lr_w(false, false, rd, rs.rm());
1509   } else {
1510     UseScratchRegisterScope temps(this);
1511     Register scratch = temps.Acquire();
1512     Add64(scratch, rs.rm(), rs.offset());
1513     lr_w(false, false, rd, scratch);
1514   }
1515 }
1516 
Lld(Register rd,const MemOperand & rs)1517 void TurboAssembler::Lld(Register rd, const MemOperand& rs) {
1518   bool is_one_instruction = rs.offset() == 0;
1519   if (is_one_instruction) {
1520     lr_d(false, false, rd, rs.rm());
1521   } else {
1522     UseScratchRegisterScope temps(this);
1523     Register scratch = temps.Acquire();
1524     Add64(scratch, rs.rm(), rs.offset());
1525     lr_d(false, false, rd, scratch);
1526   }
1527 }
1528 
Sc(Register rd,const MemOperand & rs)1529 void TurboAssembler::Sc(Register rd, const MemOperand& rs) {
1530   bool is_one_instruction = rs.offset() == 0;
1531   if (is_one_instruction) {
1532     sc_w(false, false, rd, rs.rm(), rd);
1533   } else {
1534     UseScratchRegisterScope temps(this);
1535     Register scratch = temps.Acquire();
1536     Add64(scratch, rs.rm(), rs.offset());
1537     sc_w(false, false, rd, scratch, rd);
1538   }
1539 }
1540 
Scd(Register rd,const MemOperand & rs)1541 void TurboAssembler::Scd(Register rd, const MemOperand& rs) {
1542   bool is_one_instruction = rs.offset() == 0;
1543   if (is_one_instruction) {
1544     sc_d(false, false, rd, rs.rm(), rd);
1545   } else {
1546     UseScratchRegisterScope temps(this);
1547     Register scratch = temps.Acquire();
1548     Add64(scratch, rs.rm(), rs.offset());
1549     sc_d(false, false, rd, scratch, rd);
1550   }
1551 }
1552 
li(Register dst,Handle<HeapObject> value,RelocInfo::Mode rmode)1553 void TurboAssembler::li(Register dst, Handle<HeapObject> value,
1554                         RelocInfo::Mode rmode) {
1555   // TODO(jgruber,v8:8887): Also consider a root-relative load when generating
1556   // non-isolate-independent code. In many cases it might be cheaper than
1557   // embedding the relocatable value.
1558   if (root_array_available_ && options().isolate_independent_code) {
1559     IndirectLoadConstant(dst, value);
1560     return;
1561   } else if (RelocInfo::IsCompressedEmbeddedObject(rmode)) {
1562     EmbeddedObjectIndex index = AddEmbeddedObject(value);
1563     DCHECK(is_uint32(index));
1564     li(dst, Operand(index, rmode));
1565   } else {
1566     DCHECK(RelocInfo::IsFullEmbeddedObject(rmode));
1567     li(dst, Operand(value.address(), rmode));
1568   }
1569 }
1570 
li(Register dst,ExternalReference value,LiFlags mode)1571 void TurboAssembler::li(Register dst, ExternalReference value, LiFlags mode) {
1572   // TODO(jgruber,v8:8887): Also consider a root-relative load when generating
1573   // non-isolate-independent code. In many cases it might be cheaper than
1574   // embedding the relocatable value.
1575   if (root_array_available_ && options().isolate_independent_code) {
1576     IndirectLoadExternalReference(dst, value);
1577     return;
1578   }
1579   li(dst, Operand(value), mode);
1580 }
1581 
li(Register dst,const StringConstantBase * string,LiFlags mode)1582 void TurboAssembler::li(Register dst, const StringConstantBase* string,
1583                         LiFlags mode) {
1584   li(dst, Operand::EmbeddedStringConstant(string), mode);
1585 }
1586 
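// A worked example of the lui/addi split estimated below: value = 0x12345
// gives Hi20 = 0x12 and Lo12 = 0x345, so it needs lui + addi (2 instructions);
// value = 0x1000 gives Lo12 = 0, so a single lui suffices.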
InstrCountForLiLower32Bit(int64_t value)1587 static inline int InstrCountForLiLower32Bit(int64_t value) {
1588   int64_t Hi20 = ((value + 0x800) >> 12);
1589   int64_t Lo12 = value << 52 >> 52;
1590   if (Hi20 == 0 || Lo12 == 0) {
1591     return 1;
1592   }
1593   return 2;
1594 }
1595 
InstrCountForLi64Bit(int64_t value)1596 int TurboAssembler::InstrCountForLi64Bit(int64_t value) {
1597   if (is_int32(value + 0x800)) {
1598     return InstrCountForLiLower32Bit(value);
1599   } else {
1600     return li_estimate(value);
1601   }
1602   UNREACHABLE();
1603   return INT_MAX;
1604 }
1605 
li_optimized(Register rd,Operand j,LiFlags mode)1606 void TurboAssembler::li_optimized(Register rd, Operand j, LiFlags mode) {
1607   DCHECK(!j.is_reg());
1608   DCHECK(!MustUseReg(j.rmode()));
1609   DCHECK(mode == OPTIMIZE_SIZE);
1610   Li(rd, j.immediate());
1611 }
1612 
li(Register rd,Operand j,LiFlags mode)1613 void TurboAssembler::li(Register rd, Operand j, LiFlags mode) {
1614   DCHECK(!j.is_reg());
1615   BlockTrampolinePoolScope block_trampoline_pool(this);
1616   if (!MustUseReg(j.rmode()) && mode == OPTIMIZE_SIZE) {
1617     UseScratchRegisterScope temps(this);
1618     int count = li_estimate(j.immediate(), temps.hasAvailable());
1619     int reverse_count = li_estimate(~j.immediate(), temps.hasAvailable());
1620     if (FLAG_riscv_constant_pool && count >= 4 && reverse_count >= 4) {
1621       // Load the value from the constant pool.
1622       RecordEntry((uint64_t)j.immediate(), j.rmode());
1623       auipc(rd, 0);
1624       // The value recorded above will be loaded from the pool by the ld below.
1625       ld(rd, rd, 0);
1626     } else {
1627       if ((count - reverse_count) > 1) {
1628         Li(rd, ~j.immediate());
1629         not_(rd, rd);
1630       } else {
1631         Li(rd, j.immediate());
1632       }
1633     }
1634   } else if (MustUseReg(j.rmode())) {
1635     int64_t immediate;
1636     if (j.IsHeapObjectRequest()) {
1637       RequestHeapObject(j.heap_object_request());
1638       immediate = 0;
1639     } else {
1640       immediate = j.immediate();
1641     }
1642 
1643     RecordRelocInfo(j.rmode(), immediate);
1644     li_ptr(rd, immediate);
1645   } else if (mode == ADDRESS_LOAD) {
1646     // We always need the same number of instructions because this code may
1647     // later be patched to load another value, which may need all 6 instructions.
1648     RecordRelocInfo(j.rmode());
1649     li_ptr(rd, j.immediate());
1650   } else {  // Always emit the same 48-bit instruction
1651             // sequence.
1652     li_ptr(rd, j.immediate());
1653   }
1654 }
1655 
1656 static RegList t_regs = {t0, t1, t2, t3, t4, t5, t6};
1657 static RegList a_regs = {a0, a1, a2, a3, a4, a5, a6, a7};
1658 static RegList s_regs = {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11};
1659 
MultiPush(RegList regs)1660 void TurboAssembler::MultiPush(RegList regs) {
1661   int16_t num_to_push = regs.Count();
1662   int16_t stack_offset = num_to_push * kSystemPointerSize;
1663 
1664 #define TEST_AND_PUSH_REG(reg)             \
1665   if (regs.has(reg)) {                     \
1666     stack_offset -= kSystemPointerSize;    \
1667     Sd(reg, MemOperand(sp, stack_offset)); \
1668     regs.clear(reg);                       \
1669   }
1670 
1671 #define T_REGS(V) V(t6) V(t5) V(t4) V(t3) V(t2) V(t1) V(t0)
1672 #define A_REGS(V) V(a7) V(a6) V(a5) V(a4) V(a3) V(a2) V(a1) V(a0)
1673 #define S_REGS(V) \
1674   V(s11) V(s10) V(s9) V(s8) V(s7) V(s6) V(s5) V(s4) V(s3) V(s2) V(s1)
1675 
1676   Sub64(sp, sp, Operand(stack_offset));
1677 
1678   // Certain usages of MultiPush require that registers are pushed onto the
1679   // stack in a particular order: ra, fp, sp, gp, ... (basically in decreasing
1680   // order of the corresponding MIPS register numbers).
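  // For example, MultiPush({ra, fp, a0}) reserves 24 bytes and emits
  // Sd(ra, sp+16), Sd(fp, sp+8), Sd(a0, sp+0), leaving ra at the highest
  // address.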
1681   TEST_AND_PUSH_REG(ra);
1682   TEST_AND_PUSH_REG(fp);
1683   TEST_AND_PUSH_REG(sp);
1684   TEST_AND_PUSH_REG(gp);
1685   TEST_AND_PUSH_REG(tp);
1686   if (!(regs & s_regs).is_empty()) {
1687     S_REGS(TEST_AND_PUSH_REG)
1688   }
1689   if (!(regs & a_regs).is_empty()) {
1690     A_REGS(TEST_AND_PUSH_REG)
1691   }
1692   if (!(regs & t_regs).is_empty()) {
1693     T_REGS(TEST_AND_PUSH_REG)
1694   }
1695 
1696   DCHECK(regs.is_empty());
1697 
1698 #undef TEST_AND_PUSH_REG
1699 #undef T_REGS
1700 #undef A_REGS
1701 #undef S_REGS
1702 }
1703 
MultiPop(RegList regs)1704 void TurboAssembler::MultiPop(RegList regs) {
1705   int16_t stack_offset = 0;
1706 
1707 #define TEST_AND_POP_REG(reg)              \
1708   if (regs.has(reg)) {                     \
1709     Ld(reg, MemOperand(sp, stack_offset)); \
1710     stack_offset += kSystemPointerSize;    \
1711     regs.clear(reg);                       \
1712   }
1713 
1714 #define T_REGS(V) V(t0) V(t1) V(t2) V(t3) V(t4) V(t5) V(t6)
1715 #define A_REGS(V) V(a0) V(a1) V(a2) V(a3) V(a4) V(a5) V(a6) V(a7)
1716 #define S_REGS(V) \
1717   V(s1) V(s2) V(s3) V(s4) V(s5) V(s6) V(s7) V(s8) V(s9) V(s10) V(s11)
1718 
1719   // MultiPop pops registers from the stack in the reverse order of MultiPush.
1720   if (!(regs & t_regs).is_empty()) {
1721     T_REGS(TEST_AND_POP_REG)
1722   }
1723   if (!(regs & a_regs).is_empty()) {
1724     A_REGS(TEST_AND_POP_REG)
1725   }
1726   if (!(regs & s_regs).is_empty()) {
1727     S_REGS(TEST_AND_POP_REG)
1728   }
1729   TEST_AND_POP_REG(tp);
1730   TEST_AND_POP_REG(gp);
1731   TEST_AND_POP_REG(sp);
1732   TEST_AND_POP_REG(fp);
1733   TEST_AND_POP_REG(ra);
1734 
1735   DCHECK(regs.is_empty());
1736 
1737   addi(sp, sp, stack_offset);
1738 
1739 #undef TEST_AND_POP_REG
1740 #undef T_REGS
1741 #undef S_REGS
1742 #undef A_REGS
1743 }
1744 
MultiPushFPU(DoubleRegList regs)1745 void TurboAssembler::MultiPushFPU(DoubleRegList regs) {
1746   int16_t num_to_push = regs.Count();
1747   int16_t stack_offset = num_to_push * kDoubleSize;
1748 
1749   Sub64(sp, sp, Operand(stack_offset));
1750   for (int16_t i = kNumRegisters - 1; i >= 0; i--) {
1751     if ((regs.bits() & (1 << i)) != 0) {
1752       stack_offset -= kDoubleSize;
1753       StoreDouble(FPURegister::from_code(i), MemOperand(sp, stack_offset));
1754     }
1755   }
1756 }
1757 
MultiPopFPU(DoubleRegList regs)1758 void TurboAssembler::MultiPopFPU(DoubleRegList regs) {
1759   int16_t stack_offset = 0;
1760 
1761   for (int16_t i = 0; i < kNumRegisters; i++) {
1762     if ((regs.bits() & (1 << i)) != 0) {
1763       LoadDouble(FPURegister::from_code(i), MemOperand(sp, stack_offset));
1764       stack_offset += kDoubleSize;
1765     }
1766   }
1767   addi(sp, sp, stack_offset);
1768 }
1769 
ExtractBits(Register rt,Register rs,uint16_t pos,uint16_t size,bool sign_extend)1770 void TurboAssembler::ExtractBits(Register rt, Register rs, uint16_t pos,
1771                                  uint16_t size, bool sign_extend) {
1772   DCHECK(pos < 64 && 0 < size && size <= 64 && 0 < pos + size &&
1773          pos + size <= 64);
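  // Shift the field to the top of the register, then shift it back down with
  // either an arithmetic (sign-extending) or logical (zero-extending) shift.
  // E.g., rs = 0xABCD, pos = 4, size = 8 yields rt = 0xBC when zero-extended.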
1774   slli(rt, rs, 64 - (pos + size));
1775   if (sign_extend) {
1776     srai(rt, rt, 64 - size);
1777   } else {
1778     srli(rt, rt, 64 - size);
1779   }
1780 }
1781 
InsertBits(Register dest,Register source,Register pos,int size)1782 void TurboAssembler::InsertBits(Register dest, Register source, Register pos,
1783                                 int size) {
1784   DCHECK_LT(size, 64);
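  // E.g., dest = 0xFFFF, source = 0x5, pos = 4, size = 4 leaves dest = 0xFF5F:
  // the 4-bit field at bit position 4 is replaced by the low 4 bits of source.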
1785   UseScratchRegisterScope temps(this);
1786   Register mask = temps.Acquire();
1787   BlockTrampolinePoolScope block_trampoline_pool(this);
1788   Register source_ = temps.Acquire();
1789   // Create a mask of width 'size'.
1790   li(mask, 1);
1791   slli(mask, mask, size);
1792   addi(mask, mask, -1);
1793   and_(source_, mask, source);
1794   sll(source_, source_, pos);
1795   // Make a mask that is 0 in the field of width 'size' starting at 'pos'.
1796   sll(mask, mask, pos);
1797   not_(mask, mask);
1798   // Clear the destination field that will receive the source bits.
1799   and_(dest, mask, dest);
1800   // Insert the source bits.
1801   or_(dest, dest, source_);
1802 }
1803 
Neg_s(FPURegister fd,FPURegister fs)1804 void TurboAssembler::Neg_s(FPURegister fd, FPURegister fs) { fneg_s(fd, fs); }
1805 
Neg_d(FPURegister fd,FPURegister fs)1806 void TurboAssembler::Neg_d(FPURegister fd, FPURegister fs) { fneg_d(fd, fs); }
1807 
Cvt_d_uw(FPURegister fd,Register rs)1808 void TurboAssembler::Cvt_d_uw(FPURegister fd, Register rs) {
1809   // Convert rs to a FP value in fd.
1810   fcvt_d_wu(fd, rs);
1811 }
1812 
Cvt_d_w(FPURegister fd,Register rs)1813 void TurboAssembler::Cvt_d_w(FPURegister fd, Register rs) {
1814   // Convert rs to a FP value in fd.
1815   fcvt_d_w(fd, rs);
1816 }
1817 
Cvt_d_ul(FPURegister fd,Register rs)1818 void TurboAssembler::Cvt_d_ul(FPURegister fd, Register rs) {
1819   // Convert rs to a FP value in fd.
1820   fcvt_d_lu(fd, rs);
1821 }
1822 
Cvt_s_uw(FPURegister fd,Register rs)1823 void TurboAssembler::Cvt_s_uw(FPURegister fd, Register rs) {
1824   // Convert rs to a FP value in fd.
1825   fcvt_s_wu(fd, rs);
1826 }
1827 
Cvt_s_w(FPURegister fd,Register rs)1828 void TurboAssembler::Cvt_s_w(FPURegister fd, Register rs) {
1829   // Convert rs to a FP value in fd.
1830   fcvt_s_w(fd, rs);
1831 }
1832 
Cvt_s_ul(FPURegister fd,Register rs)1833 void TurboAssembler::Cvt_s_ul(FPURegister fd, Register rs) {
1834   // Convert rs to a FP value in fd.
1835   fcvt_s_lu(fd, rs);
1836 }
1837 
1838 template <typename CvtFunc>
RoundFloatingPointToInteger(Register rd,FPURegister fs,Register result,CvtFunc fcvt_generator)1839 void TurboAssembler::RoundFloatingPointToInteger(Register rd, FPURegister fs,
1840                                                  Register result,
1841                                                  CvtFunc fcvt_generator) {
1842   // Save csr_fflags to scratch & clear exception flags
1843   if (result.is_valid()) {
1844     BlockTrampolinePoolScope block_trampoline_pool(this);
1845     UseScratchRegisterScope temps(this);
1846     Register scratch = temps.Acquire();
1847 
1848     int exception_flags = kInvalidOperation;
1849     csrrci(scratch, csr_fflags, exception_flags);
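    // csrrci reads csr_fflags into scratch and atomically clears the bits
    // named by the immediate (kInvalidOperation here), so a stale flag cannot
    // leak into the check below.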
1850 
1851     // actual conversion instruction
1852     fcvt_generator(this, rd, fs);
1853 
1854     // Check the kInvalidOperation flag (raised for out-of-range or NaN input):
1855     // set result to 1 for a valid conversion, 0 otherwise.
1856     frflags(result);
1857     andi(result, result, exception_flags);
1858     seqz(result, result);  // result <-- 1 (normal), result <-- 0 (abnormal)
1859 
1860     // restore csr_fflags
1861     csrw(csr_fflags, scratch);
1862   } else {
1863     // actual conversion instruction
1864     fcvt_generator(this, rd, fs);
1865   }
1866 }
1867 
Clear_if_nan_d(Register rd,FPURegister fs)1868 void TurboAssembler::Clear_if_nan_d(Register rd, FPURegister fs) {
1869   Label no_nan;
1870   feq_d(kScratchReg, fs, fs);
1871   bnez(kScratchReg, &no_nan);
1872   Move(rd, zero_reg);
1873   bind(&no_nan);
1874 }
1875 
Clear_if_nan_s(Register rd,FPURegister fs)1876 void TurboAssembler::Clear_if_nan_s(Register rd, FPURegister fs) {
1877   Label no_nan;
1878   feq_s(kScratchReg, fs, fs);
1879   bnez(kScratchReg, &no_nan);
1880   Move(rd, zero_reg);
1881   bind(&no_nan);
1882 }
1883 
Trunc_uw_d(Register rd,FPURegister fs,Register result)1884 void TurboAssembler::Trunc_uw_d(Register rd, FPURegister fs, Register result) {
1885   RoundFloatingPointToInteger(
1886       rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1887         tasm->fcvt_wu_d(dst, src, RTZ);
1888       });
1889 }
1890 
Trunc_w_d(Register rd,FPURegister fs,Register result)1891 void TurboAssembler::Trunc_w_d(Register rd, FPURegister fs, Register result) {
1892   RoundFloatingPointToInteger(
1893       rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1894         tasm->fcvt_w_d(dst, src, RTZ);
1895       });
1896 }
1897 
Trunc_uw_s(Register rd,FPURegister fs,Register result)1898 void TurboAssembler::Trunc_uw_s(Register rd, FPURegister fs, Register result) {
1899   RoundFloatingPointToInteger(
1900       rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1901         tasm->fcvt_wu_s(dst, src, RTZ);
1902       });
1903 }
1904 
Trunc_w_s(Register rd,FPURegister fs,Register result)1905 void TurboAssembler::Trunc_w_s(Register rd, FPURegister fs, Register result) {
1906   RoundFloatingPointToInteger(
1907       rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1908         tasm->fcvt_w_s(dst, src, RTZ);
1909       });
1910 }
1911 
Trunc_ul_d(Register rd,FPURegister fs,Register result)1912 void TurboAssembler::Trunc_ul_d(Register rd, FPURegister fs, Register result) {
1913   RoundFloatingPointToInteger(
1914       rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1915         tasm->fcvt_lu_d(dst, src, RTZ);
1916       });
1917 }
1918 
Trunc_l_d(Register rd,FPURegister fs,Register result)1919 void TurboAssembler::Trunc_l_d(Register rd, FPURegister fs, Register result) {
1920   RoundFloatingPointToInteger(
1921       rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1922         tasm->fcvt_l_d(dst, src, RTZ);
1923       });
1924 }
1925 
Trunc_ul_s(Register rd,FPURegister fs,Register result)1926 void TurboAssembler::Trunc_ul_s(Register rd, FPURegister fs, Register result) {
1927   RoundFloatingPointToInteger(
1928       rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1929         tasm->fcvt_lu_s(dst, src, RTZ);
1930       });
1931 }
1932 
Trunc_l_s(Register rd,FPURegister fs,Register result)1933 void TurboAssembler::Trunc_l_s(Register rd, FPURegister fs, Register result) {
1934   RoundFloatingPointToInteger(
1935       rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1936         tasm->fcvt_l_s(dst, src, RTZ);
1937       });
1938 }
1939 
Round_w_s(Register rd,FPURegister fs,Register result)1940 void TurboAssembler::Round_w_s(Register rd, FPURegister fs, Register result) {
1941   RoundFloatingPointToInteger(
1942       rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1943         tasm->fcvt_w_s(dst, src, RNE);
1944       });
1945 }
1946 
Round_w_d(Register rd,FPURegister fs,Register result)1947 void TurboAssembler::Round_w_d(Register rd, FPURegister fs, Register result) {
1948   RoundFloatingPointToInteger(
1949       rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1950         tasm->fcvt_w_d(dst, src, RNE);
1951       });
1952 }
1953 
Ceil_w_s(Register rd,FPURegister fs,Register result)1954 void TurboAssembler::Ceil_w_s(Register rd, FPURegister fs, Register result) {
1955   RoundFloatingPointToInteger(
1956       rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1957         tasm->fcvt_w_s(dst, src, RUP);
1958       });
1959 }
1960 
Ceil_w_d(Register rd,FPURegister fs,Register result)1961 void TurboAssembler::Ceil_w_d(Register rd, FPURegister fs, Register result) {
1962   RoundFloatingPointToInteger(
1963       rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1964         tasm->fcvt_w_d(dst, src, RUP);
1965       });
1966 }
1967 
Floor_w_s(Register rd,FPURegister fs,Register result)1968 void TurboAssembler::Floor_w_s(Register rd, FPURegister fs, Register result) {
1969   RoundFloatingPointToInteger(
1970       rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1971         tasm->fcvt_w_s(dst, src, RDN);
1972       });
1973 }
1974 
Floor_w_d(Register rd,FPURegister fs,Register result)1975 void TurboAssembler::Floor_w_d(Register rd, FPURegister fs, Register result) {
1976   RoundFloatingPointToInteger(
1977       rd, fs, result, [](TurboAssembler* tasm, Register dst, FPURegister src) {
1978         tasm->fcvt_w_d(dst, src, RDN);
1979       });
1980 }
1981 
1982 // According to the JS ECMA specification, for floating-point round operations,
1983 // if the input is NaN, +/-Infinity, or +/-0, the same input is returned as the
1984 // rounded result; this differs from the behavior of the RISC-V fcvt
1985 // instructions (which round out-of-range values to the nearest max or min
1986 // value), so special handling is needed for NaN, +/-Infinity, and +/-0.
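// E.g., Math.floor(NaN) is NaN, Math.ceil(-Infinity) is -Infinity, and
// Math.round(-0) is -0, so such inputs must be passed through unchanged rather
// than fed to fcvt.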
1987 template <typename F>
RoundHelper(FPURegister dst,FPURegister src,FPURegister fpu_scratch,RoundingMode frm)1988 void TurboAssembler::RoundHelper(FPURegister dst, FPURegister src,
1989                                  FPURegister fpu_scratch, RoundingMode frm) {
1990   BlockTrampolinePoolScope block_trampoline_pool(this);
1991   UseScratchRegisterScope temps(this);
1992   Register scratch2 = temps.Acquire();
1993 
1994   DCHECK((std::is_same<float, F>::value) || (std::is_same<double, F>::value));
1995   // Need at least two FPRs, so check against dst == src == fpu_scratch
1996   DCHECK(!(dst == src && dst == fpu_scratch));
1997 
1998   const int kFloatMantissaBits =
1999       sizeof(F) == 4 ? kFloat32MantissaBits : kFloat64MantissaBits;
2000   const int kFloatExponentBits =
2001       sizeof(F) == 4 ? kFloat32ExponentBits : kFloat64ExponentBits;
2002   const int kFloatExponentBias =
2003       sizeof(F) == 4 ? kFloat32ExponentBias : kFloat64ExponentBias;
2004   Label done;
2005 
2006   {
2007     UseScratchRegisterScope temps2(this);
2008     Register scratch = temps2.Acquire();
2009     // extract exponent value of the source floating-point to scratch
2010     if (std::is_same<F, double>::value) {
2011       fmv_x_d(scratch, src);
2012     } else {
2013       fmv_x_w(scratch, src);
2014     }
2015     ExtractBits(scratch2, scratch, kFloatMantissaBits, kFloatExponentBits);
2016   }
2017 
2018   // if src is NaN/+-Infinity/+-Zero or if the exponent is larger than # of bits
2019   // in mantissa, the result is the same as src, so move src to dest  (to avoid
2020   // generating another branch)
2021   if (dst != src) {
2022     if (std::is_same<F, double>::value) {
2023       fmv_d(dst, src);
2024     } else {
2025       fmv_s(dst, src);
2026     }
2027   }
2028   {
2029     Label not_NaN;
2030     UseScratchRegisterScope temps2(this);
2031     Register scratch = temps2.Acquire();
2032     // According to the wasm spec
2033     // (https://webassembly.github.io/spec/core/exec/numerics.html#aux-nans)
2034     // if input is canonical NaN, then output is canonical NaN, and if input is
2035     // any other NaN, then output is any NaN with most significant bit of
2036     // payload is 1. In RISC-V, feq_d will set scratch to 0 if src is a NaN. If
2037     // src is not a NaN, branch to the label and do nothing, but if it is,
2038     // fmin_d will set dst to the canonical NaN.
2039     if (std::is_same<F, double>::value) {
2040       feq_d(scratch, src, src);
2041       bnez(scratch, &not_NaN);
2042       fmin_d(dst, src, src);
2043     } else {
2044       feq_s(scratch, src, src);
2045       bnez(scratch, &not_NaN);
2046       fmin_s(dst, src, src);
2047     }
2048     bind(&not_NaN);
2049   }
2050 
2051   // If the real exponent (i.e., scratch2 - kFloatExponentBias) is at least
2052   // kFloatMantissaBits, the floating-point value has no fractional part, so
2053   // the input is already rounded; jump to done. Note that NaN and Infinity
2054   // set the maximal exponent value in floating-point representation, so they
2055   // also satisfy (scratch2 - kFloatExponentBias >= kFloatMantissaBits), and
2056   // JS round semantics specify that rounding NaN (Infinity) returns NaN
2057   // (Infinity), so NaN and Infinity count as already-rounded values too.
2058   Branch(&done, greater_equal, scratch2,
2059          Operand(kFloatExponentBias + kFloatMantissaBits));
2060 
2061   // Actual rounding is needed along this path
2062 
2063   // old_src holds the original input, needed for the case of src == dst
2064   FPURegister old_src = src;
2065   if (src == dst) {
2066     DCHECK(fpu_scratch != dst);
2067     Move(fpu_scratch, src);
2068     old_src = fpu_scratch;
2069   }
2070 
2071   // Since only inputs whose real exponent value is less than kFloatMantissaBits
2072   // (i.e., 23 or 52) fall into this path, the value range of the input fits
2073   // within that of 24- or 53-bit integers. So we round the input to an integer
2074   // value, then convert it back to floating-point.
2075   {
2076     UseScratchRegisterScope temps(this);
2077     Register scratch = temps.Acquire();
2078     if (std::is_same<F, double>::value) {
2079       fcvt_l_d(scratch, src, frm);
2080       fcvt_d_l(dst, scratch, frm);
2081     } else {
2082       fcvt_w_s(scratch, src, frm);
2083       fcvt_s_w(dst, scratch, frm);
2084     }
2085   }
2086   // Special handling is needed if the input is a very small positive/negative
2087   // number that rounds to zero. JS semantics require that the rounded result
2088   // retain the sign of the input, so a very small positive (negative)
2089   // floating-point number should be rounded to positive (negative) 0.
2090   // Therefore, we use sign-bit injection to produce +/-0 correctly. Instead of
2091   // testing for zero with a branch, we simply inject the sign bit for every
2092   // value on this path (this is where old_src is needed).
2093   if (std::is_same<F, double>::value) {
2094     fsgnj_d(dst, dst, old_src);
2095   } else {
2096     fsgnj_s(dst, dst, old_src);
2097   }
2098 
2099   bind(&done);
2100 }
2101 
2102 // According to the JS ECMA specification, for floating-point round operations,
2103 // if the input is NaN, +/-Infinity, or +/-0, the same input is returned as the
2104 // rounded result; this differs from the behavior of the RISC-V fcvt
2105 // instructions (which round out-of-range values to the nearest max or min
2106 // value), so special handling is needed for NaN, +/-Infinity, and +/-0.
2107 template <typename F>
RoundHelper(VRegister dst,VRegister src,Register scratch,VRegister v_scratch,RoundingMode frm)2108 void TurboAssembler::RoundHelper(VRegister dst, VRegister src, Register scratch,
2109                                  VRegister v_scratch, RoundingMode frm) {
2110   VU.set(scratch, std::is_same<F, float>::value ? E32 : E64, m1);
2111   // if src is NaN/+-Infinity/+-Zero or if the exponent is larger than # of bits
2112   // in mantissa, the result is the same as src, so move src to dest  (to avoid
2113   // generating another branch)
2114 
2115   // If the real exponent (i.e., the extracted exponent - kFloatExponentBias)
2116   // is at least kFloatMantissaBits, the floating-point value has no fractional
2117   // part, so that lane is already rounded. Note that NaN and Infinity set the
2118   // maximal exponent value in floating-point representation, so they also
2119   // satisfy (exponent - kFloatExponentBias >= kFloatMantissaBits), and JS
2120   // round semantics specify that rounding NaN (Infinity) returns NaN
2121   // (Infinity), so NaN and Infinity count as already-rounded values too.
2122   const int kFloatMantissaBits =
2123       sizeof(F) == 4 ? kFloat32MantissaBits : kFloat64MantissaBits;
2124   const int kFloatExponentBits =
2125       sizeof(F) == 4 ? kFloat32ExponentBits : kFloat64ExponentBits;
2126   const int kFloatExponentBias =
2127       sizeof(F) == 4 ? kFloat32ExponentBias : kFloat64ExponentBias;
2128 
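  // The masked computation below mirrors the scalar ExtractBits pattern
  // (shift the exponent field to the top of each lane, then back down):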
2129   // slli(rt, rs, 64 - (pos + size));
2130   // if (sign_extend) {
2131   //   srai(rt, rt, 64 - size);
2132   // } else {
2133   //   srli(rt, rt, 64 - size);
2134   // }
2135 
2136   li(scratch, 64 - kFloatMantissaBits - kFloatExponentBits);
2137   vsll_vx(v_scratch, src, scratch);
2138   li(scratch, 64 - kFloatExponentBits);
2139   vsrl_vx(v_scratch, v_scratch, scratch);
2140   li(scratch, kFloatExponentBias + kFloatMantissaBits);
2141   vmslt_vx(v0, v_scratch, scratch);
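  // v0 now selects exactly the lanes whose exponent is small enough to still
  // have a fractional part; only those lanes are converted and rounded below.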
2142 
2143   VU.set(frm);
2144   vmv_vv(dst, src);
2145   if (dst == src) {
2146     vmv_vv(v_scratch, src);
2147   }
2148   vfcvt_x_f_v(dst, src, MaskType::Mask);
2149   vfcvt_f_x_v(dst, dst, MaskType::Mask);
2150 
2151   // Special handling is needed if the input is a very small positive/negative
2152   // number that rounds to zero. JS semantics require that the rounded result
2153   // retain the sign of the input, so a very small positive (negative)
2154   // floating-point number should be rounded to positive (negative) 0.
2155   if (dst == src) {
2156     vfsngj_vv(dst, dst, v_scratch);
2157   } else {
2158     vfsngj_vv(dst, dst, src);
2159   }
2160 }
2161 
Ceil_f(VRegister vdst,VRegister vsrc,Register scratch,VRegister v_scratch)2162 void TurboAssembler::Ceil_f(VRegister vdst, VRegister vsrc, Register scratch,
2163                             VRegister v_scratch) {
2164   RoundHelper<float>(vdst, vsrc, scratch, v_scratch, RUP);
2165 }
2166 
Ceil_d(VRegister vdst,VRegister vsrc,Register scratch,VRegister v_scratch)2167 void TurboAssembler::Ceil_d(VRegister vdst, VRegister vsrc, Register scratch,
2168                             VRegister v_scratch) {
2169   RoundHelper<double>(vdst, vsrc, scratch, v_scratch, RUP);
2170 }
2171 
Floor_f(VRegister vdst,VRegister vsrc,Register scratch,VRegister v_scratch)2172 void TurboAssembler::Floor_f(VRegister vdst, VRegister vsrc, Register scratch,
2173                              VRegister v_scratch) {
2174   RoundHelper<float>(vdst, vsrc, scratch, v_scratch, RDN);
2175 }
2176 
Floor_d(VRegister vdst,VRegister vsrc,Register scratch,VRegister v_scratch)2177 void TurboAssembler::Floor_d(VRegister vdst, VRegister vsrc, Register scratch,
2178                              VRegister v_scratch) {
2179   RoundHelper<double>(vdst, vsrc, scratch, v_scratch, RDN);
2180 }
2181 
Trunc_d(VRegister vdst,VRegister vsrc,Register scratch,VRegister v_scratch)2182 void TurboAssembler::Trunc_d(VRegister vdst, VRegister vsrc, Register scratch,
2183                              VRegister v_scratch) {
2184   RoundHelper<double>(vdst, vsrc, scratch, v_scratch, RTZ);
2185 }
2186 
Trunc_f(VRegister vdst,VRegister vsrc,Register scratch,VRegister v_scratch)2187 void TurboAssembler::Trunc_f(VRegister vdst, VRegister vsrc, Register scratch,
2188                              VRegister v_scratch) {
2189   RoundHelper<float>(vdst, vsrc, scratch, v_scratch, RTZ);
2190 }
2191 
Round_f(VRegister vdst,VRegister vsrc,Register scratch,VRegister v_scratch)2192 void TurboAssembler::Round_f(VRegister vdst, VRegister vsrc, Register scratch,
2193                              VRegister v_scratch) {
2194   RoundHelper<float>(vdst, vsrc, scratch, v_scratch, RNE);
2195 }
2196 
Round_d(VRegister vdst,VRegister vsrc,Register scratch,VRegister v_scratch)2197 void TurboAssembler::Round_d(VRegister vdst, VRegister vsrc, Register scratch,
2198                              VRegister v_scratch) {
2199   RoundHelper<double>(vdst, vsrc, scratch, v_scratch, RNE);
2200 }
2201 
Floor_d_d(FPURegister dst,FPURegister src,FPURegister fpu_scratch)2202 void TurboAssembler::Floor_d_d(FPURegister dst, FPURegister src,
2203                                FPURegister fpu_scratch) {
2204   RoundHelper<double>(dst, src, fpu_scratch, RDN);
2205 }
2206 
Ceil_d_d(FPURegister dst,FPURegister src,FPURegister fpu_scratch)2207 void TurboAssembler::Ceil_d_d(FPURegister dst, FPURegister src,
2208                               FPURegister fpu_scratch) {
2209   RoundHelper<double>(dst, src, fpu_scratch, RUP);
2210 }
2211 
Trunc_d_d(FPURegister dst,FPURegister src,FPURegister fpu_scratch)2212 void TurboAssembler::Trunc_d_d(FPURegister dst, FPURegister src,
2213                                FPURegister fpu_scratch) {
2214   RoundHelper<double>(dst, src, fpu_scratch, RTZ);
2215 }
2216 
Round_d_d(FPURegister dst,FPURegister src,FPURegister fpu_scratch)2217 void TurboAssembler::Round_d_d(FPURegister dst, FPURegister src,
2218                                FPURegister fpu_scratch) {
2219   RoundHelper<double>(dst, src, fpu_scratch, RNE);
2220 }
2221 
Floor_s_s(FPURegister dst,FPURegister src,FPURegister fpu_scratch)2222 void TurboAssembler::Floor_s_s(FPURegister dst, FPURegister src,
2223                                FPURegister fpu_scratch) {
2224   RoundHelper<float>(dst, src, fpu_scratch, RDN);
2225 }
2226 
Ceil_s_s(FPURegister dst,FPURegister src,FPURegister fpu_scratch)2227 void TurboAssembler::Ceil_s_s(FPURegister dst, FPURegister src,
2228                               FPURegister fpu_scratch) {
2229   RoundHelper<float>(dst, src, fpu_scratch, RUP);
2230 }
2231 
Trunc_s_s(FPURegister dst,FPURegister src,FPURegister fpu_scratch)2232 void TurboAssembler::Trunc_s_s(FPURegister dst, FPURegister src,
2233                                FPURegister fpu_scratch) {
2234   RoundHelper<float>(dst, src, fpu_scratch, RTZ);
2235 }
2236 
Round_s_s(FPURegister dst,FPURegister src,FPURegister fpu_scratch)2237 void TurboAssembler::Round_s_s(FPURegister dst, FPURegister src,
2238                                FPURegister fpu_scratch) {
2239   RoundHelper<float>(dst, src, fpu_scratch, RNE);
2240 }
2241 
Madd_s(FPURegister fd,FPURegister fr,FPURegister fs,FPURegister ft)2242 void MacroAssembler::Madd_s(FPURegister fd, FPURegister fr, FPURegister fs,
2243                             FPURegister ft) {
2244   fmadd_s(fd, fs, ft, fr);
2245 }
2246 
Madd_d(FPURegister fd,FPURegister fr,FPURegister fs,FPURegister ft)2247 void MacroAssembler::Madd_d(FPURegister fd, FPURegister fr, FPURegister fs,
2248                             FPURegister ft) {
2249   fmadd_d(fd, fs, ft, fr);
2250 }
2251 
Msub_s(FPURegister fd,FPURegister fr,FPURegister fs,FPURegister ft)2252 void MacroAssembler::Msub_s(FPURegister fd, FPURegister fr, FPURegister fs,
2253                             FPURegister ft) {
2254   fmsub_s(fd, fs, ft, fr);
2255 }
2256 
Msub_d(FPURegister fd,FPURegister fr,FPURegister fs,FPURegister ft)2257 void MacroAssembler::Msub_d(FPURegister fd, FPURegister fr, FPURegister fs,
2258                             FPURegister ft) {
2259   fmsub_d(fd, fs, ft, fr);
2260 }
2261 
CompareF32(Register rd,FPUCondition cc,FPURegister cmp1,FPURegister cmp2)2262 void TurboAssembler::CompareF32(Register rd, FPUCondition cc, FPURegister cmp1,
2263                                 FPURegister cmp2) {
2264   switch (cc) {
2265     case EQ:
2266       feq_s(rd, cmp1, cmp2);
2267       break;
2268     case NE:
2269       feq_s(rd, cmp1, cmp2);
2270       NegateBool(rd, rd);
2271       break;
2272     case LT:
2273       flt_s(rd, cmp1, cmp2);
2274       break;
2275     case GE:
2276       fle_s(rd, cmp2, cmp1);
2277       break;
2278     case LE:
2279       fle_s(rd, cmp1, cmp2);
2280       break;
2281     case GT:
2282       flt_s(rd, cmp2, cmp1);
2283       break;
2284     default:
2285       UNREACHABLE();
2286   }
2287 }
2288 
CompareF64(Register rd,FPUCondition cc,FPURegister cmp1,FPURegister cmp2)2289 void TurboAssembler::CompareF64(Register rd, FPUCondition cc, FPURegister cmp1,
2290                                 FPURegister cmp2) {
2291   switch (cc) {
2292     case EQ:
2293       feq_d(rd, cmp1, cmp2);
2294       break;
2295     case NE:
2296       feq_d(rd, cmp1, cmp2);
2297       NegateBool(rd, rd);
2298       break;
2299     case LT:
2300       flt_d(rd, cmp1, cmp2);
2301       break;
2302     case GE:
2303       fle_d(rd, cmp2, cmp1);
2304       break;
2305     case LE:
2306       fle_d(rd, cmp1, cmp2);
2307       break;
2308     case GT:
2309       flt_d(rd, cmp2, cmp1);
2310       break;
2311     default:
2312       UNREACHABLE();
2313   }
2314 }
2315 
CompareIsNotNanF32(Register rd,FPURegister cmp1,FPURegister cmp2)2316 void TurboAssembler::CompareIsNotNanF32(Register rd, FPURegister cmp1,
2317                                         FPURegister cmp2) {
2318   UseScratchRegisterScope temps(this);
2319   BlockTrampolinePoolScope block_trampoline_pool(this);
2320   Register scratch = temps.Acquire();
2321 
2322   feq_s(rd, cmp1, cmp1);       // rd <- !isNan(cmp1)
2323   feq_s(scratch, cmp2, cmp2);  // scratch <- !isNaN(cmp2)
2324   And(rd, rd, scratch);        // rd <- !isNan(cmp1) && !isNan(cmp2)
2325 }
2326 
CompareIsNotNanF64(Register rd,FPURegister cmp1,FPURegister cmp2)2327 void TurboAssembler::CompareIsNotNanF64(Register rd, FPURegister cmp1,
2328                                         FPURegister cmp2) {
2329   UseScratchRegisterScope temps(this);
2330   BlockTrampolinePoolScope block_trampoline_pool(this);
2331   Register scratch = temps.Acquire();
2332 
2333   feq_d(rd, cmp1, cmp1);       // rd <- !isNan(cmp1)
2334   feq_d(scratch, cmp2, cmp2);  // scratch <- !isNaN(cmp2)
2335   And(rd, rd, scratch);        // rd <- !isNan(cmp1) && !isNan(cmp2)
2336 }
2337 
CompareIsNanF32(Register rd,FPURegister cmp1,FPURegister cmp2)2338 void TurboAssembler::CompareIsNanF32(Register rd, FPURegister cmp1,
2339                                      FPURegister cmp2) {
2340   CompareIsNotNanF32(rd, cmp1, cmp2);  // rd <- !isNan(cmp1) && !isNan(cmp2)
2341   Xor(rd, rd, 1);                      // rd <- isNan(cmp1) || isNan(cmp2)
2342 }
2343 
CompareIsNanF64(Register rd,FPURegister cmp1,FPURegister cmp2)2344 void TurboAssembler::CompareIsNanF64(Register rd, FPURegister cmp1,
2345                                      FPURegister cmp2) {
2346   CompareIsNotNanF64(rd, cmp1, cmp2);  // rd <- !isNan(cmp1) && !isNan(cmp2)
2347   Xor(rd, rd, 1);                      // rd <- isNan(cmp1) || isNan(cmp2)
2348 }
2349 
BranchTrueShortF(Register rs,Label * target)2350 void TurboAssembler::BranchTrueShortF(Register rs, Label* target) {
2351   Branch(target, not_equal, rs, Operand(zero_reg));
2352 }
2353 
BranchFalseShortF(Register rs,Label * target)2354 void TurboAssembler::BranchFalseShortF(Register rs, Label* target) {
2355   Branch(target, equal, rs, Operand(zero_reg));
2356 }
2357 
BranchTrueF(Register rs,Label * target)2358 void TurboAssembler::BranchTrueF(Register rs, Label* target) {
2359   bool long_branch =
2360       target->is_bound() ? !is_near(target) : is_trampoline_emitted();
2361   if (long_branch) {
2362     Label skip;
2363     BranchFalseShortF(rs, &skip);
2364     BranchLong(target);
2365     bind(&skip);
2366   } else {
2367     BranchTrueShortF(rs, target);
2368   }
2369 }
2370 
BranchFalseF(Register rs,Label * target)2371 void TurboAssembler::BranchFalseF(Register rs, Label* target) {
2372   bool long_branch =
2373       target->is_bound() ? !is_near(target) : is_trampoline_emitted();
2374   if (long_branch) {
2375     Label skip;
2376     BranchTrueShortF(rs, &skip);
2377     BranchLong(target);
2378     bind(&skip);
2379   } else {
2380     BranchFalseShortF(rs, target);
2381   }
2382 }
2383 
InsertHighWordF64(FPURegister dst,Register src_high)2384 void TurboAssembler::InsertHighWordF64(FPURegister dst, Register src_high) {
2385   UseScratchRegisterScope temps(this);
2386   Register scratch = temps.Acquire();
2387   Register scratch2 = temps.Acquire();
2388   BlockTrampolinePoolScope block_trampoline_pool(this);
2389 
2390   DCHECK(src_high != scratch2 && src_high != scratch);
2391 
2392   fmv_x_d(scratch, dst);
2393   slli(scratch2, src_high, 32);
2394   slli(scratch, scratch, 32);
2395   srli(scratch, scratch, 32);
2396   or_(scratch, scratch, scratch2);
2397   fmv_d_x(dst, scratch);
2398 }
2399 
InsertLowWordF64(FPURegister dst,Register src_low)2400 void TurboAssembler::InsertLowWordF64(FPURegister dst, Register src_low) {
2401   UseScratchRegisterScope temps(this);
2402   Register scratch = temps.Acquire();
2403   Register scratch2 = temps.Acquire();
2404   BlockTrampolinePoolScope block_trampoline_pool(this);
2405 
2406   DCHECK(src_low != scratch && src_low != scratch2);
2407   fmv_x_d(scratch, dst);
2408   slli(scratch2, src_low, 32);
2409   srli(scratch2, scratch2, 32);
2410   srli(scratch, scratch, 32);
2411   slli(scratch, scratch, 32);
2412   or_(scratch, scratch, scratch2);
2413   fmv_d_x(dst, scratch);
2414 }
2415 
LoadFPRImmediate(FPURegister dst,uint32_t src)2416 void TurboAssembler::LoadFPRImmediate(FPURegister dst, uint32_t src) {
2417   // Handle special values first.
2418   if (src == bit_cast<uint32_t>(0.0f) && has_single_zero_reg_set_) {
2419     if (dst != kDoubleRegZero) fmv_s(dst, kDoubleRegZero);
2420   } else if (src == bit_cast<uint32_t>(-0.0f) && has_single_zero_reg_set_) {
2421     Neg_s(dst, kDoubleRegZero);
2422   } else {
2423     if (dst == kDoubleRegZero) {
2424       DCHECK(src == bit_cast<uint32_t>(0.0f));
2425       fmv_w_x(dst, zero_reg);
2426       has_single_zero_reg_set_ = true;
2427       has_double_zero_reg_set_ = false;
2428     } else {
2429       UseScratchRegisterScope temps(this);
2430       Register scratch = temps.Acquire();
2431       li(scratch, Operand(static_cast<int32_t>(src)));
2432       fmv_w_x(dst, scratch);
2433     }
2434   }
2435 }
2436 
LoadFPRImmediate(FPURegister dst,uint64_t src)2437 void TurboAssembler::LoadFPRImmediate(FPURegister dst, uint64_t src) {
2438   // Handle special values first.
2439   if (src == bit_cast<uint64_t>(0.0) && has_double_zero_reg_set_) {
2440     if (dst != kDoubleRegZero) fmv_d(dst, kDoubleRegZero);
2441   } else if (src == bit_cast<uint64_t>(-0.0) && has_double_zero_reg_set_) {
2442     Neg_d(dst, kDoubleRegZero);
2443   } else {
2444     if (dst == kDoubleRegZero) {
2445       DCHECK(src == bit_cast<uint64_t>(0.0));
2446       fmv_d_x(dst, zero_reg);
2447       has_double_zero_reg_set_ = true;
2448       has_single_zero_reg_set_ = false;
2449     } else {
2450       UseScratchRegisterScope temps(this);
2451       Register scratch = temps.Acquire();
2452       li(scratch, Operand(src));
2453       fmv_d_x(dst, scratch);
2454     }
2455   }
2456 }
2457 
CompareI(Register rd,Register rs,const Operand & rt,Condition cond)2458 void TurboAssembler::CompareI(Register rd, Register rs, const Operand& rt,
2459                               Condition cond) {
2460   switch (cond) {
2461     case eq:
2462       Seq(rd, rs, rt);
2463       break;
2464     case ne:
2465       Sne(rd, rs, rt);
2466       break;
2467 
2468     // Signed comparison.
2469     case greater:
2470       Sgt(rd, rs, rt);
2471       break;
2472     case greater_equal:
2473       Sge(rd, rs, rt);  // rs >= rt
2474       break;
2475     case less:
2476       Slt(rd, rs, rt);  // rs < rt
2477       break;
2478     case less_equal:
2479       Sle(rd, rs, rt);  // rs <= rt
2480       break;
2481 
2482     // Unsigned comparison.
2483     case Ugreater:
2484       Sgtu(rd, rs, rt);  // rs > rt
2485       break;
2486     case Ugreater_equal:
2487       Sgeu(rd, rs, rt);  // rs >= rt
2488       break;
2489     case Uless:
2490       Sltu(rd, rs, rt);  // rs < rt
2491       break;
2492     case Uless_equal:
2493       Sleu(rd, rs, rt);  // rs <= rt
2494       break;
2495     case cc_always:
2496       UNREACHABLE();
2497     default:
2498       UNREACHABLE();
2499   }
2500 }
2501 
2502 // dest <- (condition != 0 ? zero : dest)
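// seqz/neg below turn the condition into an all-ones or all-zeros mask: if
// condition != 0 the mask is 0 and dest is cleared, otherwise the mask is ~0
// and dest is left unchanged.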
LoadZeroIfConditionNotZero(Register dest,Register condition)2503 void TurboAssembler::LoadZeroIfConditionNotZero(Register dest,
2504                                                 Register condition) {
2505   UseScratchRegisterScope temps(this);
2506   Register scratch = temps.Acquire();
2507   seqz(scratch, condition);
2508   // neg + and may be more efficient than mul(dest, dest, scratch)
2509   neg(scratch, scratch);  // 0 is still 0, 1 becomes all 1s
2510   and_(dest, dest, scratch);
2511 }
2512 
2513 // dest <- (condition == 0 ? 0 : dest)
LoadZeroIfConditionZero(Register dest,Register condition)2514 void TurboAssembler::LoadZeroIfConditionZero(Register dest,
2515                                              Register condition) {
2516   UseScratchRegisterScope temps(this);
2517   Register scratch = temps.Acquire();
2518   snez(scratch, condition);
2519   //  neg + and may be more efficient than mul(dest, dest, scratch);
2520   neg(scratch, scratch);  // 0 is still 0, 1 becomes all 1s
2521   and_(dest, dest, scratch);
2522 }
2523 
Clz32(Register rd,Register xx)2524 void TurboAssembler::Clz32(Register rd, Register xx) {
2525   // 32 bit unsigned in lower word: count number of leading zeros.
2526   //  int n = 32;
2527   //  unsigned y;
2528 
2529   //  y = x >>16; if (y != 0) { n = n -16; x = y; }
2530   //  y = x >> 8; if (y != 0) { n = n - 8; x = y; }
2531   //  y = x >> 4; if (y != 0) { n = n - 4; x = y; }
2532   //  y = x >> 2; if (y != 0) { n = n - 2; x = y; }
2533   //  y = x >> 1; if (y != 0) {rd = n - 2; return;}
2534   //  rd = n - x;
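  //  E.g., x = 1 falls through every test and yields rd = 32 - 1 = 31, while
  //  x = 0x80000000 narrows down to n = 2, x = 2 and takes the early exit
  //  with rd = n - 2 = 0.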
2535 
2536   Label L0, L1, L2, L3, L4;
2537   UseScratchRegisterScope temps(this);
2538   BlockTrampolinePoolScope block_trampoline_pool(this);
2539   Register x = rd;
2540   Register y = temps.Acquire();
2541   Register n = temps.Acquire();
2542   DCHECK(xx != y && xx != n);
2543   Move(x, xx);
2544   li(n, Operand(32));
2545   srliw(y, x, 16);
2546   BranchShort(&L0, eq, y, Operand(zero_reg));
2547   Move(x, y);
2548   addiw(n, n, -16);
2549   bind(&L0);
2550   srliw(y, x, 8);
2551   BranchShort(&L1, eq, y, Operand(zero_reg));
2552   addiw(n, n, -8);
2553   Move(x, y);
2554   bind(&L1);
2555   srliw(y, x, 4);
2556   BranchShort(&L2, eq, y, Operand(zero_reg));
2557   addiw(n, n, -4);
2558   Move(x, y);
2559   bind(&L2);
2560   srliw(y, x, 2);
2561   BranchShort(&L3, eq, y, Operand(zero_reg));
2562   addiw(n, n, -2);
2563   Move(x, y);
2564   bind(&L3);
2565   srliw(y, x, 1);
2566   subw(rd, n, x);
2567   BranchShort(&L4, eq, y, Operand(zero_reg));
2568   addiw(rd, n, -2);
2569   bind(&L4);
2570 }
2571 
Clz64(Register rd,Register xx)2572 void TurboAssembler::Clz64(Register rd, Register xx) {
2573   // 64 bit: count number of leading zeros.
2574   //  int n = 64;
2575   //  unsigned y;
2576 
2577   //  y = x >>32; if (y != 0) { n = n - 32; x = y; }
2578   //  y = x >>16; if (y != 0) { n = n - 16; x = y; }
2579   //  y = x >> 8; if (y != 0) { n = n - 8; x = y; }
2580   //  y = x >> 4; if (y != 0) { n = n - 4; x = y; }
2581   //  y = x >> 2; if (y != 0) { n = n - 2; x = y; }
2582   //  y = x >> 1; if (y != 0) {rd = n - 2; return;}
2583   //  rd = n - x;
2584 
2585   Label L0, L1, L2, L3, L4, L5;
2586   UseScratchRegisterScope temps(this);
2587   BlockTrampolinePoolScope block_trampoline_pool(this);
2588   Register x = rd;
2589   Register y = temps.Acquire();
2590   Register n = temps.Acquire();
2591   DCHECK(xx != y && xx != n);
2592   Move(x, xx);
2593   li(n, Operand(64));
2594   srli(y, x, 32);
2595   BranchShort(&L0, eq, y, Operand(zero_reg));
2596   addiw(n, n, -32);
2597   Move(x, y);
2598   bind(&L0);
2599   srli(y, x, 16);
2600   BranchShort(&L1, eq, y, Operand(zero_reg));
2601   addiw(n, n, -16);
2602   Move(x, y);
2603   bind(&L1);
2604   srli(y, x, 8);
2605   BranchShort(&L2, eq, y, Operand(zero_reg));
2606   addiw(n, n, -8);
2607   Move(x, y);
2608   bind(&L2);
2609   srli(y, x, 4);
2610   BranchShort(&L3, eq, y, Operand(zero_reg));
2611   addiw(n, n, -4);
2612   Move(x, y);
2613   bind(&L3);
2614   srli(y, x, 2);
2615   BranchShort(&L4, eq, y, Operand(zero_reg));
2616   addiw(n, n, -2);
2617   Move(x, y);
2618   bind(&L4);
2619   srli(y, x, 1);
2620   subw(rd, n, x);
2621   BranchShort(&L5, eq, y, Operand(zero_reg));
2622   addiw(rd, n, -2);
2623   bind(&L5);
2624 }
2625 
Ctz32(Register rd,Register rs)2626 void TurboAssembler::Ctz32(Register rd, Register rs) {
2627   // Convert trailing zeroes to trailing ones, and bits to their left
2628   // to zeroes.
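  // E.g., rs = 0b1000: rs - 1 = 0b0111, xor = 0b1111, and = 0b0111; Clz32 of
  // that is 29, and 32 - 29 = 3 trailing zeros.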
2629 
2630   BlockTrampolinePoolScope block_trampoline_pool(this);
2631   {
2632     UseScratchRegisterScope temps(this);
2633     Register scratch = temps.Acquire();
2634     Add64(scratch, rs, -1);
2635     Xor(rd, scratch, rs);
2636     And(rd, rd, scratch);
2637     // Count number of leading zeroes.
2638   }
2639   Clz32(rd, rd);
2640   {
2641     // Subtract number of leading zeroes from 32 to get number of trailing
2642     // ones. Remember that the trailing ones were formerly trailing zeroes.
2643     UseScratchRegisterScope temps(this);
2644     Register scratch = temps.Acquire();
2645     li(scratch, 32);
2646     Sub32(rd, scratch, rd);
2647   }
2648 }
2649 
Ctz64(Register rd,Register rs)2650 void TurboAssembler::Ctz64(Register rd, Register rs) {
2651   // Convert trailing zeroes to trailing ones, and bits to their left
2652   // to zeroes.
2653 
2654   BlockTrampolinePoolScope block_trampoline_pool(this);
2655   {
2656     UseScratchRegisterScope temps(this);
2657     Register scratch = temps.Acquire();
2658     Add64(scratch, rs, -1);
2659     Xor(rd, scratch, rs);
2660     And(rd, rd, scratch);
2661     // Count number of leading zeroes.
2662   }
2663   Clz64(rd, rd);
2664   {
2665     // Subtract number of leading zeroes from 64 to get number of trailing
2666     // ones. Remember that the trailing ones were formerly trailing zeroes.
2667     UseScratchRegisterScope temps(this);
2668     Register scratch = temps.Acquire();
2669     li(scratch, 64);
2670     Sub64(rd, scratch, rd);
2671   }
2672 }
2673 
Popcnt32(Register rd,Register rs,Register scratch)2674 void TurboAssembler::Popcnt32(Register rd, Register rs, Register scratch) {
2675   DCHECK_NE(scratch, rs);
2676   DCHECK_NE(scratch, rd);
2677   // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
2678   //
2679   // A generalization of the best bit counting method to integers of
2680   // bit-widths up to 128 (parameterized by type T) is this:
2681   //
2682   // v = v - ((v >> 1) & (T)~(T)0/3);                           // temp
2683   // v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3);      // temp
2684   // v = (v + (v >> 4)) & (T)~(T)0/255*15;                      // temp
2685   // c = (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * BITS_PER_BYTE; //count
2686   //
2687   // There are algorithms which are faster in the cases where very few
2688   // bits are set but the algorithm here attempts to minimize the total
2689   // number of instructions executed even when a large number of bits
2690   // are set.
2691   // The number of instructions is 20.
2692   // uint32_t B0 = 0x55555555;     // (T)~(T)0/3
2693   // uint32_t B1 = 0x33333333;     // (T)~(T)0/15*3
2694   // uint32_t B2 = 0x0F0F0F0F;     // (T)~(T)0/255*15
2695   // uint32_t value = 0x01010101;  // (T)~(T)0/255
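  // For reference, the equivalent C computation on a 32-bit value is:
  //   v = v - ((v >> 1) & 0x55555555);
  //   v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
  //   v = (v + (v >> 4)) & 0x0F0F0F0F;
  //   count = (v * 0x01010101) >> 24;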
2696 
2697   uint32_t shift = 24;
2698   UseScratchRegisterScope temps(this);
2699   BlockTrampolinePoolScope block_trampoline_pool(this);
2700   Register scratch2 = temps.Acquire();
2701   Register value = temps.Acquire();
2702   DCHECK((rd != value) && (rs != value));
2703   li(value, 0x01010101);     // value = 0x01010101;
2704   li(scratch2, 0x55555555);  // B0 = 0x55555555;
2705   Srl32(scratch, rs, 1);
2706   And(scratch, scratch, scratch2);
2707   Sub32(scratch, rs, scratch);
2708   li(scratch2, 0x33333333);  // B1 = 0x33333333;
2709   slli(rd, scratch2, 4);
2710   or_(scratch2, scratch2, rd);
2711   And(rd, scratch, scratch2);
2712   Srl32(scratch, scratch, 2);
2713   And(scratch, scratch, scratch2);
2714   Add32(scratch, rd, scratch);
2715   srliw(rd, scratch, 4);
2716   Add32(rd, rd, scratch);
2717   li(scratch2, 0xF);
2718   Mul32(scratch2, value, scratch2);  // B2 = 0x0F0F0F0F;
2719   And(rd, rd, scratch2);
2720   Mul32(rd, rd, value);
2721   Srl32(rd, rd, shift);
2722 }
2723 
Popcnt64(Register rd,Register rs,Register scratch)2724 void TurboAssembler::Popcnt64(Register rd, Register rs, Register scratch) {
2725   DCHECK_NE(scratch, rs);
2726   DCHECK_NE(scratch, rd);
2727   // uint64_t B0 = 0x5555555555555555l;     // (T)~(T)0/3
2728   // uint64_t B1 = 0x3333333333333333l;     // (T)~(T)0/15*3
2729   // uint64_t B2 = 0x0F0F0F0F0F0F0F0Fl;     // (T)~(T)0/255*15
2730   // uint64_t value = 0x0101010101010101l;  // (T)~(T)0/255
2731   // uint64_t shift = 24;                   // 32 + shift = 56 = (sizeof(T) - 1) * BITS_PER_BYTE
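  // The 64-bit masks are synthesized from small factors (0x11...1 * 5,
  // 0x11...1 * 3, 0x0101...01 * 0xF) instead of being materialized with full
  // 64-bit li sequences, trading a couple of multiplies for shorter code.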
2732 
2733   uint64_t shift = 24;
2734   UseScratchRegisterScope temps(this);
2735   BlockTrampolinePoolScope block_trampoline_pool(this);
2736   Register scratch2 = temps.Acquire();
2737   Register value = temps.Acquire();
2738   DCHECK((rd != value) && (rs != value));
2739   li(value, 0x1111111111111111l);  // value = 0x1111111111111111l;
2740   li(scratch2, 5);
2741   Mul64(scratch2, value, scratch2);  // B0 = 0x5555555555555555l;
2742   Srl64(scratch, rs, 1);
2743   And(scratch, scratch, scratch2);
2744   Sub64(scratch, rs, scratch);
2745   li(scratch2, 3);
2746   Mul64(scratch2, value, scratch2);  // B1 = 0x3333333333333333l;
2747   And(rd, scratch, scratch2);
2748   Srl64(scratch, scratch, 2);
2749   And(scratch, scratch, scratch2);
2750   Add64(scratch, rd, scratch);
2751   Srl64(rd, scratch, 4);
2752   Add64(rd, rd, scratch);
2753   li(scratch2, 0xF);
2754   li(value, 0x0101010101010101l);    // value = 0x0101010101010101l;
2755   Mul64(scratch2, value, scratch2);  // B2 = 0x0F0F0F0F0F0F0F0Fl;
2756   And(rd, rd, scratch2);
2757   Mul64(rd, rd, value);
2758   srli(rd, rd, 32 + shift);
2759 }
2760 
TryInlineTruncateDoubleToI(Register result,DoubleRegister double_input,Label * done)2761 void TurboAssembler::TryInlineTruncateDoubleToI(Register result,
2762                                                 DoubleRegister double_input,
2763                                                 Label* done) {
2764   UseScratchRegisterScope temps(this);
2765   Register scratch = temps.Acquire();
2766   // scratch == 1 indicates that the truncation succeeded with no exception
2767   Trunc_w_d(result, double_input, scratch);
2768   // If we had no exceptions (i.e., scratch==1) we are done.
2769   Branch(done, eq, scratch, Operand(1));
2770 }
2771 
TruncateDoubleToI(Isolate * isolate,Zone * zone,Register result,DoubleRegister double_input,StubCallMode stub_mode)2772 void TurboAssembler::TruncateDoubleToI(Isolate* isolate, Zone* zone,
2773                                        Register result,
2774                                        DoubleRegister double_input,
2775                                        StubCallMode stub_mode) {
2776   Label done;
2777 
2778   TryInlineTruncateDoubleToI(result, double_input, &done);
2779 
2780   // If we fell through then inline version didn't succeed - call stub
2781   // instead.
2782   push(ra);
2783   Sub64(sp, sp, Operand(kDoubleSize));  // Put input on stack.
2784   fsd(double_input, sp, 0);
2785 
2786   if (stub_mode == StubCallMode::kCallWasmRuntimeStub) {
2787     Call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
2788   } else {
2789     Call(BUILTIN_CODE(isolate, DoubleToI), RelocInfo::CODE_TARGET);
2790   }
2791   ld(result, sp, 0);
2792 
2793   Add64(sp, sp, Operand(kDoubleSize));
2794   pop(ra);
2795 
2796   bind(&done);
2797 }
2798 
2799 // BRANCH_ARGS_CHECK checks that conditional jump arguments are correct.
2800 #define BRANCH_ARGS_CHECK(cond, rs, rt)                                  \
2801   DCHECK((cond == cc_always && rs == zero_reg && rt.rm() == zero_reg) || \
2802          (cond != cc_always && (rs != zero_reg || rt.rm() != zero_reg)))
2803 
Branch(int32_t offset)2804 void TurboAssembler::Branch(int32_t offset) {
2805   DCHECK(is_int21(offset));
2806   BranchShort(offset);
2807 }
2808 
Branch(int32_t offset,Condition cond,Register rs,const Operand & rt,Label::Distance near_jump)2809 void TurboAssembler::Branch(int32_t offset, Condition cond, Register rs,
2810                             const Operand& rt, Label::Distance near_jump) {
2811   bool is_near = BranchShortCheck(offset, nullptr, cond, rs, rt);
2812   DCHECK(is_near);
2813   USE(is_near);
2814 }
2815 
Branch(Label * L)2816 void TurboAssembler::Branch(Label* L) {
2817   if (L->is_bound()) {
2818     if (is_near(L)) {
2819       BranchShort(L);
2820     } else {
2821       BranchLong(L);
2822     }
2823   } else {
2824     if (is_trampoline_emitted()) {
2825       BranchLong(L);
2826     } else {
2827       BranchShort(L);
2828     }
2829   }
2830 }
2831 
Branch(Label * L,Condition cond,Register rs,const Operand & rt,Label::Distance near_jump)2832 void TurboAssembler::Branch(Label* L, Condition cond, Register rs,
2833                             const Operand& rt, Label::Distance near_jump) {
2834   if (L->is_bound()) {
2835     if (!BranchShortCheck(0, L, cond, rs, rt)) {
2836       if (cond != cc_always) {
2837         Label skip;
2838         Condition neg_cond = NegateCondition(cond);
2839         BranchShort(&skip, neg_cond, rs, rt);
2840         BranchLong(L);
2841         bind(&skip);
2842       } else {
2843         BranchLong(L);
2844         EmitConstPoolWithJumpIfNeeded();
2845       }
2846     }
2847   } else {
2848     if (is_trampoline_emitted() && near_jump == Label::Distance::kFar) {
2849       if (cond != cc_always) {
2850         Label skip;
2851         Condition neg_cond = NegateCondition(cond);
2852         BranchShort(&skip, neg_cond, rs, rt);
2853         BranchLong(L);
2854         bind(&skip);
2855       } else {
2856         BranchLong(L);
2857         EmitConstPoolWithJumpIfNeeded();
2858       }
2859     } else {
2860       BranchShort(L, cond, rs, rt);
2861     }
2862   }
2863 }
2864 
Branch(Label * L,Condition cond,Register rs,RootIndex index)2865 void TurboAssembler::Branch(Label* L, Condition cond, Register rs,
2866                             RootIndex index) {
2867   UseScratchRegisterScope temps(this);
2868   Register scratch = temps.Acquire();
2869   LoadRoot(scratch, index);
2870   Branch(L, cond, rs, Operand(scratch));
2871 }
2872 
BranchShortHelper(int32_t offset,Label * L)2873 void TurboAssembler::BranchShortHelper(int32_t offset, Label* L) {
2874   DCHECK(L == nullptr || offset == 0);
2875   offset = GetOffset(offset, L, OffsetSize::kOffset21);
2876   j(offset);
2877 }
2878 
BranchShort(int32_t offset)2879 void TurboAssembler::BranchShort(int32_t offset) {
2880   DCHECK(is_int21(offset));
2881   BranchShortHelper(offset, nullptr);
2882 }
2883 
BranchShort(Label * L)2884 void TurboAssembler::BranchShort(Label* L) { BranchShortHelper(0, L); }
2885 
GetOffset(int32_t offset,Label * L,OffsetSize bits)2886 int32_t TurboAssembler::GetOffset(int32_t offset, Label* L, OffsetSize bits) {
2887   if (L) {
2888     offset = branch_offset_helper(L, bits);
2889   } else {
2890     DCHECK(is_intn(offset, bits));
2891   }
2892   return offset;
2893 }
2894 
GetRtAsRegisterHelper(const Operand & rt,Register scratch)2895 Register TurboAssembler::GetRtAsRegisterHelper(const Operand& rt,
2896                                                Register scratch) {
2897   Register r2 = no_reg;
2898   if (rt.is_reg()) {
2899     r2 = rt.rm();
2900   } else {
2901     r2 = scratch;
2902     li(r2, rt);
2903   }
2904 
2905   return r2;
2906 }
2907 
CalculateOffset(Label * L,int32_t * offset,OffsetSize bits)2908 bool TurboAssembler::CalculateOffset(Label* L, int32_t* offset,
2909                                      OffsetSize bits) {
2910   if (!is_near(L, bits)) return false;
2911   *offset = GetOffset(*offset, L, bits);
2912   return true;
2913 }
2914 
CalculateOffset(Label * L,int32_t * offset,OffsetSize bits,Register * scratch,const Operand & rt)2915 bool TurboAssembler::CalculateOffset(Label* L, int32_t* offset, OffsetSize bits,
2916                                      Register* scratch, const Operand& rt) {
2917   if (!is_near(L, bits)) return false;
2918   *scratch = GetRtAsRegisterHelper(rt, *scratch);
2919   *offset = GetOffset(*offset, L, bits);
2920   return true;
2921 }
2922 
BranchShortHelper(int32_t offset,Label * L,Condition cond,Register rs,const Operand & rt)2923 bool TurboAssembler::BranchShortHelper(int32_t offset, Label* L, Condition cond,
2924                                        Register rs, const Operand& rt) {
2925   DCHECK(L == nullptr || offset == 0);
2926   UseScratchRegisterScope temps(this);
2927   BlockTrampolinePoolScope block_trampoline_pool(this);
2928   Register scratch = no_reg;
2929   if (!rt.is_reg()) {
2930     scratch = temps.Acquire();
2931     li(scratch, rt);
2932   } else {
2933     scratch = rt.rm();
2934   }
2935   {
2936     BlockTrampolinePoolScope block_trampoline_pool(this);
2937     switch (cond) {
2938       case cc_always:
2939         if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false;
2940         j(offset);
2941         EmitConstPoolWithJumpIfNeeded();
2942         break;
2943       case eq:
2944         // rs == rt
2945         if (rt.is_reg() && rs == rt.rm()) {
2946           if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false;
2947           j(offset);
2948         } else {
2949           if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
2950           beq(rs, scratch, offset);
2951         }
2952         break;
2953       case ne:
2954         // rs != rt
2955         if (rt.is_reg() && rs == rt.rm()) {
2956           break;  // No code needs to be emitted
2957         } else {
2958           if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
2959           bne(rs, scratch, offset);
2960         }
2961         break;
2962 
2963       // Signed comparison.
2964       case greater:
2965         // rs > rt
2966         if (rt.is_reg() && rs == rt.rm()) {
2967           break;  // No code needs to be emitted.
2968         } else {
2969           if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
2970           bgt(rs, scratch, offset);
2971         }
2972         break;
2973       case greater_equal:
2974         // rs >= rt
2975         if (rt.is_reg() && rs == rt.rm()) {
2976           if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false;
2977           j(offset);
2978         } else {
2979           if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
2980           bge(rs, scratch, offset);
2981         }
2982         break;
2983       case less:
2984         // rs < rt
2985         if (rt.is_reg() && rs == rt.rm()) {
2986           break;  // No code needs to be emitted.
2987         } else {
2988           if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
2989           blt(rs, scratch, offset);
2990         }
2991         break;
2992       case less_equal:
2993         // rs <= rt
2994         if (rt.is_reg() && rs == rt.rm()) {
2995           if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false;
2996           j(offset);
2997         } else {
2998           if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
2999           ble(rs, scratch, offset);
3000         }
3001         break;
3002 
3003       // Unsigned comparison.
3004       case Ugreater:
3005         // rs > rt
3006         if (rt.is_reg() && rs == rt.rm()) {
3007           break;  // No code needs to be emitted.
3008         } else {
3009           if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
3010           bgtu(rs, scratch, offset);
3011         }
3012         break;
3013       case Ugreater_equal:
3014         // rs >= rt
3015         if (rt.is_reg() && rs == rt.rm()) {
3016           if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false;
3017           j(offset);
3018         } else {
3019           if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
3020           bgeu(rs, scratch, offset);
3021         }
3022         break;
3023       case Uless:
3024         // rs < rt
3025         if (rt.is_reg() && rs == rt.rm()) {
3026           break;  // No code needs to be emitted.
3027         } else {
3028           if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
3029           bltu(rs, scratch, offset);
3030         }
3031         break;
3032       case Uless_equal:
3033         // rs <= rt
3034         if (rt.is_reg() && rs == rt.rm()) {
3035           if (!CalculateOffset(L, &offset, OffsetSize::kOffset21)) return false;
3036           j(offset);
3037         } else {
3038           if (!CalculateOffset(L, &offset, OffsetSize::kOffset13)) return false;
3039           bleu(rs, scratch, offset);
3040         }
3041         break;
3042       default:
3043         UNREACHABLE();
3044     }
3045   }
3046 
3047   CheckTrampolinePoolQuick(1);
3048   return true;
3049 }
3050 
BranchShortCheck(int32_t offset,Label * L,Condition cond,Register rs,const Operand & rt)3051 bool TurboAssembler::BranchShortCheck(int32_t offset, Label* L, Condition cond,
3052                                       Register rs, const Operand& rt) {
3053   BRANCH_ARGS_CHECK(cond, rs, rt);
3054 
3055   if (!L) {
3056     DCHECK(is_int13(offset));
3057     return BranchShortHelper(offset, nullptr, cond, rs, rt);
3058   } else {
3059     DCHECK_EQ(offset, 0);
3060     return BranchShortHelper(0, L, cond, rs, rt);
3061   }
3062 }
3063 
BranchShort(int32_t offset,Condition cond,Register rs,const Operand & rt)3064 void TurboAssembler::BranchShort(int32_t offset, Condition cond, Register rs,
3065                                  const Operand& rt) {
3066   BranchShortCheck(offset, nullptr, cond, rs, rt);
3067 }
3068 
BranchShort(Label * L,Condition cond,Register rs,const Operand & rt)3069 void TurboAssembler::BranchShort(Label* L, Condition cond, Register rs,
3070                                  const Operand& rt) {
3071   BranchShortCheck(0, L, cond, rs, rt);
3072 }
3073 
BranchAndLink(int32_t offset)3074 void TurboAssembler::BranchAndLink(int32_t offset) {
3075   BranchAndLinkShort(offset);
3076 }
3077 
BranchAndLink(int32_t offset,Condition cond,Register rs,const Operand & rt)3078 void TurboAssembler::BranchAndLink(int32_t offset, Condition cond, Register rs,
3079                                    const Operand& rt) {
3080   bool is_near = BranchAndLinkShortCheck(offset, nullptr, cond, rs, rt);
3081   DCHECK(is_near);
3082   USE(is_near);
3083 }
3084 
BranchAndLink(Label * L)3085 void TurboAssembler::BranchAndLink(Label* L) {
3086   if (L->is_bound()) {
3087     if (is_near(L)) {
3088       BranchAndLinkShort(L);
3089     } else {
3090       BranchAndLinkLong(L);
3091     }
3092   } else {
3093     if (is_trampoline_emitted()) {
3094       BranchAndLinkLong(L);
3095     } else {
3096       BranchAndLinkShort(L);
3097     }
3098   }
3099 }
3100 
BranchAndLink(Label * L,Condition cond,Register rs,const Operand & rt)3101 void TurboAssembler::BranchAndLink(Label* L, Condition cond, Register rs,
3102                                    const Operand& rt) {
3103   if (L->is_bound()) {
3104     if (!BranchAndLinkShortCheck(0, L, cond, rs, rt)) {
3105       Label skip;
3106       Condition neg_cond = NegateCondition(cond);
3107       BranchShort(&skip, neg_cond, rs, rt);
3108       BranchAndLinkLong(L);
3109       bind(&skip);
3110     }
3111   } else {
3112     if (is_trampoline_emitted()) {
3113       Label skip;
3114       Condition neg_cond = NegateCondition(cond);
3115       BranchShort(&skip, neg_cond, rs, rt);
3116       BranchAndLinkLong(L);
3117       bind(&skip);
3118     } else {
3119       BranchAndLinkShortCheck(0, L, cond, rs, rt);
3120     }
3121   }
3122 }
3123 
BranchAndLinkShortHelper(int32_t offset,Label * L)3124 void TurboAssembler::BranchAndLinkShortHelper(int32_t offset, Label* L) {
3125   DCHECK(L == nullptr || offset == 0);
3126   offset = GetOffset(offset, L, OffsetSize::kOffset21);
3127   jal(offset);
3128 }
3129 
BranchAndLinkShort(int32_t offset)3130 void TurboAssembler::BranchAndLinkShort(int32_t offset) {
3131   DCHECK(is_int21(offset));
3132   BranchAndLinkShortHelper(offset, nullptr);
3133 }
3134 
BranchAndLinkShort(Label * L)3135 void TurboAssembler::BranchAndLinkShort(Label* L) {
3136   BranchAndLinkShortHelper(0, L);
3137 }
3138 
3139 // RISC-V has no conditional jump-and-link instruction, so a conditional
3140 // BranchAndLink is synthesized by branching over the jal with the negated
3141 // condition.
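// For illustration, BranchAndLink(L, eq, a0, Operand(a1)) is emitted roughly as:
//   bne a0, a1, 2 * kInstrSize   // skip the link on the negated condition
//   jal L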
BranchAndLinkShortHelper(int32_t offset,Label * L,Condition cond,Register rs,const Operand & rt)3142 bool TurboAssembler::BranchAndLinkShortHelper(int32_t offset, Label* L,
3143                                               Condition cond, Register rs,
3144                                               const Operand& rt) {
3145   DCHECK(L == nullptr || offset == 0);
3146   if (!is_near(L, OffsetSize::kOffset21)) return false;
3147 
3148   UseScratchRegisterScope temps(this);
3149   Register scratch = temps.Acquire();
3150   BlockTrampolinePoolScope block_trampoline_pool(this);
3151 
3152   if (cond == cc_always) {
3153     offset = GetOffset(offset, L, OffsetSize::kOffset21);
3154     jal(offset);
3155   } else {
3156     Branch(kInstrSize * 2, NegateCondition(cond), rs,
3157            Operand(GetRtAsRegisterHelper(rt, scratch)));
3158     offset = GetOffset(offset, L, OffsetSize::kOffset21);
3159     jal(offset);
3160   }
3161 
3162   return true;
3163 }
3164 
BranchAndLinkShortCheck(int32_t offset,Label * L,Condition cond,Register rs,const Operand & rt)3165 bool TurboAssembler::BranchAndLinkShortCheck(int32_t offset, Label* L,
3166                                              Condition cond, Register rs,
3167                                              const Operand& rt) {
3168   BRANCH_ARGS_CHECK(cond, rs, rt);
3169 
3170   if (!L) {
3171     DCHECK(is_int21(offset));
3172     return BranchAndLinkShortHelper(offset, nullptr, cond, rs, rt);
3173   } else {
3174     DCHECK_EQ(offset, 0);
3175     return BranchAndLinkShortHelper(0, L, cond, rs, rt);
3176   }
3177 }
3178 
LoadFromConstantsTable(Register destination,int constant_index)3179 void TurboAssembler::LoadFromConstantsTable(Register destination,
3180                                             int constant_index) {
3181   DCHECK(RootsTable::IsImmortalImmovable(RootIndex::kBuiltinsConstantsTable));
3182   LoadRoot(destination, RootIndex::kBuiltinsConstantsTable);
3183   LoadTaggedPointerField(
3184       destination, FieldMemOperand(destination, FixedArray::OffsetOfElementAt(
3185                                                     constant_index)));
3186 }
3187 
LoadRootRelative(Register destination,int32_t offset)3188 void TurboAssembler::LoadRootRelative(Register destination, int32_t offset) {
3189   Ld(destination, MemOperand(kRootRegister, offset));
3190 }
3191 
LoadRootRegisterOffset(Register destination,intptr_t offset)3192 void TurboAssembler::LoadRootRegisterOffset(Register destination,
3193                                             intptr_t offset) {
3194   if (offset == 0) {
3195     Move(destination, kRootRegister);
3196   } else {
3197     Add64(destination, kRootRegister, Operand(offset));
3198   }
3199 }
3200 
Jump(Register target,Condition cond,Register rs,const Operand & rt)3201 void TurboAssembler::Jump(Register target, Condition cond, Register rs,
3202                           const Operand& rt) {
3203   BlockTrampolinePoolScope block_trampoline_pool(this);
3204   if (cond == cc_always) {
3205     jr(target);
3206     ForceConstantPoolEmissionWithoutJump();
3207   } else {
3208     BRANCH_ARGS_CHECK(cond, rs, rt);
3209     Branch(kInstrSize * 2, NegateCondition(cond), rs, rt);
3210     jr(target);
3211   }
3212 }
3213 
Jump(intptr_t target,RelocInfo::Mode rmode,Condition cond,Register rs,const Operand & rt)3214 void TurboAssembler::Jump(intptr_t target, RelocInfo::Mode rmode,
3215                           Condition cond, Register rs, const Operand& rt) {
3216   Label skip;
3217   if (cond != cc_always) {
3218     Branch(&skip, NegateCondition(cond), rs, rt);
3219   }
3220   {
3221     BlockTrampolinePoolScope block_trampoline_pool(this);
3222     li(t6, Operand(target, rmode));
3223     Jump(t6, al, zero_reg, Operand(zero_reg));
3224     EmitConstPoolWithJumpIfNeeded();
3225     bind(&skip);
3226   }
3227 }
3228 
Jump(Address target,RelocInfo::Mode rmode,Condition cond,Register rs,const Operand & rt)3229 void TurboAssembler::Jump(Address target, RelocInfo::Mode rmode, Condition cond,
3230                           Register rs, const Operand& rt) {
3231   DCHECK(!RelocInfo::IsCodeTarget(rmode));
3232   Jump(static_cast<intptr_t>(target), rmode, cond, rs, rt);
3233 }
3234 
Jump(Handle<Code> code,RelocInfo::Mode rmode,Condition cond,Register rs,const Operand & rt)3235 void TurboAssembler::Jump(Handle<Code> code, RelocInfo::Mode rmode,
3236                           Condition cond, Register rs, const Operand& rt) {
3237   DCHECK(RelocInfo::IsCodeTarget(rmode));
3238 
3239   BlockTrampolinePoolScope block_trampoline_pool(this);
3240   Builtin builtin = Builtin::kNoBuiltinId;
3241   bool target_is_isolate_independent_builtin =
3242       isolate()->builtins()->IsBuiltinHandle(code, &builtin) &&
3243       Builtins::IsIsolateIndependent(builtin);
3244   if (target_is_isolate_independent_builtin &&
3245       options().use_pc_relative_calls_and_jumps) {
3246     int32_t code_target_index = AddCodeTarget(code);
3247     Label skip;
3248     BlockTrampolinePoolScope block_trampoline_pool(this);
3249     if (cond != al) {
3250       Branch(&skip, NegateCondition(cond), rs, rt);
3251     }
3252     RecordRelocInfo(RelocInfo::RELATIVE_CODE_TARGET);
3253     GenPCRelativeJump(t6, code_target_index);
3254     bind(&skip);
3255     return;
3256   } else if (root_array_available_ && options().isolate_independent_code &&
3257              target_is_isolate_independent_builtin) {
3258     int offset = static_cast<int>(code->builtin_id()) * kSystemPointerSize +
3259                  IsolateData::builtin_entry_table_offset();
3260     Ld(t6, MemOperand(kRootRegister, offset));
3261     Jump(t6, cond, rs, rt);
3262     return;
3263   } else if (options().inline_offheap_trampolines &&
3264              target_is_isolate_independent_builtin) {
3265     // Inline the trampoline.
3266     RecordCommentForOffHeapTrampoline(builtin);
3267     li(t6, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
3268     Jump(t6, cond, rs, rt);
3269     RecordComment("]");
3270     return;
3271   }
3272 
3273   int32_t target_index = AddCodeTarget(code);
3274   Jump(static_cast<intptr_t>(target_index), rmode, cond, rs, rt);
3275 }
3276 
Jump(const ExternalReference & reference)3277 void TurboAssembler::Jump(const ExternalReference& reference) {
3278   li(t6, reference);
3279   Jump(t6);
3280 }
3281 
3282 // Note: To call gcc-compiled C code on riscv64, you must call through t6.
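// (StoreReturnAddressAndCall() below does exactly this: it moves the target
// into t6 and issues jalr t6 so the call conforms to the ABI for PIC.)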
Call(Register target,Condition cond,Register rs,const Operand & rt)3283 void TurboAssembler::Call(Register target, Condition cond, Register rs,
3284                           const Operand& rt) {
3285   BlockTrampolinePoolScope block_trampoline_pool(this);
3286   if (cond == cc_always) {
3287     jalr(ra, target, 0);
3288   } else {
3289     BRANCH_ARGS_CHECK(cond, rs, rt);
3290     Branch(kInstrSize * 2, NegateCondition(cond), rs, rt);
3291     jalr(ra, target, 0);
3292   }
3293 }
3294 
JumpIfIsInRange(Register value,unsigned lower_limit,unsigned higher_limit,Label * on_in_range)3295 void MacroAssembler::JumpIfIsInRange(Register value, unsigned lower_limit,
3296                                      unsigned higher_limit,
3297                                      Label* on_in_range) {
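  // Branches when lower_limit <= value <= higher_limit, folded into a single
  // unsigned comparison: (value - lower_limit) <=u (higher_limit - lower_limit).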
3298   if (lower_limit != 0) {
3299     UseScratchRegisterScope temps(this);
3300     Register scratch = temps.Acquire();
3301     Sub64(scratch, value, Operand(lower_limit));
3302     Branch(on_in_range, Uless_equal, scratch,
3303            Operand(higher_limit - lower_limit));
3304   } else {
3305     Branch(on_in_range, Uless_equal, value,
3306            Operand(higher_limit - lower_limit));
3307   }
3308 }
3309 
Call(Address target,RelocInfo::Mode rmode,Condition cond,Register rs,const Operand & rt)3310 void TurboAssembler::Call(Address target, RelocInfo::Mode rmode, Condition cond,
3311                           Register rs, const Operand& rt) {
3312   li(t6, Operand(static_cast<int64_t>(target), rmode), ADDRESS_LOAD);
3313   Call(t6, cond, rs, rt);
3314 }
3315 
Call(Handle<Code> code,RelocInfo::Mode rmode,Condition cond,Register rs,const Operand & rt)3316 void TurboAssembler::Call(Handle<Code> code, RelocInfo::Mode rmode,
3317                           Condition cond, Register rs, const Operand& rt) {
3318   Builtin builtin = Builtin::kNoBuiltinId;
3319   bool target_is_isolate_independent_builtin =
3320       isolate()->builtins()->IsBuiltinHandle(code, &builtin) &&
3321       Builtins::IsIsolateIndependent(builtin);
3322   if (target_is_isolate_independent_builtin &&
3323       options().use_pc_relative_calls_and_jumps) {
3324     int32_t code_target_index = AddCodeTarget(code);
3325     Label skip;
3326     BlockTrampolinePoolScope block_trampoline_pool(this);
3327     RecordCommentForOffHeapTrampoline(builtin);
3328     if (cond != al) {
3329       Branch(&skip, NegateCondition(cond), rs, rt);
3330     }
3331     RecordRelocInfo(RelocInfo::RELATIVE_CODE_TARGET);
3332     GenPCRelativeJumpAndLink(t6, code_target_index);
3333     bind(&skip);
3334     RecordComment("]");
3335     return;
3336   } else if (root_array_available_ && options().isolate_independent_code &&
3337              target_is_isolate_independent_builtin) {
3338     int offset = static_cast<int>(code->builtin_id()) * kSystemPointerSize +
3339                  IsolateData::builtin_entry_table_offset();
3340     LoadRootRelative(t6, offset);
3341     Call(t6, cond, rs, rt);
3342     return;
3343   } else if (options().inline_offheap_trampolines &&
3344              target_is_isolate_independent_builtin) {
3345     // Inline the trampoline.
3346     RecordCommentForOffHeapTrampoline(builtin);
3347     li(t6, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
3348     Call(t6, cond, rs, rt);
3349     RecordComment("]");
3350     return;
3351   }
3352 
3353   DCHECK(RelocInfo::IsCodeTarget(rmode));
3354   DCHECK(code->IsExecutable());
3355   int32_t target_index = AddCodeTarget(code);
3356   Call(static_cast<Address>(target_index), rmode, cond, rs, rt);
3357 }
3358 
LoadEntryFromBuiltinIndex(Register builtin)3359 void TurboAssembler::LoadEntryFromBuiltinIndex(Register builtin) {
3360   STATIC_ASSERT(kSystemPointerSize == 8);
3361   STATIC_ASSERT(kSmiTagSize == 1);
3362   STATIC_ASSERT(kSmiTag == 0);
3363 
3364   // The builtin register contains the builtin index as a Smi.
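  // The entry is loaded from
  //   kRootRegister + untagged_index * kSystemPointerSize
  //                 + IsolateData::builtin_entry_table_offset().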
3365   SmiUntag(builtin, builtin);
3366   CalcScaledAddress(builtin, kRootRegister, builtin, kSystemPointerSizeLog2);
3367   Ld(builtin, MemOperand(builtin, IsolateData::builtin_entry_table_offset()));
3368 }
3369 
CallBuiltinByIndex(Register builtin)3370 void TurboAssembler::CallBuiltinByIndex(Register builtin) {
3371   LoadEntryFromBuiltinIndex(builtin);
3372   Call(builtin);
3373 }
3374 
CallBuiltin(Builtin builtin)3375 void TurboAssembler::CallBuiltin(Builtin builtin) {
3376   RecordCommentForOffHeapTrampoline(builtin);
3377   if (options().short_builtin_calls) {
3378     Call(BuiltinEntry(builtin), RelocInfo::RUNTIME_ENTRY);
3379   } else {
3380     Call(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET);
3381   }
3382   RecordComment("]");
3383 }
3384 
TailCallBuiltin(Builtin builtin)3385 void TurboAssembler::TailCallBuiltin(Builtin builtin) {
3386   RecordCommentForOffHeapTrampoline(builtin);
3387   if (options().short_builtin_calls) {
3388     Jump(BuiltinEntry(builtin), RelocInfo::RUNTIME_ENTRY);
3389   } else {
3390     Jump(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET);
3391   }
3392   RecordComment("]");
3393 }
3394 
LoadEntryFromBuiltin(Builtin builtin,Register destination)3395 void TurboAssembler::LoadEntryFromBuiltin(Builtin builtin,
3396                                           Register destination) {
3397   Ld(destination, EntryFromBuiltinAsOperand(builtin));
3398 }
3399 
EntryFromBuiltinAsOperand(Builtin builtin)3400 MemOperand TurboAssembler::EntryFromBuiltinAsOperand(Builtin builtin) {
3401   DCHECK(root_array_available());
3402   return MemOperand(kRootRegister,
3403                     IsolateData::BuiltinEntrySlotOffset(builtin));
3404 }
3405 
PatchAndJump(Address target)3406 void TurboAssembler::PatchAndJump(Address target) {
3407   UseScratchRegisterScope temps(this);
3408   Register scratch = temps.Acquire();
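  // Layout emitted below (offsets from the auipc):
  //   +0  auipc scratch, 0       ; scratch = pc
  //   +4  ld    t6, 16(scratch)  ; load the 64-bit literal stored at +16
  //   +8  jr    t6
  //   +12 nop                    ; keeps the 8-byte literal aligned
  //   +16 .dword target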
3409   auipc(scratch, 0);  // Load PC into scratch
3410   Ld(t6, MemOperand(scratch, kInstrSize * 4));
3411   jr(t6);
3412   nop();  // For alignment
3413   DCHECK_EQ(reinterpret_cast<uint64_t>(pc_) % 8, 0);
3414   *reinterpret_cast<uint64_t*>(pc_) = target;  // pc_ must be 8-byte aligned here.
3415   pc_ += sizeof(uint64_t);
3416 }
3417 
StoreReturnAddressAndCall(Register target)3418 void TurboAssembler::StoreReturnAddressAndCall(Register target) {
3419   // This generates the final instruction sequence for calls to C functions
3420   // once an exit frame has been constructed.
3421   //
3422   // Note that this assumes the caller code (i.e. the Code object currently
3423   // being generated) is immovable or that the callee function cannot trigger
3424   // GC, since the callee function will return to it.
3425   //
3426   // Compute the return address in ra to return to after the jalr below: take
3427   // the current pc with auipc and add the size of the instructions between
3428   // find_ra and the end of the call sequence (kNumInstructionsToJump + 1).
3429   //
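  // The generated sequence is roughly (without the C extension):
  //   auipc ra, 0
  //   addi  ra, ra, (kNumInstructionsToJump + 1) * kInstrSize  // ra = insn after jalr
  //   sd    ra, 0(sp)                // spot reserved in EnterExitFrame
  //   addi  sp, sp, -kCArgsSlotsSize
  //   mv    t6, target
  //   jalr  t6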
3430   Assembler::BlockTrampolinePoolScope block_trampoline_pool(this);
3431   int kNumInstructionsToJump = 5;
3432   if (FLAG_riscv_c_extension) kNumInstructionsToJump = 4;
3433   Label find_ra;
3434   // Adjust the value in ra to point to the correct return location, one
3435   // instruction past the real call into C code (the jalr(t6)), and push it.
3436   // This is the return address of the exit frame.
3437   auipc(ra, 0);  // Set ra the current PC
3438   bind(&find_ra);
3439   addi(ra, ra,
3440        (kNumInstructionsToJump + 1) *
3441            kInstrSize);  // Set ra to insn after the call
3442 
3443   // This spot was reserved in EnterExitFrame.
3444   Sd(ra, MemOperand(sp));
3445   addi(sp, sp, -kCArgsSlotsSize);
3446   // Stack is still aligned.
3447 
3448   // Call the C routine.
3449   Mv(t6,
3450      target);  // Function pointer to t6 to conform to ABI for PIC.
3451   jalr(t6);
3452   // Make sure the stored 'ra' points to this position.
3453   DCHECK_EQ(kNumInstructionsToJump, InstructionsGeneratedSince(&find_ra));
3454 }
3455 
Ret(Condition cond,Register rs,const Operand & rt)3456 void TurboAssembler::Ret(Condition cond, Register rs, const Operand& rt) {
3457   Jump(ra, cond, rs, rt);
3458   if (cond == al) {
3459     ForceConstantPoolEmissionWithoutJump();
3460   }
3461 }
3462 
3463 
BranchLong(Label * L)3464 void TurboAssembler::BranchLong(Label* L) {
3465   // Generate position independent long branch.
3466   BlockTrampolinePoolScope block_trampoline_pool(this);
3467   int64_t imm64;
3468   imm64 = branch_long_offset(L);
3469   GenPCRelativeJump(t6, imm64);
3470   EmitConstPoolWithJumpIfNeeded();
3471 }
3472 
BranchAndLinkLong(Label * L)3473 void TurboAssembler::BranchAndLinkLong(Label* L) {
3474   // Generate position independent long branch and link.
3475   BlockTrampolinePoolScope block_trampoline_pool(this);
3476   int64_t imm64;
3477   imm64 = branch_long_offset(L);
3478   GenPCRelativeJumpAndLink(t6, imm64);
3479 }
3480 
DropAndRet(int drop)3481 void TurboAssembler::DropAndRet(int drop) {
3482   Add64(sp, sp, drop * kSystemPointerSize);
3483   Ret();
3484 }
3485 
DropAndRet(int drop,Condition cond,Register r1,const Operand & r2)3486 void TurboAssembler::DropAndRet(int drop, Condition cond, Register r1,
3487                                 const Operand& r2) {
3488   // Both Drop and Ret need to be conditional.
3489   Label skip;
3490   if (cond != cc_always) {
3491     Branch(&skip, NegateCondition(cond), r1, r2);
3492   }
3493 
3494   Drop(drop);
3495   Ret();
3496 
3497   if (cond != cc_always) {
3498     bind(&skip);
3499   }
3500 }
3501 
Drop(int count,Condition cond,Register reg,const Operand & op)3502 void TurboAssembler::Drop(int count, Condition cond, Register reg,
3503                           const Operand& op) {
3504   if (count <= 0) {
3505     return;
3506   }
3507 
3508   Label skip;
3509 
3510   if (cond != al) {
3511     Branch(&skip, NegateCondition(cond), reg, op);
3512   }
3513 
3514   Add64(sp, sp, Operand(count * kSystemPointerSize));
3515 
3516   if (cond != al) {
3517     bind(&skip);
3518   }
3519 }
3520 
Swap(Register reg1,Register reg2,Register scratch)3521 void MacroAssembler::Swap(Register reg1, Register reg2, Register scratch) {
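  // With no scratch register available, fall back to the classic three-XOR
  // swap (reg1 ^= reg2; reg2 ^= reg1; reg1 ^= reg2). Note this would zero the
  // register if reg1 == reg2, whereas the Mv-based path has no such hazard.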
3522   if (scratch == no_reg) {
3523     Xor(reg1, reg1, Operand(reg2));
3524     Xor(reg2, reg2, Operand(reg1));
3525     Xor(reg1, reg1, Operand(reg2));
3526   } else {
3527     Mv(scratch, reg1);
3528     Mv(reg1, reg2);
3529     Mv(reg2, scratch);
3530   }
3531 }
3532 
Call(Label * target)3533 void TurboAssembler::Call(Label* target) { BranchAndLink(target); }
3534 
LoadAddress(Register dst,Label * target,RelocInfo::Mode rmode)3535 void TurboAssembler::LoadAddress(Register dst, Label* target,
3536                                  RelocInfo::Mode rmode) {
3537   int32_t offset;
3538   if (CalculateOffset(target, &offset, OffsetSize::kOffset32)) {
3539     CHECK(is_int32(offset + 0x800));
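    // Split the 32-bit pc-relative offset for an auipc/addi pair: Lo12 is the
    // sign-extended low 12 bits, and the +0x800 biases Hi20 so that adding the
    // (possibly negative) Lo12 afterwards yields the original offset.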
3540     int32_t Hi20 = (((int32_t)offset + 0x800) >> 12);
3541     int32_t Lo12 = (int32_t)offset << 20 >> 20;
3542     BlockTrampolinePoolScope block_trampoline_pool(this);
3543     auipc(dst, Hi20);
3544     addi(dst, dst, Lo12);
3545   } else {
3546     uint64_t address = jump_address(target);
3547     li(dst, Operand(address, rmode), ADDRESS_LOAD);
3548   }
3549 }
3550 
Push(Smi smi)3551 void TurboAssembler::Push(Smi smi) {
3552   UseScratchRegisterScope temps(this);
3553   Register scratch = temps.Acquire();
3554   li(scratch, Operand(smi));
3555   push(scratch);
3556 }
3557 
PushArray(Register array,Register size,PushArrayOrder order)3558 void TurboAssembler::PushArray(Register array, Register size,
3559                                PushArrayOrder order) {
3560   UseScratchRegisterScope temps(this);
3561   Register scratch = temps.Acquire();
3562   Register scratch2 = temps.Acquire();
3563   Label loop, entry;
3564   if (order == PushArrayOrder::kReverse) {
3565     Mv(scratch, zero_reg);
3566     jmp(&entry);
3567     bind(&loop);
3568     CalcScaledAddress(scratch2, array, scratch, kSystemPointerSizeLog2);
3569     Ld(scratch2, MemOperand(scratch2));
3570     push(scratch2);
3571     Add64(scratch, scratch, Operand(1));
3572     bind(&entry);
3573     Branch(&loop, less, scratch, Operand(size));
3574   } else {
3575     Mv(scratch, size);
3576     jmp(&entry);
3577     bind(&loop);
3578     CalcScaledAddress(scratch2, array, scratch, kSystemPointerSizeLog2);
3579     Ld(scratch2, MemOperand(scratch2));
3580     push(scratch2);
3581     bind(&entry);
3582     Add64(scratch, scratch, Operand(-1));
3583     Branch(&loop, greater_equal, scratch, Operand(zero_reg));
3584   }
3585 }
3586 
Push(Handle<HeapObject> handle)3587 void TurboAssembler::Push(Handle<HeapObject> handle) {
3588   UseScratchRegisterScope temps(this);
3589   Register scratch = temps.Acquire();
3590   li(scratch, Operand(handle));
3591   push(scratch);
3592 }
3593 
3594 // ---------------------------------------------------------------------------
3595 // Exception handling.
3596 
PushStackHandler()3597 void MacroAssembler::PushStackHandler() {
3598   // Adjust this code if not the case.
3599   STATIC_ASSERT(StackHandlerConstants::kSize == 2 * kSystemPointerSize);
3600   STATIC_ASSERT(StackHandlerConstants::kNextOffset == 0 * kSystemPointerSize);
3601 
3602   Push(Smi::zero());  // Padding.
3603 
3604   // Link the current handler as the next handler.
3605   UseScratchRegisterScope temps(this);
3606   Register handler_address = temps.Acquire();
3607   li(handler_address,
3608      ExternalReference::Create(IsolateAddressId::kHandlerAddress, isolate()));
3609   Register handler = temps.Acquire();
3610   Ld(handler, MemOperand(handler_address));
3611   push(handler);
3612 
3613   // Set this new handler as the current one.
3614   Sd(sp, MemOperand(handler_address));
3615 }
3616 
PopStackHandler()3617 void MacroAssembler::PopStackHandler() {
3618   STATIC_ASSERT(StackHandlerConstants::kNextOffset == 0);
3619   pop(a1);
3620   Add64(sp, sp,
3621         Operand(static_cast<int64_t>(StackHandlerConstants::kSize -
3622                                      kSystemPointerSize)));
3623   UseScratchRegisterScope temps(this);
3624   Register scratch = temps.Acquire();
3625   li(scratch,
3626      ExternalReference::Create(IsolateAddressId::kHandlerAddress, isolate()));
3627   Sd(a1, MemOperand(scratch));
3628 }
3629 
FPUCanonicalizeNaN(const DoubleRegister dst,const DoubleRegister src)3630 void TurboAssembler::FPUCanonicalizeNaN(const DoubleRegister dst,
3631                                         const DoubleRegister src) {
3632   // Subtracting 0.0 preserves all inputs except for signalling NaNs, which
3633   // become quiet NaNs. We use fsub rather than fadd because fsub preserves -0.0
3634   // inputs: -0.0 + 0.0 = 0.0, but -0.0 - 0.0 = -0.0.
3635   fsub_d(dst, src, kDoubleRegZero);
3636 }
3637 
MovFromFloatResult(const DoubleRegister dst)3638 void TurboAssembler::MovFromFloatResult(const DoubleRegister dst) {
3639   Move(dst, fa0);  // Reg fa0 is FP return value.
3640 }
3641 
MovFromFloatParameter(const DoubleRegister dst)3642 void TurboAssembler::MovFromFloatParameter(const DoubleRegister dst) {
3643   Move(dst, fa0);  // Reg fa0 is FP first argument value.
3644 }
3645 
MovToFloatParameter(DoubleRegister src)3646 void TurboAssembler::MovToFloatParameter(DoubleRegister src) { Move(fa0, src); }
3647 
MovToFloatResult(DoubleRegister src)3648 void TurboAssembler::MovToFloatResult(DoubleRegister src) { Move(fa0, src); }
3649 
MovToFloatParameters(DoubleRegister src1,DoubleRegister src2)3650 void TurboAssembler::MovToFloatParameters(DoubleRegister src1,
3651                                           DoubleRegister src2) {
3652   const DoubleRegister fparg2 = fa1;
3653   if (src2 == fa0) {
3654     DCHECK(src1 != fparg2);
3655     Move(fparg2, src2);
3656     Move(fa0, src1);
3657   } else {
3658     Move(fa0, src1);
3659     Move(fparg2, src2);
3660   }
3661 }
3662 
3663 // -----------------------------------------------------------------------------
3664 // JavaScript invokes.
3665 
LoadStackLimit(Register destination,StackLimitKind kind)3666 void MacroAssembler::LoadStackLimit(Register destination, StackLimitKind kind) {
3667   DCHECK(root_array_available());
3668   Isolate* isolate = this->isolate();
3669   ExternalReference limit =
3670       kind == StackLimitKind::kRealStackLimit
3671           ? ExternalReference::address_of_real_jslimit(isolate)
3672           : ExternalReference::address_of_jslimit(isolate);
3673   DCHECK(TurboAssembler::IsAddressableThroughRootRegister(isolate, limit));
3674 
3675   intptr_t offset =
3676       TurboAssembler::RootRegisterOffsetForExternalReference(isolate, limit);
3677   CHECK(is_int32(offset));
3678   Ld(destination, MemOperand(kRootRegister, static_cast<int32_t>(offset)));
3679 }
3680 
StackOverflowCheck(Register num_args,Register scratch1,Register scratch2,Label * stack_overflow,Label * done)3681 void MacroAssembler::StackOverflowCheck(Register num_args, Register scratch1,
3682                                         Register scratch2,
3683                                         Label* stack_overflow, Label* done) {
3684   // Check the stack for overflow. We are not trying to catch
3685   // interruptions (e.g. debug break and preemption) here, so the "real stack
3686   // limit" is checked.
3687   DCHECK(stack_overflow != nullptr || done != nullptr);
3688   LoadStackLimit(scratch1, StackLimitKind::kRealStackLimit);
3689   // Make scratch1 the space we have left. The stack might already have
3690   // overflowed here, which would make scratch1 negative.
3691   Sub64(scratch1, sp, scratch1);
3692   // Check if the arguments will overflow the stack.
3693   Sll64(scratch2, num_args, kSystemPointerSizeLog2);
3694   // Signed comparison.
3695   if (stack_overflow != nullptr) {
3696     Branch(stack_overflow, le, scratch1, Operand(scratch2));
3697   } else if (done != nullptr) {
3698     Branch(done, gt, scratch1, Operand(scratch2));
3699   } else {
3700     UNREACHABLE();
3701   }
3702 }
3703 
InvokePrologue(Register expected_parameter_count,Register actual_parameter_count,Label * done,InvokeType type)3704 void MacroAssembler::InvokePrologue(Register expected_parameter_count,
3705                                     Register actual_parameter_count,
3706                                     Label* done, InvokeType type) {
3707   Label regular_invoke;
3708 
3709   //  a0: actual arguments count
3710   //  a1: function (passed through to callee)
3711   //  a2: expected arguments count
3712 
3713   DCHECK_EQ(actual_parameter_count, a0);
3714   DCHECK_EQ(expected_parameter_count, a2);
3715 
3716   // If the expected parameter count is equal to the adaptor sentinel, no need
3717   // to push undefined value as arguments.
3718   if (kDontAdaptArgumentsSentinel != 0) {
3719     Branch(&regular_invoke, eq, expected_parameter_count,
3720            Operand(kDontAdaptArgumentsSentinel));
3721   }
3722   // If overapplication or if the actual argument count is equal to the
3723   // formal parameter count, no need to push extra undefined values.
3724   Sub64(expected_parameter_count, expected_parameter_count,
3725         actual_parameter_count);
3726   Branch(&regular_invoke, le, expected_parameter_count, Operand(zero_reg));
3727 
3728   Label stack_overflow;
3729   {
3730     UseScratchRegisterScope temps(this);
3731     StackOverflowCheck(expected_parameter_count, temps.Acquire(),
3732                        temps.Acquire(), &stack_overflow);
3733   }
3734   // Underapplication. Move the arguments already in the stack, including the
3735   // receiver and the return address.
3736   {
3737     Label copy;
3738     Register src = a6, dest = a7;
3739     Move(src, sp);
3740     Sll64(t0, expected_parameter_count, kSystemPointerSizeLog2);
3741     Sub64(sp, sp, Operand(t0));
3742     // Update stack pointer.
3743     Move(dest, sp);
3744     Move(t0, actual_parameter_count);
3745     bind(&copy);
3746     Ld(t1, MemOperand(src, 0));
3747     Sd(t1, MemOperand(dest, 0));
3748     Sub64(t0, t0, Operand(1));
3749     Add64(src, src, Operand(kSystemPointerSize));
3750     Add64(dest, dest, Operand(kSystemPointerSize));
3751     Branch(&copy, gt, t0, Operand(zero_reg));
3752   }
3753 
3754   // Fill remaining expected arguments with undefined values.
3755   LoadRoot(t0, RootIndex::kUndefinedValue);
3756   {
3757     Label loop;
3758     bind(&loop);
3759     Sd(t0, MemOperand(a7, 0));
3760     Sub64(expected_parameter_count, expected_parameter_count, Operand(1));
3761     Add64(a7, a7, Operand(kSystemPointerSize));
3762     Branch(&loop, gt, expected_parameter_count, Operand(zero_reg));
3763   }
3764   Branch(&regular_invoke);
3765 
3766   bind(&stack_overflow);
3767   {
3768     FrameScope frame(
3769         this, has_frame() ? StackFrame::NO_FRAME_TYPE : StackFrame::INTERNAL);
3770     CallRuntime(Runtime::kThrowStackOverflow);
3771     break_(0xCC);
3772   }
3773   bind(&regular_invoke);
3774 }
3775 
CheckDebugHook(Register fun,Register new_target,Register expected_parameter_count,Register actual_parameter_count)3776 void MacroAssembler::CheckDebugHook(Register fun, Register new_target,
3777                                     Register expected_parameter_count,
3778                                     Register actual_parameter_count) {
3779   Label skip_hook;
3780   {
3781     UseScratchRegisterScope temps(this);
3782     Register scratch = temps.Acquire();
3783     li(scratch,
3784        ExternalReference::debug_hook_on_function_call_address(isolate()));
3785     Lb(scratch, MemOperand(scratch));
3786     Branch(&skip_hook, eq, scratch, Operand(zero_reg));
3787   }
3788   {
3789     // Load receiver to pass it later to DebugOnFunctionCall hook.
3790     UseScratchRegisterScope temps(this);
3791     Register receiver = temps.Acquire();
3792     LoadReceiver(receiver, actual_parameter_count);
3793 
3794     FrameScope frame(
3795         this, has_frame() ? StackFrame::NO_FRAME_TYPE : StackFrame::INTERNAL);
3796     SmiTag(expected_parameter_count);
3797     Push(expected_parameter_count);
3798 
3799     SmiTag(actual_parameter_count);
3800     Push(actual_parameter_count);
3801 
3802     if (new_target.is_valid()) {
3803       Push(new_target);
3804     }
3805     Push(fun);
3806     Push(fun);
3807     Push(receiver);
3808     CallRuntime(Runtime::kDebugOnFunctionCall);
3809     Pop(fun);
3810     if (new_target.is_valid()) {
3811       Pop(new_target);
3812     }
3813 
3814     Pop(actual_parameter_count);
3815     SmiUntag(actual_parameter_count);
3816 
3817     Pop(expected_parameter_count);
3818     SmiUntag(expected_parameter_count);
3819   }
3820   bind(&skip_hook);
3821 }
3822 
InvokeFunctionCode(Register function,Register new_target,Register expected_parameter_count,Register actual_parameter_count,InvokeType type)3823 void MacroAssembler::InvokeFunctionCode(Register function, Register new_target,
3824                                         Register expected_parameter_count,
3825                                         Register actual_parameter_count,
3826                                         InvokeType type) {
3827   // You can't call a function without a valid frame.
3828   DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());
3829   DCHECK_EQ(function, a1);
3830   DCHECK_IMPLIES(new_target.is_valid(), new_target == a3);
3831 
3832   // On function call, call into the debugger if necessary.
3833   CheckDebugHook(function, new_target, expected_parameter_count,
3834                  actual_parameter_count);
3835 
3836   // Clear the new.target register if not given.
3837   if (!new_target.is_valid()) {
3838     LoadRoot(a3, RootIndex::kUndefinedValue);
3839   }
3840 
3841   Label done;
3842   InvokePrologue(expected_parameter_count, actual_parameter_count, &done, type);
3843   // We call indirectly through the code field in the function to
3844   // allow recompilation to take effect without changing any of the
3845   // call sites.
3846   Register code = kJavaScriptCallCodeStartRegister;
3847   LoadTaggedPointerField(code,
3848                          FieldMemOperand(function, JSFunction::kCodeOffset));
3849   switch (type) {
3850     case InvokeType::kCall:
3851       CallCodeObject(code);
3852       break;
3853     case InvokeType::kJump:
3854       JumpCodeObject(code);
3855       break;
3856   }
3857 
3858   // Continue here if InvokePrologue does handle the invocation due to
3859   // mismatched parameter counts.
3860   bind(&done);
3861 }
3862 
InvokeFunctionWithNewTarget(Register function,Register new_target,Register actual_parameter_count,InvokeType type)3863 void MacroAssembler::InvokeFunctionWithNewTarget(
3864     Register function, Register new_target, Register actual_parameter_count,
3865     InvokeType type) {
3866   // You can't call a function without a valid frame.
3867   DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());
3868 
3869   // Contract with called JS functions requires that function is passed in a1.
3870   DCHECK_EQ(function, a1);
3871   Register expected_parameter_count = a2;
3872   {
3873     UseScratchRegisterScope temps(this);
3874     Register temp_reg = temps.Acquire();
3875     LoadTaggedPointerField(
3876         temp_reg,
3877         FieldMemOperand(function, JSFunction::kSharedFunctionInfoOffset));
3878     LoadTaggedPointerField(
3879         cp, FieldMemOperand(function, JSFunction::kContextOffset));
3880     // The argument count is stored as uint16_t
3881     Lhu(expected_parameter_count,
3882         FieldMemOperand(temp_reg,
3883                         SharedFunctionInfo::kFormalParameterCountOffset));
3884   }
3885   InvokeFunctionCode(function, new_target, expected_parameter_count,
3886                      actual_parameter_count, type);
3887 }
3888 
InvokeFunction(Register function,Register expected_parameter_count,Register actual_parameter_count,InvokeType type)3889 void MacroAssembler::InvokeFunction(Register function,
3890                                     Register expected_parameter_count,
3891                                     Register actual_parameter_count,
3892                                     InvokeType type) {
3893   // You can't call a function without a valid frame.
3894   DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());
3895 
3896   // Contract with called JS functions requires that function is passed in a1.
3897   DCHECK_EQ(function, a1);
3898 
3899   // Get the function and setup the context.
3900   LoadTaggedPointerField(cp, FieldMemOperand(a1, JSFunction::kContextOffset));
3901 
3902   InvokeFunctionCode(a1, no_reg, expected_parameter_count,
3903                      actual_parameter_count, type);
3904 }
3905 
3906 // ---------------------------------------------------------------------------
3907 // Support functions.
3908 
GetObjectType(Register object,Register map,Register type_reg)3909 void MacroAssembler::GetObjectType(Register object, Register map,
3910                                    Register type_reg) {
3911   LoadMap(map, object);
3912   Lhu(type_reg, FieldMemOperand(map, Map::kInstanceTypeOffset));
3913 }
3914 
GetInstanceTypeRange(Register map,Register type_reg,InstanceType lower_limit,Register range)3915 void MacroAssembler::GetInstanceTypeRange(Register map, Register type_reg,
3916                                           InstanceType lower_limit,
3917                                           Register range) {
3918   Lhu(type_reg, FieldMemOperand(map, Map::kInstanceTypeOffset));
3919   Sub64(range, type_reg, Operand(lower_limit));
3920 }
3921 //------------------------------------------------------------------------------
3922 // Wasm
WasmRvvEq(VRegister dst,VRegister lhs,VRegister rhs,VSew sew,Vlmul lmul)3923 void TurboAssembler::WasmRvvEq(VRegister dst, VRegister lhs, VRegister rhs,
3924                                VSew sew, Vlmul lmul) {
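  // Pattern shared by this and the following RVV compare helpers: the vector
  // compare writes a mask into v0, dst is cleared to all-zero lanes, and
  // vmerge_vx then writes -1 into the lanes selected by the mask, producing
  // the all-ones/all-zeros lane results Wasm SIMD comparisons require.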
3925   VU.set(kScratchReg, sew, lmul);
3926   vmseq_vv(v0, lhs, rhs);
3927   li(kScratchReg, -1);
3928   vmv_vx(dst, zero_reg);
3929   vmerge_vx(dst, kScratchReg, dst);
3930 }
3931 
WasmRvvNe(VRegister dst,VRegister lhs,VRegister rhs,VSew sew,Vlmul lmul)3932 void TurboAssembler::WasmRvvNe(VRegister dst, VRegister lhs, VRegister rhs,
3933                                VSew sew, Vlmul lmul) {
3934   VU.set(kScratchReg, sew, lmul);
3935   vmsne_vv(v0, lhs, rhs);
3936   li(kScratchReg, -1);
3937   vmv_vx(dst, zero_reg);
3938   vmerge_vx(dst, kScratchReg, dst);
3939 }
3940 
WasmRvvGeS(VRegister dst,VRegister lhs,VRegister rhs,VSew sew,Vlmul lmul)3941 void TurboAssembler::WasmRvvGeS(VRegister dst, VRegister lhs, VRegister rhs,
3942                                 VSew sew, Vlmul lmul) {
3943   VU.set(kScratchReg, sew, lmul);
3944   vmsle_vv(v0, rhs, lhs);
3945   li(kScratchReg, -1);
3946   vmv_vx(dst, zero_reg);
3947   vmerge_vx(dst, kScratchReg, dst);
3948 }
3949 
WasmRvvGeU(VRegister dst,VRegister lhs,VRegister rhs,VSew sew,Vlmul lmul)3950 void TurboAssembler::WasmRvvGeU(VRegister dst, VRegister lhs, VRegister rhs,
3951                                 VSew sew, Vlmul lmul) {
3952   VU.set(kScratchReg, sew, lmul);
3953   vmsleu_vv(v0, rhs, lhs);
3954   li(kScratchReg, -1);
3955   vmv_vx(dst, zero_reg);
3956   vmerge_vx(dst, kScratchReg, dst);
3957 }
3958 
WasmRvvGtS(VRegister dst,VRegister lhs,VRegister rhs,VSew sew,Vlmul lmul)3959 void TurboAssembler::WasmRvvGtS(VRegister dst, VRegister lhs, VRegister rhs,
3960                                 VSew sew, Vlmul lmul) {
3961   VU.set(kScratchReg, sew, lmul);
3962   vmslt_vv(v0, rhs, lhs);
3963   li(kScratchReg, -1);
3964   vmv_vx(dst, zero_reg);
3965   vmerge_vx(dst, kScratchReg, dst);
3966 }
3967 
WasmRvvGtU(VRegister dst,VRegister lhs,VRegister rhs,VSew sew,Vlmul lmul)3968 void TurboAssembler::WasmRvvGtU(VRegister dst, VRegister lhs, VRegister rhs,
3969                                 VSew sew, Vlmul lmul) {
3970   VU.set(kScratchReg, sew, lmul);
3971   vmsltu_vv(v0, rhs, lhs);
3972   li(kScratchReg, -1);
3973   vmv_vx(dst, zero_reg);
3974   vmerge_vx(dst, kScratchReg, dst);
3975 }
3976 
WasmRvvS128const(VRegister dst,const uint8_t imms[16])3977 void TurboAssembler::WasmRvvS128const(VRegister dst, const uint8_t imms[16]) {
3978   uint64_t imm1 = *(reinterpret_cast<const uint64_t*>(imms));
3979   uint64_t imm2 = *((reinterpret_cast<const uint64_t*>(imms)) + 1);
3980   VU.set(kScratchReg, VSew::E64, Vlmul::m1);
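  // Build the 128-bit constant one 64-bit lane at a time: the v0 mask first
  // selects lane 0 (merge imm1), then is shifted up by one bit to select
  // lane 1 (merge imm2).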
3981   li(kScratchReg, 1);
3982   vmv_vx(v0, kScratchReg);
3983   li(kScratchReg, imm1);
3984   vmerge_vx(dst, kScratchReg, dst);
3985   li(kScratchReg, imm2);
3986   vsll_vi(v0, v0, 1);
3987   vmerge_vx(dst, kScratchReg, dst);
3988 }
3989 
LoadLane(int ts,VRegister dst,uint8_t laneidx,MemOperand src)3990 void TurboAssembler::LoadLane(int ts, VRegister dst, uint8_t laneidx,
3991                               MemOperand src) {
3992   if (ts == 8) {
3993     Lbu(kScratchReg2, src);
3994     VU.set(kScratchReg, E64, m1);
3995     li(kScratchReg, 0x1 << laneidx);
3996     vmv_sx(v0, kScratchReg);
3997     VU.set(kScratchReg, E8, m1);
3998     vmerge_vx(dst, kScratchReg2, dst);
3999   } else if (ts == 16) {
4000     Lhu(kScratchReg2, src);
4001     VU.set(kScratchReg, E16, m1);
4002     li(kScratchReg, 0x1 << laneidx);
4003     vmv_sx(v0, kScratchReg);
4004     vmerge_vx(dst, kScratchReg2, dst);
4005   } else if (ts == 32) {
4006     Lwu(kScratchReg2, src);
4007     VU.set(kScratchReg, E32, m1);
4008     li(kScratchReg, 0x1 << laneidx);
4009     vmv_sx(v0, kScratchReg);
4010     vmerge_vx(dst, kScratchReg2, dst);
4011   } else if (ts == 64) {
4012     Ld(kScratchReg2, src);
4013     VU.set(kScratchReg, E64, m1);
4014     li(kScratchReg, 0x1 << laneidx);
4015     vmv_sx(v0, kScratchReg);
4016     vmerge_vx(dst, kScratchReg2, dst);
4017   } else {
4018     UNREACHABLE();
4019   }
4020 }
4021 
StoreLane(int sz,VRegister src,uint8_t laneidx,MemOperand dst)4022 void TurboAssembler::StoreLane(int sz, VRegister src, uint8_t laneidx,
4023                                MemOperand dst) {
4024   if (sz == 8) {
4025     VU.set(kScratchReg, E8, m1);
4026     vslidedown_vi(kSimd128ScratchReg, src, laneidx);
4027     vmv_xs(kScratchReg, kSimd128ScratchReg);
4028     Sb(kScratchReg, dst);
4029   } else if (sz == 16) {
4030     VU.set(kScratchReg, E16, m1);
4031     vslidedown_vi(kSimd128ScratchReg, src, laneidx);
4032     vmv_xs(kScratchReg, kSimd128ScratchReg);
4033     Sh(kScratchReg, dst);
4034   } else if (sz == 32) {
4035     VU.set(kScratchReg, E32, m1);
4036     vslidedown_vi(kSimd128ScratchReg, src, laneidx);
4037     vmv_xs(kScratchReg, kSimd128ScratchReg);
4038     Sw(kScratchReg, dst);
4039   } else {
4040     DCHECK_EQ(sz, 64);
4041     VU.set(kScratchReg, E64, m1);
4042     vslidedown_vi(kSimd128ScratchReg, src, laneidx);
4043     vmv_xs(kScratchReg, kSimd128ScratchReg);
4044     Sd(kScratchReg, dst);
4045   }
4046 }
4047 // -----------------------------------------------------------------------------
4048 // Runtime calls.
4049 
AddOverflow64(Register dst,Register left,const Operand & right,Register overflow)4050 void TurboAssembler::AddOverflow64(Register dst, Register left,
4051                                    const Operand& right, Register overflow) {
4052   UseScratchRegisterScope temps(this);
4053   BlockTrampolinePoolScope block_trampoline_pool(this);
4054   Register right_reg = no_reg;
4055   Register scratch = temps.Acquire();
4056   Register scratch2 = temps.Acquire();
4057   if (!right.is_reg()) {
4058     li(scratch, Operand(right));
4059     right_reg = scratch;
4060   } else {
4061     right_reg = right.rm();
4062   }
4063   DCHECK(left != scratch2 && right_reg != scratch2 && dst != scratch2 &&
4064          overflow != scratch2);
4065   DCHECK(overflow != left && overflow != right_reg);
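  // Signed addition overflows iff both operands have the same sign and the
  // sum's sign differs, i.e. ((sum ^ left) & (sum ^ right)) has bit 63 set;
  // `overflow` is therefore negative exactly when the add overflowed.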
4066   if (dst == left || dst == right_reg) {
4067     add(scratch2, left, right_reg);
4068     xor_(overflow, scratch2, left);
4069     xor_(scratch, scratch2, right_reg);
4070     and_(overflow, overflow, scratch);
4071     Mv(dst, scratch2);
4072   } else {
4073     add(dst, left, right_reg);
4074     xor_(overflow, dst, left);
4075     xor_(scratch, dst, right_reg);
4076     and_(overflow, overflow, scratch);
4077   }
4078 }
4079 
SubOverflow64(Register dst,Register left,const Operand & right,Register overflow)4080 void TurboAssembler::SubOverflow64(Register dst, Register left,
4081                                    const Operand& right, Register overflow) {
4082   UseScratchRegisterScope temps(this);
4083   BlockTrampolinePoolScope block_trampoline_pool(this);
4084   Register right_reg = no_reg;
4085   Register scratch = temps.Acquire();
4086   Register scratch2 = temps.Acquire();
4087   if (!right.is_reg()) {
4088     li(scratch, Operand(right));
4089     right_reg = scratch;
4090   } else {
4091     right_reg = right.rm();
4092   }
4093 
4094   DCHECK(left != scratch2 && right_reg != scratch2 && dst != scratch2 &&
4095          overflow != scratch2);
4096   DCHECK(overflow != left && overflow != right_reg);
4097 
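  // Signed subtraction overflows iff the operands have different signs and the
  // result's sign differs from the left operand's, i.e.
  // ((left ^ diff) & (left ^ right)) is negative exactly on overflow.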
4098   if (dst == left || dst == right_reg) {
4099     sub(scratch2, left, right_reg);
4100     xor_(overflow, left, scratch2);
4101     xor_(scratch, left, right_reg);
4102     and_(overflow, overflow, scratch);
4103     Mv(dst, scratch2);
4104   } else {
4105     sub(dst, left, right_reg);
4106     xor_(overflow, left, dst);
4107     xor_(scratch, left, right_reg);
4108     and_(overflow, overflow, scratch);
4109   }
4110 }
4111 
MulOverflow32(Register dst,Register left,const Operand & right,Register overflow)4112 void TurboAssembler::MulOverflow32(Register dst, Register left,
4113                                    const Operand& right, Register overflow) {
4114   ASM_CODE_COMMENT(this);
4115   UseScratchRegisterScope temps(this);
4116   BlockTrampolinePoolScope block_trampoline_pool(this);
4117   Register right_reg = no_reg;
4118   Register scratch = temps.Acquire();
4119   Register scratch2 = temps.Acquire();
4120   if (!right.is_reg()) {
4121     li(scratch, Operand(right));
4122     right_reg = scratch;
4123   } else {
4124     right_reg = right.rm();
4125   }
4126 
4127   DCHECK(left != scratch2 && right_reg != scratch2 && dst != scratch2 &&
4128          overflow != scratch2);
4129   DCHECK(overflow != left && overflow != right_reg);
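  // Sign-extend both operands, form the full 64-bit product, and compare it
  // with the sign-extension of its low 32 bits; 'overflow' is non-zero iff the
  // product does not fit in 32 bits.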
4130   sext_w(overflow, left);
4131   sext_w(scratch2, right_reg);
4132 
4133   mul(overflow, overflow, scratch2);
4134   sext_w(dst, overflow);
4135   xor_(overflow, overflow, dst);
4136 }
4137 
4138 void MacroAssembler::CallRuntime(const Runtime::Function* f, int num_arguments,
4139                                  SaveFPRegsMode save_doubles) {
4140   ASM_CODE_COMMENT(this);
4141   // All parameters are on the stack. a0 has the return value after call.
4142 
4143   // If the expected number of arguments of the runtime function is
4144   // constant, we check that the actual number of arguments match the
4145   // expectation.
4146   CHECK(f->nargs < 0 || f->nargs == num_arguments);
4147 
4148   // TODO(1236192): Most runtime routines don't need the number of
4149   // arguments passed in because it is constant. At some point we
4150   // should remove this need and make the runtime routine entry code
4151   // smarter.
4152   PrepareCEntryArgs(num_arguments);
4153   PrepareCEntryFunction(ExternalReference::Create(f));
4154   Handle<Code> code =
4155       CodeFactory::CEntry(isolate(), f->result_size, save_doubles);
4156   Call(code, RelocInfo::CODE_TARGET);
4157 }
4158 
4159 void MacroAssembler::TailCallRuntime(Runtime::FunctionId fid) {
4160   ASM_CODE_COMMENT(this);
4161   const Runtime::Function* function = Runtime::FunctionForId(fid);
4162   DCHECK_EQ(1, function->result_size);
4163   if (function->nargs >= 0) {
4164     PrepareCEntryArgs(function->nargs);
4165   }
4166   JumpToExternalReference(ExternalReference::Create(fid));
4167 }
4168 
4169 void MacroAssembler::JumpToExternalReference(const ExternalReference& builtin,
4170                                              bool builtin_exit_frame) {
4171   ASM_CODE_COMMENT(this);
4172   PrepareCEntryFunction(builtin);
4173   Handle<Code> code = CodeFactory::CEntry(isolate(), 1, SaveFPRegsMode::kIgnore,
4174                                           ArgvMode::kStack, builtin_exit_frame);
4175   Jump(code, RelocInfo::CODE_TARGET, al, zero_reg, Operand(zero_reg));
4176 }
4177 
4178 void MacroAssembler::JumpToOffHeapInstructionStream(Address entry) {
4179   // When the RISC-V constant pool is enabled, the entry address is recorded
4180   // in the constant pool and loaded from there; otherwise it is built inline.
4181   ASM_CODE_COMMENT(this);
4182   if (!FLAG_riscv_constant_pool) {
4183     li(kOffHeapTrampolineRegister, Operand(entry, RelocInfo::OFF_HEAP_TARGET));
4184   } else {
4185     RecordEntry(entry, RelocInfo::OFF_HEAP_TARGET);
4186     RecordRelocInfo(RelocInfo::OFF_HEAP_TARGET, entry);
4187     auipc(kOffHeapTrampolineRegister, 0);
4188     ld(kOffHeapTrampolineRegister, kOffHeapTrampolineRegister, 0);
4189   }
4190   Jump(kOffHeapTrampolineRegister);
4191 }
4192 
4193 void MacroAssembler::LoadWeakValue(Register out, Register in,
4194                                    Label* target_if_cleared) {
4195   ASM_CODE_COMMENT(this);
4196   Branch(target_if_cleared, eq, in, Operand(kClearedWeakHeapObjectLower32));
4197   And(out, in, Operand(~kWeakHeapObjectMask));
4198 }
4199 
4200 void MacroAssembler::EmitIncrementCounter(StatsCounter* counter, int value,
4201                                           Register scratch1,
4202                                           Register scratch2) {
4203   DCHECK_GT(value, 0);
4204   if (FLAG_native_code_counters && counter->Enabled()) {
4205     ASM_CODE_COMMENT(this);
4206     // This operation has to be exactly 32-bit wide in case the external
4207     // reference table redirects the counter to a uint32_t
4208     // dummy_stats_counter_ field.
4209     li(scratch2, ExternalReference::Create(counter));
4210     Lw(scratch1, MemOperand(scratch2));
4211     Add32(scratch1, scratch1, Operand(value));
4212     Sw(scratch1, MemOperand(scratch2));
4213   }
4214 }
4215 
4216 void MacroAssembler::EmitDecrementCounter(StatsCounter* counter, int value,
4217                                           Register scratch1,
4218                                           Register scratch2) {
4219   DCHECK_GT(value, 0);
4220   if (FLAG_native_code_counters && counter->Enabled()) {
4221     ASM_CODE_COMMENT(this);
4222     // This operation has to be exactly 32-bit wide in case the external
4223     // reference table redirects the counter to a uint32_t
4224     // dummy_stats_counter_ field.
4225     li(scratch2, ExternalReference::Create(counter));
4226     Lw(scratch1, MemOperand(scratch2));
4227     Sub32(scratch1, scratch1, Operand(value));
4228     Sw(scratch1, MemOperand(scratch2));
4229   }
4230 }
4231 
4232 // -----------------------------------------------------------------------------
4233 // Debugging.
4234 
4235 void TurboAssembler::Trap() { stop(); }
4236 void TurboAssembler::DebugBreak() { stop(); }
4237 
4238 void TurboAssembler::Assert(Condition cc, AbortReason reason, Register rs,
4239                             Operand rt) {
4240   if (FLAG_debug_code) Check(cc, reason, rs, rt);
4241 }
4242 
4243 void TurboAssembler::Check(Condition cc, AbortReason reason, Register rs,
4244                            Operand rt) {
4245   Label L;
4246   BranchShort(&L, cc, rs, rt);
4247   Abort(reason);
4248   // Will not return here.
4249   bind(&L);
4250 }
4251 
4252 void TurboAssembler::Abort(AbortReason reason) {
4253   Label abort_start;
4254   bind(&abort_start);
4255   if (FLAG_code_comments) {
4256     const char* msg = GetAbortReason(reason);
4257     RecordComment("Abort message: ");
4258     RecordComment(msg);
4259   }
4260 
4261   // Avoid emitting call to builtin if requested.
4262   if (trap_on_abort()) {
4263     ebreak();
4264     return;
4265   }
4266 
4267   if (should_abort_hard()) {
4268     // We don't care if we constructed a frame. Just pretend we did.
4269     FrameScope assume_frame(this, StackFrame::NO_FRAME_TYPE);
4270     PrepareCallCFunction(0, a0);
4271     li(a0, Operand(static_cast<int64_t>(reason)));
4272     CallCFunction(ExternalReference::abort_with_reason(), 1);
4273     return;
4274   }
4275 
4276   Move(a0, Smi::FromInt(static_cast<int>(reason)));
4277 
4278   // Disable stub call restrictions to always allow calls to abort.
4279   if (!has_frame()) {
4280     // We don't actually want to generate a pile of code for this, so just
4281     // claim there is a stack frame, without generating one.
4282     FrameScope scope(this, StackFrame::NO_FRAME_TYPE);
4283     Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET);
4284   } else {
4285     Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET);
4286   }
4287   // Will not return here.
4288   if (is_trampoline_pool_blocked()) {
4289     // If the calling code cares about the exact number of
4290     // instructions generated, we insert padding here to keep the size
4291     // of the Abort macro constant.
4292     // Currently in debug mode with debug_code enabled the number of
4293     // generated instructions is 10, so we use this as a maximum value.
4294     static const int kExpectedAbortInstructions = 10;
4295     int abort_instructions = InstructionsGeneratedSince(&abort_start);
4296     DCHECK_LE(abort_instructions, kExpectedAbortInstructions);
4297     while (abort_instructions++ < kExpectedAbortInstructions) {
4298       nop();
4299     }
4300   }
4301 }
4302 
4303 void TurboAssembler::LoadMap(Register destination, Register object) {
4304   ASM_CODE_COMMENT(this);
4305   LoadTaggedPointerField(destination,
4306                          FieldMemOperand(object, HeapObject::kMapOffset));
4307 }
4308 
4309 void MacroAssembler::LoadNativeContextSlot(Register dst, int index) {
4310   ASM_CODE_COMMENT(this);
4311   LoadMap(dst, cp);
4312   LoadTaggedPointerField(
4313       dst, FieldMemOperand(
4314                dst, Map::kConstructorOrBackPointerOrNativeContextOffset));
4315   LoadTaggedPointerField(dst, MemOperand(dst, Context::SlotOffset(index)));
4316 }
4317 
4318 void TurboAssembler::StubPrologue(StackFrame::Type type) {
4319   ASM_CODE_COMMENT(this);
4320   UseScratchRegisterScope temps(this);
4321   Register scratch = temps.Acquire();
4322   li(scratch, Operand(StackFrame::TypeToMarker(type)));
4323   PushCommonFrame(scratch);
4324 }
4325 
4326 void TurboAssembler::Prologue() { PushStandardFrame(a1); }
4327 
4328 void TurboAssembler::EnterFrame(StackFrame::Type type) {
4329   ASM_CODE_COMMENT(this);
4330   UseScratchRegisterScope temps(this);
4331   Register scratch = temps.Acquire();
4332   BlockTrampolinePoolScope block_trampoline_pool(this);
4333   Push(ra, fp);
4334   Move(fp, sp);
4335   if (!StackFrame::IsJavaScript(type)) {
4336     li(scratch, Operand(StackFrame::TypeToMarker(type)));
4337     Push(scratch);
4338   }
4339 #if V8_ENABLE_WEBASSEMBLY
4340   if (type == StackFrame::WASM) Push(kWasmInstanceRegister);
4341 #endif  // V8_ENABLE_WEBASSEMBLY
4342 }
4343 
4344 void TurboAssembler::LeaveFrame(StackFrame::Type type) {
4345   ASM_CODE_COMMENT(this);
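  // Deallocate everything below the saved fp/ra pair, then restore ra and fp
  // from their slots in the frame being left.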
4346   addi(sp, fp, 2 * kSystemPointerSize);
4347   Ld(ra, MemOperand(fp, 1 * kSystemPointerSize));
4348   Ld(fp, MemOperand(fp, 0 * kSystemPointerSize));
4349 }
4350 
4351 void MacroAssembler::EnterExitFrame(bool save_doubles, int stack_space,
4352                                     StackFrame::Type frame_type) {
4353   ASM_CODE_COMMENT(this);
4354   DCHECK(frame_type == StackFrame::EXIT ||
4355          frame_type == StackFrame::BUILTIN_EXIT);
4356 
4357   // Set up the frame structure on the stack.
4358   STATIC_ASSERT(2 * kSystemPointerSize ==
4359                 ExitFrameConstants::kCallerSPDisplacement);
4360   STATIC_ASSERT(1 * kSystemPointerSize == ExitFrameConstants::kCallerPCOffset);
4361   STATIC_ASSERT(0 * kSystemPointerSize == ExitFrameConstants::kCallerFPOffset);
4362 
4363   // This is how the stack will look:
4364   // fp + 2 (==kCallerSPDisplacement) - old stack's end
4365   // [fp + 1 (==kCallerPCOffset)] - saved old ra
4366   // [fp + 0 (==kCallerFPOffset)] - saved old fp
4367   // [fp - 1] - StackFrame::EXIT Smi
4368   // [fp - 2 (==kSPOffset)] - sp of the called function
4369   // fp - (2 + stack_space + alignment) == sp == [fp - kSPOffset] - top of the
4370   //   new stack (will contain saved ra)
4371 
4372   // Save registers and reserve room for saved entry sp.
4373   addi(sp, sp,
4374        -2 * kSystemPointerSize - ExitFrameConstants::kFixedFrameSizeFromFp);
4375   Sd(ra, MemOperand(sp, 3 * kSystemPointerSize));
4376   Sd(fp, MemOperand(sp, 2 * kSystemPointerSize));
4377   {
4378     UseScratchRegisterScope temps(this);
4379     Register scratch = temps.Acquire();
4380     li(scratch, Operand(StackFrame::TypeToMarker(frame_type)));
4381     Sd(scratch, MemOperand(sp, 1 * kSystemPointerSize));
4382   }
4383   // Set up new frame pointer.
4384   addi(fp, sp, ExitFrameConstants::kFixedFrameSizeFromFp);
4385 
4386   if (FLAG_debug_code) {
4387     Sd(zero_reg, MemOperand(fp, ExitFrameConstants::kSPOffset));
4388   }
4389 
4390   {
4391     UseScratchRegisterScope temps(this);
4392     Register scratch = temps.Acquire();
4393     BlockTrampolinePoolScope block_trampoline_pool(this);
4394     // Save the frame pointer and the context in top.
4395     li(scratch, ExternalReference::Create(IsolateAddressId::kCEntryFPAddress,
4396                                           isolate()));
4397     Sd(fp, MemOperand(scratch));
4398     li(scratch,
4399        ExternalReference::Create(IsolateAddressId::kContextAddress, isolate()));
4400     Sd(cp, MemOperand(scratch));
4401   }
4402 
4403   const int frame_alignment = MacroAssembler::ActivationFrameAlignment();
4404   if (save_doubles) {
4405     // The stack is already aligned to 0 modulo 8 for double-precision stores.
4406     int space = kNumCallerSavedFPU * kDoubleSize;
4407     Sub64(sp, sp, Operand(space));
4408     int count = 0;
4409     for (int i = 0; i < kNumFPURegisters; i++) {
4410       if (kCallerSavedFPU.bits() & (1 << i)) {
4411         FPURegister reg = FPURegister::from_code(i);
4412         StoreDouble(reg, MemOperand(sp, count * kDoubleSize));
4413         count++;
4414       }
4415     }
4416   }
4417 
4418   // Reserve space for the return address, the stack space, and an optional
4419   // slot (used by DirectCEntry to hold the return value if a struct is
4420   // returned), and align the frame for calling the runtime function.
4421   DCHECK_GE(stack_space, 0);
4422   Sub64(sp, sp, Operand((stack_space + 2) * kSystemPointerSize));
4423   if (frame_alignment > 0) {
4424     DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
4425     And(sp, sp, Operand(-frame_alignment));  // Align stack.
4426   }
4427 
4428   // Set the exit frame sp value to point just before the return address
4429   // location.
4430   UseScratchRegisterScope temps(this);
4431   Register scratch = temps.Acquire();
4432   addi(scratch, sp, kSystemPointerSize);
4433   Sd(scratch, MemOperand(fp, ExitFrameConstants::kSPOffset));
4434 }
4435 
4436 void MacroAssembler::LeaveExitFrame(bool save_doubles, Register argument_count,
4437                                     bool do_return,
4438                                     bool argument_count_is_length) {
4439   ASM_CODE_COMMENT(this);
4440   UseScratchRegisterScope temps(this);
4441   Register scratch = temps.Acquire();
4442   BlockTrampolinePoolScope block_trampoline_pool(this);
4443   // Optionally restore all double registers.
4444   if (save_doubles) {
4445     // Remember: we only need to restore kCallerSavedFPU.
4446     Sub64(scratch, fp,
4447           Operand(ExitFrameConstants::kFixedFrameSizeFromFp +
4448                   kNumCallerSavedFPU * kDoubleSize));
4449     int count = 0;
4450     for (int i = 0; i < kNumFPURegisters; i++) {
4451       if (kCallerSavedFPU.bits() & (1 << i)) {
4452         FPURegister reg = FPURegister::from_code(i);
4453         LoadDouble(reg, MemOperand(scratch, count * kDoubleSize));
4454         count++;
4455       }
4456     }
4457   }
4458 
4459   // Clear top frame.
4460   li(scratch,
4461      ExternalReference::Create(IsolateAddressId::kCEntryFPAddress, isolate()));
4462   Sd(zero_reg, MemOperand(scratch));
4463 
4464   // Restore current context from top and clear it in debug mode.
4465   li(scratch,
4466      ExternalReference::Create(IsolateAddressId::kContextAddress, isolate()));
4467   Ld(cp, MemOperand(scratch));
4468 
4469   if (FLAG_debug_code) {
4470     UseScratchRegisterScope temp(this);
4471     Register scratch2 = temp.Acquire();
4472     li(scratch2, Operand(Context::kInvalidContext));
4473     Sd(scratch2, MemOperand(scratch));
4474   }
4475 
4476   // Pop the arguments, restore registers, and return.
4477   Mv(sp, fp);  // Respect ABI stack constraint.
4478   Ld(fp, MemOperand(sp, ExitFrameConstants::kCallerFPOffset));
4479   Ld(ra, MemOperand(sp, ExitFrameConstants::kCallerPCOffset));
4480 
4481   if (argument_count.is_valid()) {
4482     if (argument_count_is_length) {
4483       add(sp, sp, argument_count);
4484     } else {
4485       CalcScaledAddress(sp, sp, argument_count, kSystemPointerSizeLog2);
4486     }
4487   }
4488 
4489   addi(sp, sp, 2 * kSystemPointerSize);
4490 
4491   if (do_return) {
4492     Ret();
4493   }
4494 }
4495 
4496 int TurboAssembler::ActivationFrameAlignment() {
4497 #if V8_HOST_ARCH_RISCV64
4498   // Running on the real platform. Use the alignment as mandated by the local
4499   // environment.
4500   // Note: This will break if we ever start generating snapshots on one RISC-V
4501   // platform for another RISC-V platform with a different alignment.
4502   return base::OS::ActivationFrameAlignment();
4503 #else   // V8_HOST_ARCH_RISCV64
4504   // If we are using the simulator then we should always align to the expected
4505   // alignment. As the simulator is used to generate snapshots we do not know
4506   // if the target platform will need alignment, so this is controlled from a
4507   // flag.
4508   return FLAG_sim_stack_alignment;
4509 #endif  // V8_HOST_ARCH_RISCV64
4510 }
4511 
4512 void MacroAssembler::AssertStackIsAligned() {
4513   if (FLAG_debug_code) {
4514     ASM_CODE_COMMENT(this);
4515     const int frame_alignment = ActivationFrameAlignment();
4516     const int frame_alignment_mask = frame_alignment - 1;
4517 
4518     if (frame_alignment > kSystemPointerSize) {
4519       Label alignment_as_expected;
4520       DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
4521       {
4522         UseScratchRegisterScope temps(this);
4523         Register scratch = temps.Acquire();
4524         andi(scratch, sp, frame_alignment_mask);
4525         BranchShort(&alignment_as_expected, eq, scratch, Operand(zero_reg));
4526       }
4527       // Don't use Check here, as it will call Runtime_Abort re-entering here.
4528       ebreak();
4529       bind(&alignment_as_expected);
4530     }
4531   }
4532 }
4533 
4534 void TurboAssembler::SmiUntag(Register dst, const MemOperand& src) {
4535   ASM_CODE_COMMENT(this);
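  // With 32-bit Smis the payload sits in the upper word of the field, so a
  // 32-bit load of that word reads the value already untagged; with 31-bit
  // Smis the field is loaded and then shifted right by the tag size.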
4536   if (SmiValuesAre32Bits()) {
4537     Lw(dst, MemOperand(src.rm(), SmiWordOffset(src.offset())));
4538   } else {
4539     DCHECK(SmiValuesAre31Bits());
4540     if (COMPRESS_POINTERS_BOOL) {
4541       Lw(dst, src);
4542     } else {
4543       Ld(dst, src);
4544     }
4545     SmiUntag(dst);
4546   }
4547 }
4548 
4549 void TurboAssembler::SmiToInt32(Register smi) {
4550   ASM_CODE_COMMENT(this);
4551   if (FLAG_enable_slow_asserts) {
4552     AssertSmi(smi);
4553   }
4554   DCHECK(SmiValuesAre32Bits() || SmiValuesAre31Bits());
4555   SmiUntag(smi);
4556 }
4557 
4558 void TurboAssembler::JumpIfSmi(Register value, Label* smi_label) {
4559   ASM_CODE_COMMENT(this);
4560   DCHECK_EQ(0, kSmiTag);
4561   UseScratchRegisterScope temps(this);
4562   Register scratch = temps.Acquire();
4563   andi(scratch, value, kSmiTagMask);
4564   Branch(smi_label, eq, scratch, Operand(zero_reg));
4565 }
4566 
4567 void MacroAssembler::JumpIfNotSmi(Register value, Label* not_smi_label) {
4568   ASM_CODE_COMMENT(this);
4569   UseScratchRegisterScope temps(this);
4570   Register scratch = temps.Acquire();
4571   DCHECK_EQ(0, kSmiTag);
4572   andi(scratch, value, kSmiTagMask);
4573   Branch(not_smi_label, ne, scratch, Operand(zero_reg));
4574 }
4575 
4576 void TurboAssembler::AssertNotSmi(Register object, AbortReason reason) {
4577   if (FLAG_debug_code) {
4578     ASM_CODE_COMMENT(this);
4579     STATIC_ASSERT(kSmiTag == 0);
4580     DCHECK(object != kScratchReg);
4581     andi(kScratchReg, object, kSmiTagMask);
4582     Check(ne, reason, kScratchReg, Operand(zero_reg));
4583   }
4584 }
4585 
4586 void TurboAssembler::AssertSmi(Register object, AbortReason reason) {
4587   if (FLAG_debug_code) {
4588     ASM_CODE_COMMENT(this);
4589     STATIC_ASSERT(kSmiTag == 0);
4590     DCHECK(object != kScratchReg);
4591     andi(kScratchReg, object, kSmiTagMask);
4592     Check(eq, reason, kScratchReg, Operand(zero_reg));
4593   }
4594 }
4595 
4596 void MacroAssembler::AssertConstructor(Register object) {
4597   if (FLAG_debug_code) {
4598     ASM_CODE_COMMENT(this);
4599     DCHECK(object != kScratchReg);
4600     BlockTrampolinePoolScope block_trampoline_pool(this);
4601     STATIC_ASSERT(kSmiTag == 0);
4602     SmiTst(object, kScratchReg);
4603     Check(ne, AbortReason::kOperandIsASmiAndNotAConstructor, kScratchReg,
4604           Operand(zero_reg));
4605 
4606     LoadMap(kScratchReg, object);
4607     Lbu(kScratchReg, FieldMemOperand(kScratchReg, Map::kBitFieldOffset));
4608     And(kScratchReg, kScratchReg, Operand(Map::Bits1::IsConstructorBit::kMask));
4609     Check(ne, AbortReason::kOperandIsNotAConstructor, kScratchReg,
4610           Operand(zero_reg));
4611   }
4612 }
4613 
4614 void MacroAssembler::AssertFunction(Register object) {
4615   if (FLAG_debug_code) {
4616     ASM_CODE_COMMENT(this);
4617     BlockTrampolinePoolScope block_trampoline_pool(this);
4618     STATIC_ASSERT(kSmiTag == 0);
4619     DCHECK(object != kScratchReg);
4620     SmiTst(object, kScratchReg);
4621     Check(ne, AbortReason::kOperandIsASmiAndNotAFunction, kScratchReg,
4622           Operand(zero_reg));
4623     push(object);
4624     LoadMap(object, object);
4625     UseScratchRegisterScope temps(this);
4626     Register range = temps.Acquire();
4627     GetInstanceTypeRange(object, object, FIRST_JS_FUNCTION_TYPE, range);
4628     Check(Uless_equal, AbortReason::kOperandIsNotAFunction, range,
4629           Operand(LAST_JS_FUNCTION_TYPE - FIRST_JS_FUNCTION_TYPE));
4630     pop(object);
4631   }
4632 }
4633 
4634 void MacroAssembler::AssertCallableFunction(Register object) {
4635   if (!FLAG_debug_code) return;
4636   ASM_CODE_COMMENT(this);
4637   STATIC_ASSERT(kSmiTag == 0);
4638   AssertNotSmi(object, AbortReason::kOperandIsASmiAndNotAFunction);
4639   push(object);
4640   LoadMap(object, object);
4641   UseScratchRegisterScope temps(this);
4642   Register range = temps.Acquire();
4643   GetInstanceTypeRange(object, object, FIRST_CALLABLE_JS_FUNCTION_TYPE, range);
4644   Check(Uless_equal, AbortReason::kOperandIsNotACallableFunction, range,
4645         Operand(LAST_CALLABLE_JS_FUNCTION_TYPE -
4646                 FIRST_CALLABLE_JS_FUNCTION_TYPE));
4647   pop(object);
4648 }
4649 
4650 void MacroAssembler::AssertBoundFunction(Register object) {
4651   if (FLAG_debug_code) {
4652     ASM_CODE_COMMENT(this);
4653     BlockTrampolinePoolScope block_trampoline_pool(this);
4654     STATIC_ASSERT(kSmiTag == 0);
4655     DCHECK(object != kScratchReg);
4656     SmiTst(object, kScratchReg);
4657     Check(ne, AbortReason::kOperandIsASmiAndNotABoundFunction, kScratchReg,
4658           Operand(zero_reg));
4659     GetObjectType(object, kScratchReg, kScratchReg);
4660     Check(eq, AbortReason::kOperandIsNotABoundFunction, kScratchReg,
4661           Operand(JS_BOUND_FUNCTION_TYPE));
4662   }
4663 }
4664 
4665 void MacroAssembler::AssertGeneratorObject(Register object) {
4666   if (!FLAG_debug_code) return;
4667   ASM_CODE_COMMENT(this);
4668   BlockTrampolinePoolScope block_trampoline_pool(this);
4669   STATIC_ASSERT(kSmiTag == 0);
4670   DCHECK(object != kScratchReg);
4671   SmiTst(object, kScratchReg);
4672   Check(ne, AbortReason::kOperandIsASmiAndNotAGeneratorObject, kScratchReg,
4673         Operand(zero_reg));
4674 
4675   GetObjectType(object, kScratchReg, kScratchReg);
4676 
4677   Label done;
4678 
4679   // Check if JSGeneratorObject
4680   BranchShort(&done, eq, kScratchReg, Operand(JS_GENERATOR_OBJECT_TYPE));
4681 
4682   // Check if JSAsyncFunctionObject (See MacroAssembler::CompareInstanceType)
4683   BranchShort(&done, eq, kScratchReg, Operand(JS_ASYNC_FUNCTION_OBJECT_TYPE));
4684 
4685   // Check if JSAsyncGeneratorObject
4686   BranchShort(&done, eq, kScratchReg, Operand(JS_ASYNC_GENERATOR_OBJECT_TYPE));
4687 
4688   Abort(AbortReason::kOperandIsNotAGeneratorObject);
4689 
4690   bind(&done);
4691 }
4692 
4693 void MacroAssembler::AssertUndefinedOrAllocationSite(Register object,
4694                                                      Register scratch) {
4695   if (FLAG_debug_code) {
4696     ASM_CODE_COMMENT(this);
4697     Label done_checking;
4698     AssertNotSmi(object);
4699     LoadRoot(scratch, RootIndex::kUndefinedValue);
4700     BranchShort(&done_checking, eq, object, Operand(scratch));
4701     GetObjectType(object, scratch, scratch);
4702     Assert(eq, AbortReason::kExpectedUndefinedOrCell, scratch,
4703            Operand(ALLOCATION_SITE_TYPE));
4704     bind(&done_checking);
4705   }
4706 }
4707 
4708 template <typename F_TYPE>
4709 void TurboAssembler::FloatMinMaxHelper(FPURegister dst, FPURegister src1,
4710                                        FPURegister src2, MaxMinKind kind) {
4711   DCHECK((std::is_same<F_TYPE, float>::value) ||
4712          (std::is_same<F_TYPE, double>::value));
4713 
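  // If both inputs are the same register, min and max are simply that value,
  // so a register move suffices.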
4714   if (src1 == src2 && dst != src1) {
4715     if (std::is_same<float, F_TYPE>::value) {
4716       fmv_s(dst, src1);
4717     } else {
4718       fmv_d(dst, src1);
4719     }
4720     return;
4721   }
4722 
4723   Label done, nan;
4724 
4725   // On RISC-V, fmin/fmax return the other, non-NaN operand if only one
4726   // operand is NaN; for JS, if any operand is NaN the result is NaN. The
4727   // following handles the discrepancy between the ISA semantics and the JS
4728   // semantics for NaN.
4729   UseScratchRegisterScope temps(this);
4730   Register scratch = temps.Acquire();
4731   if (std::is_same<float, F_TYPE>::value) {
4732     CompareIsNotNanF32(scratch, src1, src2);
4733   } else {
4734     CompareIsNotNanF64(scratch, src1, src2);
4735   }
4736   BranchFalseF(scratch, &nan);
4737 
4738   if (kind == MaxMinKind::kMax) {
4739     if (std::is_same<float, F_TYPE>::value) {
4740       fmax_s(dst, src1, src2);
4741     } else {
4742       fmax_d(dst, src1, src2);
4743     }
4744   } else {
4745     if (std::is_same<float, F_TYPE>::value) {
4746       fmin_s(dst, src1, src2);
4747     } else {
4748       fmin_d(dst, src1, src2);
4749     }
4750   }
4751   j(&done);
4752 
4753   bind(&nan);
4754   // if any operand is NaN, return NaN (fadd returns NaN if any operand is NaN)
4755   if (std::is_same<float, F_TYPE>::value) {
4756     fadd_s(dst, src1, src2);
4757   } else {
4758     fadd_d(dst, src1, src2);
4759   }
4760 
4761   bind(&done);
4762 }
4763 
4764 void TurboAssembler::Float32Max(FPURegister dst, FPURegister src1,
4765                                 FPURegister src2) {
4766   ASM_CODE_COMMENT(this);
4767   FloatMinMaxHelper<float>(dst, src1, src2, MaxMinKind::kMax);
4768 }
4769 
4770 void TurboAssembler::Float32Min(FPURegister dst, FPURegister src1,
4771                                 FPURegister src2) {
4772   ASM_CODE_COMMENT(this);
4773   FloatMinMaxHelper<float>(dst, src1, src2, MaxMinKind::kMin);
4774 }
4775 
4776 void TurboAssembler::Float64Max(FPURegister dst, FPURegister src1,
4777                                 FPURegister src2) {
4778   ASM_CODE_COMMENT(this);
4779   FloatMinMaxHelper<double>(dst, src1, src2, MaxMinKind::kMax);
4780 }
4781 
4782 void TurboAssembler::Float64Min(FPURegister dst, FPURegister src1,
4783                                 FPURegister src2) {
4784   ASM_CODE_COMMENT(this);
4785   FloatMinMaxHelper<double>(dst, src1, src2, MaxMinKind::kMin);
4786 }
4787 
4788 static const int kRegisterPassedArguments = 8;
4789 
4790 int TurboAssembler::CalculateStackPassedDWords(int num_gp_arguments,
4791                                                int num_fp_arguments) {
4792   int stack_passed_dwords = 0;
4793 
4794   // Up to eight integer arguments are passed in registers a0..a7 and
4795   // up to eight floating point arguments are passed in registers fa0..fa7
4796   if (num_gp_arguments > kRegisterPassedArguments) {
4797     stack_passed_dwords += num_gp_arguments - kRegisterPassedArguments;
4798   }
4799   if (num_fp_arguments > kRegisterPassedArguments) {
4800     stack_passed_dwords += num_fp_arguments - kRegisterPassedArguments;
4801   }
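  // For example, 10 GP arguments and 9 FP arguments need (10 - 8) + (9 - 8) =
  // 3 stack dwords in addition to the fixed kCArgSlotCount slots.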
4802   stack_passed_dwords += kCArgSlotCount;
4803   return stack_passed_dwords;
4804 }
4805 
4806 void TurboAssembler::PrepareCallCFunction(int num_reg_arguments,
4807                                           int num_double_arguments,
4808                                           Register scratch) {
4809   ASM_CODE_COMMENT(this);
4810   int frame_alignment = ActivationFrameAlignment();
4811 
4812   // Up to eight simple arguments in a0..a7, fa0..fa7.
4813   // Remaining arguments are pushed on the stack (arg slot calculation handled
4814   // by CalculateStackPassedDWords()).
4815   int stack_passed_arguments =
4816       CalculateStackPassedDWords(num_reg_arguments, num_double_arguments);
4817   if (frame_alignment > kSystemPointerSize) {
4818     // Make stack end at alignment and make room for stack arguments and the
4819     // original value of sp.
4820     Mv(scratch, sp);
4821     Sub64(sp, sp, Operand((stack_passed_arguments + 1) * kSystemPointerSize));
4822     DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
4823     And(sp, sp, Operand(-frame_alignment));
4824     Sd(scratch, MemOperand(sp, stack_passed_arguments * kSystemPointerSize));
4825   } else {
4826     Sub64(sp, sp, Operand(stack_passed_arguments * kSystemPointerSize));
4827   }
4828 }
4829 
4830 void TurboAssembler::PrepareCallCFunction(int num_reg_arguments,
4831                                           Register scratch) {
4832   PrepareCallCFunction(num_reg_arguments, 0, scratch);
4833 }
4834 
4835 void TurboAssembler::CallCFunction(ExternalReference function,
4836                                    int num_reg_arguments,
4837                                    int num_double_arguments) {
4838   BlockTrampolinePoolScope block_trampoline_pool(this);
4839   li(t6, function);
4840   CallCFunctionHelper(t6, num_reg_arguments, num_double_arguments);
4841 }
4842 
4843 void TurboAssembler::CallCFunction(Register function, int num_reg_arguments,
4844                                    int num_double_arguments) {
4845   CallCFunctionHelper(function, num_reg_arguments, num_double_arguments);
4846 }
4847 
4848 void TurboAssembler::CallCFunction(ExternalReference function,
4849                                    int num_arguments) {
4850   CallCFunction(function, num_arguments, 0);
4851 }
4852 
4853 void TurboAssembler::CallCFunction(Register function, int num_arguments) {
4854   CallCFunction(function, num_arguments, 0);
4855 }
4856 
4857 void TurboAssembler::CallCFunctionHelper(Register function,
4858                                          int num_reg_arguments,
4859                                          int num_double_arguments) {
4860   DCHECK_LE(num_reg_arguments + num_double_arguments, kMaxCParameters);
4861   DCHECK(has_frame());
4862   ASM_CODE_COMMENT(this);
4863   // Make sure that the stack is aligned before calling a C function unless
4864   // running in the simulator. The simulator has its own alignment check which
4865   // provides more information.
4866   // The argument slots are presumed to have been set up by
4867   // PrepareCallCFunction.
4868 
4869 #if V8_HOST_ARCH_RISCV64
4870   if (FLAG_debug_code) {
4871     int frame_alignment = base::OS::ActivationFrameAlignment();
4872     int frame_alignment_mask = frame_alignment - 1;
4873     if (frame_alignment > kSystemPointerSize) {
4874       DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
4875       Label alignment_as_expected;
4876       {
4877         UseScratchRegisterScope temps(this);
4878         Register scratch = temps.Acquire();
4879         And(scratch, sp, Operand(frame_alignment_mask));
4880         BranchShort(&alignment_as_expected, eq, scratch, Operand(zero_reg));
4881       }
4882       // Don't use Check here, as it will call Runtime_Abort possibly
4883       // re-entering here.
4884       ebreak();
4885       bind(&alignment_as_expected);
4886     }
4887   }
4888 #endif  // V8_HOST_ARCH_RISCV64
4889 
4890   // Just call directly. The function called cannot cause a GC, or
4891   // allow preemption, so the return address in the link register
4892   // stays correct.
4893   {
4894     if (function != t6) {
4895       Mv(t6, function);
4896       function = t6;
4897     }
4898 
4899     // Save the frame pointer and PC so that the stack layout remains
4900     // iterable, even without an ExitFrame which normally exists between JS
4901     // and C frames.
4902     // 't' registers are caller-saved, so they are safe to use as scratch
4903     // registers here.
4903     Register pc_scratch = t1;
4904     Register scratch = t2;
4905 
4906     auipc(pc_scratch, 0);
4907     // See x64 code for reasoning about how to address the isolate data fields.
4908     if (root_array_available()) {
4909       Sd(pc_scratch, MemOperand(kRootRegister,
4910                                 IsolateData::fast_c_call_caller_pc_offset()));
4911       Sd(fp, MemOperand(kRootRegister,
4912                         IsolateData::fast_c_call_caller_fp_offset()));
4913     } else {
4914       DCHECK_NOT_NULL(isolate());
4915       li(scratch, ExternalReference::fast_c_call_caller_pc_address(isolate()));
4916       Sd(pc_scratch, MemOperand(scratch));
4917       li(scratch, ExternalReference::fast_c_call_caller_fp_address(isolate()));
4918       Sd(fp, MemOperand(scratch));
4919     }
4920 
4921     Call(function);
4922 
4923     if (isolate() != nullptr) {
4924       // We don't unset the PC; the FP is the source of truth.
4925       UseScratchRegisterScope temps(this);
4926       Register scratch = temps.Acquire();
4927       li(scratch, ExternalReference::fast_c_call_caller_fp_address(isolate()));
4928       Sd(zero_reg, MemOperand(scratch));
4929     }
4930   }
4931 
4932   int stack_passed_arguments =
4933       CalculateStackPassedDWords(num_reg_arguments, num_double_arguments);
4934 
4935   if (base::OS::ActivationFrameAlignment() > kSystemPointerSize) {
4936     Ld(sp, MemOperand(sp, stack_passed_arguments * kSystemPointerSize));
4937   } else {
4938     Add64(sp, sp, Operand(stack_passed_arguments * kSystemPointerSize));
4939   }
4940 }
4941 
4942 #undef BRANCH_ARGS_CHECK
4943 
4944 void TurboAssembler::CheckPageFlag(Register object, Register scratch, int mask,
4945                                    Condition cc, Label* condition_met) {
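  // Mask the object address down to the start of its page (MemoryChunk), load
  // the chunk's flags word, and branch if the requested flag bits satisfy cc.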
4946   And(scratch, object, Operand(~kPageAlignmentMask));
4947   Ld(scratch, MemOperand(scratch, BasicMemoryChunk::kFlagsOffset));
4948   And(scratch, scratch, Operand(mask));
4949   Branch(condition_met, cc, scratch, Operand(zero_reg));
4950 }
4951 
4952 Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2, Register reg3,
4953                                    Register reg4, Register reg5,
4954                                    Register reg6) {
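  // Walk the allocatable general-purpose registers and return the first one
  // that is not among the (up to six) excluded registers.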
4955   RegList regs = {reg1, reg2, reg3, reg4, reg5, reg6};
4956 
4957   const RegisterConfiguration* config = RegisterConfiguration::Default();
4958   for (int i = 0; i < config->num_allocatable_general_registers(); ++i) {
4959     int code = config->GetAllocatableGeneralCode(i);
4960     Register candidate = Register::from_code(code);
4961     if (regs.has(candidate)) continue;
4962     return candidate;
4963   }
4964   UNREACHABLE();
4965 }
4966 
4967 void TurboAssembler::ComputeCodeStartAddress(Register dst) {
4968   // This push on ra and the pop below together ensure that we restore the
4969   // register ra, which is needed while computing the code start address.
4970   push(ra);
4971 
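  // After the auipc + addi, ra holds the runtime address of the li below,
  // while dst is loaded with that li's offset from the start of the generated
  // code, so ra - dst yields the address at which the code starts.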
4972   auipc(ra, 0);
4973   addi(ra, ra, kInstrSize * 2);  // ra = address of li
4974   int pc = pc_offset();
4975   li(dst, Operand(pc));
4976   Sub64(dst, ra, dst);
4977 
4978   pop(ra);  // Restore ra
4979 }
4980 
4981 void TurboAssembler::CallForDeoptimization(Builtin target, int, Label* exit,
4982                                            DeoptimizeKind kind, Label* ret,
4983                                            Label*) {
4984   ASM_CODE_COMMENT(this);
4985   BlockTrampolinePoolScope block_trampoline_pool(this);
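  // Load the deoptimization builtin's entry point from the isolate's builtin
  // entry table (addressed off kRootRegister) and call it; the DCHECK below
  // verifies the exit sequence has the fixed size the deoptimizer expects.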
4986   Ld(t6,
4987      MemOperand(kRootRegister, IsolateData::BuiltinEntrySlotOffset(target)));
4988   Call(t6);
4989   DCHECK_EQ(SizeOfCodeGeneratedSince(exit),
4990             (kind == DeoptimizeKind::kLazy) ? Deoptimizer::kLazyDeoptExitSize
4991                                             : Deoptimizer::kEagerDeoptExitSize);
4992 }
4993 
4994 void TurboAssembler::LoadCodeObjectEntry(Register destination,
4995                                          Register code_object) {
4996   // Code objects are called differently depending on whether we are generating
4997   // builtin code (which will later be embedded into the binary) or compiling
4998   // user JS code at runtime.
4999   // * Builtin code runs in --jitless mode and thus must not call into on-heap
5000   //   Code targets. Instead, we dispatch through the builtins entry table.
5001   // * Codegen at runtime does not have this restriction and we can use the
5002   //   shorter, branchless instruction sequence. The assumption here is that
5003   //   targets are usually generated code and not builtin Code objects.
5004   ASM_CODE_COMMENT(this);
5005   if (options().isolate_independent_code) {
5006     DCHECK(root_array_available());
5007     Label if_code_is_off_heap, out;
5008 
5009     UseScratchRegisterScope temps(this);
5010     Register scratch = temps.Acquire();
5011 
5012     DCHECK(!AreAliased(destination, scratch));
5013     DCHECK(!AreAliased(code_object, scratch));
5014 
5015     // Check whether the Code object is an off-heap trampoline. If so, call its
5016     // (off-heap) entry point directly without going through the (on-heap)
5017     // trampoline.  Otherwise, just call the Code object as always.
5018 
5019     Lw(scratch, FieldMemOperand(code_object, Code::kFlagsOffset));
5020     And(scratch, scratch, Operand(Code::IsOffHeapTrampoline::kMask));
5021     Branch(&if_code_is_off_heap, ne, scratch, Operand(zero_reg));
5022     // Not an off-heap trampoline object, the entry point is at
5023     // Code::raw_instruction_start().
5024     Add64(destination, code_object, Code::kHeaderSize - kHeapObjectTag);
5025     Branch(&out);
5026 
5027     // An off-heap trampoline, the entry point is loaded from the builtin entry
5028     // table.
5029     bind(&if_code_is_off_heap);
5030     Lw(scratch, FieldMemOperand(code_object, Code::kBuiltinIndexOffset));
5031     slli(destination, scratch, kSystemPointerSizeLog2);
5032     Add64(destination, destination, kRootRegister);
5033     Ld(destination,
5034        MemOperand(destination, IsolateData::builtin_entry_table_offset()));
5035 
5036     bind(&out);
5037   } else {
5038     Add64(destination, code_object, Code::kHeaderSize - kHeapObjectTag);
5039   }
5040 }
5041 
5042 void TurboAssembler::CallCodeObject(Register code_object) {
5043   ASM_CODE_COMMENT(this);
5044   LoadCodeObjectEntry(code_object, code_object);
5045   Call(code_object);
5046 }
5047 
5048 void TurboAssembler::JumpCodeObject(Register code_object, JumpMode jump_mode) {
5049   ASM_CODE_COMMENT(this);
5050   DCHECK_EQ(JumpMode::kJump, jump_mode);
5051   LoadCodeObjectEntry(code_object, code_object);
5052   Jump(code_object);
5053 }
5054 
5055 void TurboAssembler::LoadTaggedPointerField(const Register& destination,
5056                                             const MemOperand& field_operand) {
5057   if (COMPRESS_POINTERS_BOOL) {
5058     DecompressTaggedPointer(destination, field_operand);
5059   } else {
5060     Ld(destination, field_operand);
5061   }
5062 }
5063 
5064 void TurboAssembler::LoadAnyTaggedField(const Register& destination,
5065                                         const MemOperand& field_operand) {
5066   if (COMPRESS_POINTERS_BOOL) {
5067     DecompressAnyTagged(destination, field_operand);
5068   } else {
5069     Ld(destination, field_operand);
5070   }
5071 }
5072 
5073 void TurboAssembler::LoadTaggedSignedField(const Register& destination,
5074                                            const MemOperand& field_operand) {
5075   if (COMPRESS_POINTERS_BOOL) {
5076     DecompressTaggedSigned(destination, field_operand);
5077   } else {
5078     Ld(destination, field_operand);
5079   }
5080 }
5081 
5082 void TurboAssembler::SmiUntagField(Register dst, const MemOperand& src) {
5083   SmiUntag(dst, src);
5084 }
5085 
5086 void TurboAssembler::StoreTaggedField(const Register& value,
5087                                       const MemOperand& dst_field_operand) {
5088   if (COMPRESS_POINTERS_BOOL) {
5089     Sw(value, dst_field_operand);
5090   } else {
5091     Sd(value, dst_field_operand);
5092   }
5093 }
5094 
5095 void TurboAssembler::DecompressTaggedSigned(const Register& destination,
5096                                             const MemOperand& field_operand) {
5097   ASM_CODE_COMMENT(this);
5098   Lwu(destination, field_operand);
5099   if (FLAG_debug_code) {
5100     // Corrupt the top 32 bits. Made up of 16 fixed bits and 16 pc offset bits.
5101     Add64(destination, destination,
5102           Operand(((kDebugZapValue << 16) | (pc_offset() & 0xffff)) << 32));
5103   }
5104 }
5105 
5106 void TurboAssembler::DecompressTaggedPointer(const Register& destination,
5107                                              const MemOperand& field_operand) {
5108   ASM_CODE_COMMENT(this);
5109   Lwu(destination, field_operand);
5110   Add64(destination, kPtrComprCageBaseRegister, destination);
5111 }
5112 
5113 void TurboAssembler::DecompressTaggedPointer(const Register& destination,
5114                                              const Register& source) {
5115   ASM_CODE_COMMENT(this);
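  // Keep only the low 32 bits of the compressed tagged value and add the
  // pointer-compression cage base to reconstruct the full 64-bit pointer.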
5116   And(destination, source, Operand(0xFFFFFFFF));
5117   Add64(destination, kPtrComprCageBaseRegister, Operand(destination));
5118 }
5119 
5120 void TurboAssembler::DecompressAnyTagged(const Register& destination,
5121                                          const MemOperand& field_operand) {
5122   ASM_CODE_COMMENT(this);
5123   Lwu(destination, field_operand);
5124   Add64(destination, kPtrComprCageBaseRegister, destination);
5125 }
5126 
5127 void MacroAssembler::DropArguments(Register count, ArgumentsCountType type,
5128                                    ArgumentsCountMode mode, Register scratch) {
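  // Pop 'count' arguments off the stack; 'count' may be an integer count, a
  // Smi count, or a byte length. If the count excludes the receiver, one extra
  // slot is popped for it below.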
5129   switch (type) {
5130     case kCountIsInteger: {
5131       CalcScaledAddress(sp, sp, count, kPointerSizeLog2);
5132       break;
5133     }
5134     case kCountIsSmi: {
5135       STATIC_ASSERT(kSmiTagSize == 1 && kSmiTag == 0);
5136       DCHECK_NE(scratch, no_reg);
5137       SmiScale(scratch, count, kPointerSizeLog2);
5138       Add64(sp, sp, scratch);
5139       break;
5140     }
5141     case kCountIsBytes: {
5142       Add64(sp, sp, count);
5143       break;
5144     }
5145   }
5146   if (mode == kCountExcludesReceiver) {
5147     Add64(sp, sp, kSystemPointerSize);
5148   }
5149 }
5150 
5151 void MacroAssembler::DropArgumentsAndPushNewReceiver(Register argc,
5152                                                      Register receiver,
5153                                                      ArgumentsCountType type,
5154                                                      ArgumentsCountMode mode,
5155                                                      Register scratch) {
5156   DCHECK(!AreAliased(argc, receiver));
5157   if (mode == kCountExcludesReceiver) {
5158     // Drop arguments without receiver and override old receiver.
5159     DropArguments(argc, type, kCountIncludesReceiver, scratch);
5160     Sd(receiver, MemOperand(sp));
5161   } else {
5162     DropArguments(argc, type, mode, scratch);
5163     push(receiver);
5164   }
5165 }
5166 
5167 }  // namespace internal
5168 }  // namespace v8
5169 
5170 #endif  // V8_TARGET_ARCH_RISCV64
5171