1 /*
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16 Encoder (implementation of math and memory low-level emitters)
17 */
18 
19 #include <aarch64/macro-assembler-aarch64.h>
20 #include <cstddef>
21 #include "compiler/optimizer/code_generator/target/aarch64/target.h"
22 #include "compiler/optimizer/code_generator/encode.h"
23 #include "compiler/optimizer/code_generator/fast_divisor.h"
24 #include "scoped_tmp_reg.h"
25 #include "compiler/optimizer/code_generator/relocations.h"
26 
27 #if defined(USE_VIXL_ARM64) && !defined(PANDA_MINIMAL_VIXL)
28 #include "aarch64/disasm-aarch64.h"
29 #endif
30 
31 #include <iomanip>
32 
33 #include "lib_helpers.inl"
34 
35 #ifndef PANDA_TARGET_MACOS
36 #include "elf.h"
37 #endif  // PANDA_TARGET_MACOS
38 
39 namespace ark::compiler::aarch64 {
40 using vixl::aarch64::CPURegister;
41 using vixl::aarch64::MemOperand;
42 
43 /// Converters
44 static vixl::aarch64::Condition Convert(const Condition cc)
45 {
46     switch (cc) {
47         case Condition::EQ:
48             return vixl::aarch64::Condition::eq;
49         case Condition::NE:
50             return vixl::aarch64::Condition::ne;
51         case Condition::LT:
52             return vixl::aarch64::Condition::lt;
53         case Condition::GT:
54             return vixl::aarch64::Condition::gt;
55         case Condition::LE:
56             return vixl::aarch64::Condition::le;
57         case Condition::GE:
58             return vixl::aarch64::Condition::ge;
59         case Condition::LO:
60             return vixl::aarch64::Condition::lo;
61         case Condition::LS:
62             return vixl::aarch64::Condition::ls;
63         case Condition::HI:
64             return vixl::aarch64::Condition::hi;
65         case Condition::HS:
66             return vixl::aarch64::Condition::hs;
67         // NOTE(igorban) : Remove them
68         case Condition::MI:
69             return vixl::aarch64::Condition::mi;
70         case Condition::PL:
71             return vixl::aarch64::Condition::pl;
72         case Condition::VS:
73             return vixl::aarch64::Condition::vs;
74         case Condition::VC:
75             return vixl::aarch64::Condition::vc;
76         case Condition::AL:
77             return vixl::aarch64::Condition::al;
78         case Condition::NV:
79             return vixl::aarch64::Condition::nv;
80         default:
81             UNREACHABLE();
82             return vixl::aarch64::Condition::eq;
83     }
84 }
85 
86 static vixl::aarch64::Condition ConvertTest(const Condition cc)
87 {
88     ASSERT(cc == Condition::TST_EQ || cc == Condition::TST_NE);
89     return cc == Condition::TST_EQ ? vixl::aarch64::Condition::eq : vixl::aarch64::Condition::ne;
90 }
91 
92 static vixl::aarch64::Shift Convert(const ShiftType type)
93 {
94     switch (type) {
95         case ShiftType::LSL:
96             return vixl::aarch64::Shift::LSL;
97         case ShiftType::LSR:
98             return vixl::aarch64::Shift::LSR;
99         case ShiftType::ASR:
100             return vixl::aarch64::Shift::ASR;
101         case ShiftType::ROR:
102             return vixl::aarch64::Shift::ROR;
103         default:
104             UNREACHABLE();
105     }
106 }
107 
108 static vixl::aarch64::VRegister VixlVReg(Reg reg)
109 {
110     ASSERT(reg.IsValid());
111     auto vixlVreg = vixl::aarch64::VRegister(reg.GetId(), reg.GetSize());
112     ASSERT(vixlVreg.IsValid());
113     return vixlVreg;
114 }
115 
116 static vixl::aarch64::Operand VixlShift(Shift shift)
117 {
118     Reg reg = shift.GetBase();
119     ASSERT(reg.IsValid());
120     if (reg.IsScalar()) {
121         ASSERT(reg.IsScalar());
122         size_t regSize = reg.GetSize();
123         if (regSize < WORD_SIZE) {
124             regSize = WORD_SIZE;
125         }
126         auto vixlReg = vixl::aarch64::Register(reg.GetId(), regSize);
127         ASSERT(vixlReg.IsValid());
128 
129         return vixl::aarch64::Operand(vixlReg, Convert(shift.GetType()), shift.GetScale());
130     }
131 
132     // Invalid register type
133     UNREACHABLE();
134 }
135 
136 static vixl::aarch64::MemOperand ConvertMem(MemRef mem)
137 {
138     bool base = mem.HasBase() && (mem.GetBase().GetId() != vixl::aarch64::xzr.GetCode());
139     bool hasIndex = mem.HasIndex();
140     bool shift = mem.HasScale();
141     bool offset = mem.HasDisp();
142     auto baseReg = Reg(mem.GetBase().GetId(), INT64_TYPE);
143     if (base && !hasIndex && !shift) {
144         // Memory address = x_reg(base) + imm(offset)
145         if (mem.GetDisp() != 0) {
146             auto disp = mem.GetDisp();
147             return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlImm(disp));
148         }
149         // Memory address = x_reg(base)
150         return vixl::aarch64::MemOperand(VixlReg(mem.GetBase(), DOUBLE_WORD_SIZE));
151     }
152     if (base && hasIndex && !offset) {
153         auto scale = mem.GetScale();
154         auto indexReg = mem.GetIndex();
155         // Memory address = x_reg(base) + (SXTW(w_reg(index)) << scale)
156         if (indexReg.GetSize() == WORD_SIZE) {
157             // Sign-extend and shift the w-register used as the index (signed because the index always has a signed type)
158             return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlReg(indexReg), vixl::aarch64::Extend::SXTW, scale);
159         }
160         // Memory address = x_reg(base) + (x_reg(index) << scale)
161         if (scale != 0) {
162             ASSERT(indexReg.GetSize() == DOUBLE_WORD_SIZE);
163             return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlReg(indexReg), vixl::aarch64::LSL, scale);
164         }
165         // Memory address = x_reg(base) + x_reg(index)
166         return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlReg(indexReg));
167     }
168     // Wrong memRef
169     // Return invalid memory operand
170     auto tmp = vixl::aarch64::MemOperand();
171     ASSERT(!tmp.IsValid());
172     return tmp;
173 }
174 
175 static Reg Promote(Reg reg)
176 {
177     if (reg.GetType() == INT8_TYPE) {
178         return Reg(reg.GetId(), INT16_TYPE);
179     }
180     return reg;
181 }
182 
183 Aarch64LabelHolder::LabelId Aarch64LabelHolder::CreateLabel()
184 {
185     ++id_;
186     auto allocator = GetEncoder()->GetAllocator();
187     auto *label = allocator->New<LabelType>(allocator);
188     labels_.push_back(label);
189     ASSERT(labels_.size() == id_);
190     return id_ - 1;
191 }
192 
193 void Aarch64LabelHolder::CreateLabels(LabelId size)
194 {
195     for (LabelId i = 0; i <= size; ++i) {
196         CreateLabel();
197     }
198 }
199 
200 void Aarch64LabelHolder::BindLabel(LabelId id)
201 {
202     static_cast<Aarch64Encoder *>(GetEncoder())->GetMasm()->Bind(labels_[id]);
203 }
204 
205 Aarch64LabelHolder::LabelType *Aarch64LabelHolder::GetLabel(LabelId id) const
206 {
207     ASSERT(labels_.size() > id);
208     return labels_[id];
209 }
210 
211 Aarch64LabelHolder::LabelId Aarch64LabelHolder::Size()
212 {
213     return labels_.size();
214 }
215 
216 Aarch64Encoder::Aarch64Encoder(ArenaAllocator *allocator) : Encoder(allocator, Arch::AARCH64)
217 {
218     labels_ = allocator->New<Aarch64LabelHolder>(this);
219     if (labels_ == nullptr) {
220         SetFalseResult();
221     }
222     // We enable LR tmp reg by default in Aarch64
223     EnableLrAsTempReg(true);
224 }
225 
226 Aarch64Encoder::~Aarch64Encoder()
227 {
228     auto labels = static_cast<Aarch64LabelHolder *>(GetLabels())->labels_;
229     for (auto label : labels) {
230         label->~Label();
231     }
232     if (masm_ != nullptr) {
233         masm_->~MacroAssembler();
234         masm_ = nullptr;
235     }
236 }
237 
238 LabelHolder *Aarch64Encoder::GetLabels() const
239 {
240     ASSERT(labels_ != nullptr);
241     return labels_;
242 }
243 
244 bool Aarch64Encoder::IsValid() const
245 {
246     return true;
247 }
248 
249 constexpr auto Aarch64Encoder::GetTarget()
250 {
251     return ark::compiler::Target(Arch::AARCH64);
252 }
253 
254 void Aarch64Encoder::SetMaxAllocatedBytes(size_t size)
255 {
256     GetMasm()->GetBuffer()->SetMmapMaxBytes(size);
257 }
258 
259 bool Aarch64Encoder::InitMasm()
260 {
261     if (masm_ == nullptr) {
262         // Initialize Masm
263         masm_ = GetAllocator()->New<vixl::aarch64::MacroAssembler>(GetAllocator());
264         if (masm_ == nullptr || !masm_->IsValid()) {
265             SetFalseResult();
266             return false;
267         }
268         ASSERT(GetMasm());
269 
270         // Make sure that the compiler uses the same scratch registers as the assembler
271         CHECK_EQ(RegMask(GetMasm()->GetScratchRegisterList()->GetList()), GetTarget().GetTempRegsMask());
272         CHECK_EQ(RegMask(GetMasm()->GetScratchVRegisterList()->GetList()), GetTarget().GetTempVRegsMask());
273     }
274     return true;
275 }
276 
277 void Aarch64Encoder::Finalize()
278 {
279     GetMasm()->FinalizeCode();
280 }
281 
282 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id)
283 {
284     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
285     GetMasm()->B(label);
286 }
287 
288 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
289 {
290     if (src1.GetId() == GetRegfile()->GetZeroReg().GetId()) {
291         EncodeJump(id, src0, cc);
292         return;
293     }
294 
295     if (src0.IsScalar()) {
296         GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
297     } else {
298         GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
299     }
300 
301     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
302     GetMasm()->B(label, Convert(cc));
303 }
304 
305 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
306 {
307     auto value = imm.GetAsInt();
308     if (value == 0) {
309         EncodeJump(id, src, cc);
310         return;
311     }
312 
313     if (value < 0) {
314         GetMasm()->Cmn(VixlReg(src), VixlImm(-value));
315     } else {  // if (value > 0)
316         GetMasm()->Cmp(VixlReg(src), VixlImm(value));
317     }
318 
319     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
320     GetMasm()->B(label, Convert(cc));
321 }
322 
323 void Aarch64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
324 {
325     ASSERT(src0.IsScalar() && src1.IsScalar());
326 
327     GetMasm()->Tst(VixlReg(src0), VixlReg(src1));
328     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
329     GetMasm()->B(label, ConvertTest(cc));
330 }
331 
332 void Aarch64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
333 {
334     ASSERT(src.IsScalar());
335 
336     auto value = imm.GetAsInt();
337     if (CanEncodeImmLogical(value, src.GetSize() > WORD_SIZE ? DOUBLE_WORD_SIZE : WORD_SIZE)) {
338         GetMasm()->Tst(VixlReg(src), VixlImm(value));
339         auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
340         GetMasm()->B(label, ConvertTest(cc));
341     } else {
342         ScopedTmpReg tmpReg(this, src.GetType());
343         EncodeMov(tmpReg, imm);
344         EncodeJumpTest(id, src, tmpReg, cc);
345     }
346 }
347 
348 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Condition cc)
349 {
350     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
351     ASSERT(src.IsScalar());
352     auto rzero = Reg(GetRegfile()->GetZeroReg().GetId(), src.GetType());
353 
354     switch (cc) {
355         case Condition::LO:
356             // Always false
357             return;
358         case Condition::HS:
359             // Always true
360             GetMasm()->B(label);
361             return;
362         case Condition::EQ:
363         case Condition::LS:
364             if (src.GetId() == rzero.GetId()) {
365                 GetMasm()->B(label);
366                 return;
367             }
368             // True only when zero
369             GetMasm()->Cbz(VixlReg(src), label);
370             return;
371         case Condition::NE:
372         case Condition::HI:
373             if (src.GetId() == rzero.GetId()) {
374                 // Do nothing
375                 return;
376             }
377             // True only when non-zero
378             GetMasm()->Cbnz(VixlReg(src), label);
379             return;
380         default:
381             break;
382     }
383 
384     ASSERT(rzero.IsValid());
385     GetMasm()->Cmp(VixlReg(src), VixlReg(rzero));
386     GetMasm()->B(label, Convert(cc));
387 }
388 
389 void Aarch64Encoder::EncodeJump(Reg dst)
390 {
391     GetMasm()->Br(VixlReg(dst));
392 }
393 
394 void Aarch64Encoder::EncodeJump([[maybe_unused]] RelocationInfo *relocation)
395 {
396 #ifdef PANDA_TARGET_MACOS
397     LOG(FATAL, COMPILER) << "Not supported in Macos build";
398 #else
399     auto buffer = GetMasm()->GetBuffer();
400     relocation->offset = GetCursorOffset();
401     relocation->addend = 0;
402     relocation->type = R_AARCH64_CALL26;
403     static constexpr uint32_t CALL_WITH_ZERO_OFFSET = 0x14000000;
404     buffer->Emit32(CALL_WITH_ZERO_OFFSET);
405 #endif
406 }
407 
408 void Aarch64Encoder::EncodeBitTestAndBranch(LabelHolder::LabelId id, compiler::Reg reg, uint32_t bitPos, bool bitValue)
409 {
410     ASSERT(reg.IsScalar() && reg.GetSize() > bitPos);
411     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
412     if (bitValue) {
413         GetMasm()->Tbnz(VixlReg(reg), bitPos, label);
414     } else {
415         GetMasm()->Tbz(VixlReg(reg), bitPos, label);
416     }
417 }
418 
419 void Aarch64Encoder::EncodeNop()
420 {
421     GetMasm()->Nop();
422 }
423 
424 void Aarch64Encoder::MakeCall([[maybe_unused]] compiler::RelocationInfo *relocation)
425 {
426 #ifdef PANDA_TARGET_MACOS
427     LOG(FATAL, COMPILER) << "Not supported in Macos build";
428 #else
429     auto buffer = GetMasm()->GetBuffer();
430     relocation->offset = GetCursorOffset();
431     relocation->addend = 0;
432     relocation->type = R_AARCH64_CALL26;
433     static constexpr uint32_t CALL_WITH_ZERO_OFFSET = 0x94000000;
434     buffer->Emit32(CALL_WITH_ZERO_OFFSET);
435 #endif
436 }
437 
438 void Aarch64Encoder::MakeCall(const void *entryPoint)
439 {
440     ScopedTmpReg tmp(this, true);
441     EncodeMov(tmp, Imm(reinterpret_cast<uintptr_t>(entryPoint)));
442     GetMasm()->Blr(VixlReg(tmp));
443 }
444 
445 void Aarch64Encoder::MakeCall(MemRef entryPoint)
446 {
447     ScopedTmpReg tmp(this, true);
448     EncodeLdr(tmp, false, entryPoint);
449     GetMasm()->Blr(VixlReg(tmp));
450 }
451 
452 void Aarch64Encoder::MakeCall(Reg reg)
453 {
454     GetMasm()->Blr(VixlReg(reg));
455 }
456 
457 void Aarch64Encoder::MakeCall(LabelHolder::LabelId id)
458 {
459     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
460     GetMasm()->Bl(label);
461 }
462 
463 void Aarch64Encoder::LoadPcRelative(Reg reg, intptr_t offset, Reg regAddr)
464 {
465     ASSERT(GetCodeOffset() != Encoder::INVALID_OFFSET);
466     ASSERT(reg.IsValid() || regAddr.IsValid());
467 
468     if (!regAddr.IsValid()) {
469         regAddr = reg.As(INT64_TYPE);
470     }
471 
472     if (vixl::IsInt21(offset)) {
473         GetMasm()->adr(VixlReg(regAddr), offset);
474         if (reg != INVALID_REGISTER) {
475             EncodeLdr(reg, false, MemRef(regAddr));
476         }
477     } else {
478         size_t pc = GetCodeOffset() + GetCursorOffset();
479         size_t addr;
480         if (intptr_t res = helpers::ToSigned(pc) + offset; res < 0) {
481             // Make both pc and addr positive
482             ssize_t extend = RoundUp(std::abs(res), vixl::aarch64::kPageSize);
483             addr = res + extend;
484             pc += extend;
485         } else {
486             addr = res;
487         }
488 
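        // Note: adrp materializes only the 4 KiB page base of the target address; the low 12 bits
        // are applied separately below, either via EncodeAdd or folded into the load displacement.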
489         ssize_t adrpImm = (addr >> vixl::aarch64::kPageSizeLog2) - (pc >> vixl::aarch64::kPageSizeLog2);
490 
491         GetMasm()->adrp(VixlReg(regAddr), adrpImm);
492 
493         offset = ark::helpers::ToUnsigned(addr) & (vixl::aarch64::kPageSize - 1);
494         if (reg.GetId() != regAddr.GetId()) {
495             EncodeAdd(regAddr, regAddr, Imm(offset));
496             if (reg != INVALID_REGISTER) {
497                 EncodeLdr(reg, true, MemRef(regAddr));
498             }
499         } else {
500             EncodeLdr(reg, true, MemRef(regAddr, offset));
501         }
502     }
503 }
504 
505 void Aarch64Encoder::MakeCallAot(intptr_t offset)
506 {
507     ScopedTmpReg tmp(this, true);
508     LoadPcRelative(tmp, offset);
509     GetMasm()->Blr(VixlReg(tmp));
510 }
511 
512 bool Aarch64Encoder::CanMakeCallByOffset(intptr_t offset)
513 {
514     // NOLINTNEXTLINE(hicpp-signed-bitwise)
515     auto off = (offset >> vixl::aarch64::kInstructionSizeLog2);
516     return vixl::aarch64::Instruction::IsValidImmPCOffset(vixl::aarch64::ImmBranchType::UncondBranchType, off);
517 }
518 
519 void Aarch64Encoder::MakeCallByOffset(intptr_t offset)
520 {
521     GetMasm()->Bl(offset);
522 }
523 
524 void Aarch64Encoder::MakeLoadAotTable(intptr_t offset, Reg reg)
525 {
526     LoadPcRelative(reg, offset);
527 }
528 
529 void Aarch64Encoder::MakeLoadAotTableAddr(intptr_t offset, Reg addr, Reg val)
530 {
531     LoadPcRelative(val, offset, addr);
532 }
533 
534 void Aarch64Encoder::EncodeAbort()
535 {
536     GetMasm()->Brk();
537 }
538 
539 void Aarch64Encoder::EncodeReturn()
540 {
541     GetMasm()->Ret();
542 }
543 
544 void Aarch64Encoder::EncodeMul([[maybe_unused]] Reg unused1, [[maybe_unused]] Reg unused2, [[maybe_unused]] Imm unused3)
545 {
546     SetFalseResult();
547 }
548 
549 void Aarch64Encoder::EncodeMov(Reg dst, Reg src)
550 {
551     if (dst == src) {
552         return;
553     }
554     if (src.IsFloat() && dst.IsFloat()) {
555         if (src.GetSize() != dst.GetSize()) {
556             GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
557             return;
558         }
559         GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
560         return;
561     }
562     if (src.IsFloat() && !dst.IsFloat()) {
563         GetMasm()->Fmov(VixlReg(dst, src.GetSize()), VixlVReg(src));
564         return;
565     }
566     if (dst.IsFloat()) {
567         ASSERT(src.IsScalar());
568         GetMasm()->Fmov(VixlVReg(dst), VixlReg(src));
569         return;
570     }
571     // DiscardForSameWReg below means we would drop "mov w0, w0", but that case is already guarded by "dst == src" above.
572     // NOTE: "mov w0, w0" is not equal to "nop", as it clears the upper bits of x0.
573     // Keeping the option here helps to generate nothing when e.g. src is x0 and dst is w0.
574     // A better solution would probably be to check register sizes system-wide at the Encoder level.
575     if (src.GetSize() != dst.GetSize()) {
576         auto srcReg = Reg(src.GetId(), dst.GetType());
577         GetMasm()->Mov(VixlReg(dst), VixlReg(srcReg), vixl::aarch64::DiscardMoveMode::kDiscardForSameWReg);
578         return;
579     }
580     GetMasm()->Mov(VixlReg(dst), VixlReg(src), vixl::aarch64::DiscardMoveMode::kDiscardForSameWReg);
581 }
582 
583 void Aarch64Encoder::EncodeNeg(Reg dst, Reg src)
584 {
585     if (dst.IsFloat()) {
586         GetMasm()->Fneg(VixlVReg(dst), VixlVReg(src));
587         return;
588     }
589     GetMasm()->Neg(VixlReg(dst), VixlReg(src));
590 }
591 
592 void Aarch64Encoder::EncodeAbs(Reg dst, Reg src)
593 {
594     if (dst.IsFloat()) {
595         GetMasm()->Fabs(VixlVReg(dst), VixlVReg(src));
596         return;
597     }
598 
599     ASSERT(!GetRegfile()->IsZeroReg(dst));
600     if (GetRegfile()->IsZeroReg(src)) {
601         EncodeMov(dst, src);
602         return;
603     }
604 
605     if (src.GetSize() == DOUBLE_WORD_SIZE) {
606         GetMasm()->Cmp(VixlReg(src), vixl::aarch64::xzr);
607     } else {
608         GetMasm()->Cmp(VixlReg(src), vixl::aarch64::wzr);
609     }
610     GetMasm()->Cneg(VixlReg(Promote(dst)), VixlReg(Promote(src)), vixl::aarch64::Condition::lt);
611 }
612 
613 void Aarch64Encoder::EncodeSqrt(Reg dst, Reg src)
614 {
615     ASSERT(dst.IsFloat());
616     GetMasm()->Fsqrt(VixlVReg(dst), VixlVReg(src));
617 }
618 
619 void Aarch64Encoder::EncodeIsInf(Reg dst, Reg src)
620 {
621     ASSERT(dst.IsScalar() && src.IsFloat());
622 
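    // An IEEE-754 infinity has an all-ones exponent and a zero mantissa; shifting the raw bits left
    // by one discards the sign, so both +Inf and -Inf compare equal to the INF_MASK value below.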
623     if (src.GetSize() == WORD_SIZE) {
624         constexpr uint32_t INF_MASK = 0xff000000;
625 
626         ScopedTmpRegU32 tmpReg(this);
627         auto tmp = VixlReg(tmpReg);
628         GetMasm()->Fmov(tmp, VixlVReg(src));
629         GetMasm()->Mov(VixlReg(dst).W(), INF_MASK);
630         GetMasm()->Lsl(tmp, tmp, 1);
631         GetMasm()->Cmp(tmp, VixlReg(dst, WORD_SIZE));
632     } else {
633         constexpr uint64_t INF_MASK = 0xffe0000000000000;
634 
635         ScopedTmpRegU64 tmpReg(this);
636         auto tmp = VixlReg(tmpReg);
637         GetMasm()->Fmov(tmp, VixlVReg(src));
638         GetMasm()->Mov(VixlReg(dst).X(), INF_MASK);
639         GetMasm()->Lsl(tmp, tmp, 1);
640         GetMasm()->Cmp(tmp, VixlReg(dst, DOUBLE_WORD_SIZE));
641     }
642 
643     GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::eq);
644 }
645 
646 void Aarch64Encoder::EncodeCmpFracWithDelta(Reg src)
647 {
648     ASSERT(src.IsFloat());
649     ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
650 
651     // Encode (fabs(src - trunc(src)) <= DELTA)
652     if (src.GetSize() == WORD_SIZE) {
653         ScopedTmpRegF32 tmp(this);
654         GetMasm()->Frintz(VixlVReg(tmp), VixlVReg(src));
655         EncodeSub(tmp, src, tmp);
656         EncodeAbs(tmp, tmp);
657         GetMasm()->Fcmp(VixlVReg(tmp), std::numeric_limits<float>::epsilon());
658     } else {
659         ScopedTmpRegF64 tmp(this);
660         GetMasm()->Frintz(VixlVReg(tmp), VixlVReg(src));
661         EncodeSub(tmp, src, tmp);
662         EncodeAbs(tmp, tmp);
663         GetMasm()->Fcmp(VixlVReg(tmp), std::numeric_limits<double>::epsilon());
664     }
665 }
666 
667 void Aarch64Encoder::EncodeIsInteger(Reg dst, Reg src)
668 {
669     ASSERT(dst.IsScalar() && src.IsFloat());
670     ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
671 
672     auto labelExit = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
673     auto labelInfOrNan = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
674 
675     EncodeCmpFracWithDelta(src);
676     GetMasm()->B(labelInfOrNan, vixl::aarch64::Condition::vs);  // Inf or NaN
677     GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::le);
678     GetMasm()->B(labelExit);
679 
680     // IsInteger returns false if src is Inf or NaN
681     GetMasm()->Bind(labelInfOrNan);
682     EncodeMov(dst, Imm(false));
683 
684     GetMasm()->Bind(labelExit);
685 }
686 
687 void Aarch64Encoder::EncodeIsSafeInteger(Reg dst, Reg src)
688 {
689     ASSERT(dst.IsScalar() && src.IsFloat());
690     ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
691 
692     auto labelExit = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
693     auto labelFalse = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
694 
695     // Check if IsInteger
696     EncodeCmpFracWithDelta(src);
697     GetMasm()->B(labelFalse, vixl::aarch64::Condition::vs);  // Inf or NaN
698     GetMasm()->B(labelFalse, vixl::aarch64::Condition::gt);
699 
700     // Check if it is safe, i.e. src can be represented in float/double without losing precision
701     if (src.GetSize() == WORD_SIZE) {
702         ScopedTmpRegF32 tmp(this);
703         EncodeAbs(tmp, src);
704         GetMasm()->Fcmp(VixlVReg(tmp), MaxIntAsExactFloat());
705     } else {
706         ScopedTmpRegF64 tmp(this);
707         EncodeAbs(tmp, src);
708         GetMasm()->Fcmp(VixlVReg(tmp), MaxIntAsExactDouble());
709     }
710     GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::le);
711     GetMasm()->B(labelExit);
712 
713     // Return false if src !IsInteger
714     GetMasm()->Bind(labelFalse);
715     EncodeMov(dst, Imm(false));
716 
717     GetMasm()->Bind(labelExit);
718 }
719 
720 /* NaN values need to be canonicalized */
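/* E.g. every float32 NaN input, whatever its payload, is collapsed to the single canonical
 * pattern 0x7fc00000 (0x7ff8000000000000 for float64) selected below. */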
721 void Aarch64Encoder::EncodeFpToBits(Reg dst, Reg src)
722 {
723     ASSERT(dst.IsScalar() && src.IsFloat());
724     ASSERT(dst.GetSize() == WORD_SIZE || dst.GetSize() == DOUBLE_WORD_SIZE);
725 
726     if (dst.GetSize() == WORD_SIZE) {
727         ASSERT(src.GetSize() == WORD_SIZE);
728 
729         constexpr auto FNAN = 0x7fc00000;
730 
731         ScopedTmpRegU32 tmp(this);
732 
733         GetMasm()->Fcmp(VixlVReg(src), VixlVReg(src));
734         GetMasm()->Mov(VixlReg(tmp), FNAN);
735         GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
736         GetMasm()->Csel(VixlReg(dst), VixlReg(tmp), VixlReg(dst), vixl::aarch64::Condition::ne);
737     } else {
738         ASSERT(src.GetSize() == DOUBLE_WORD_SIZE);
739 
740         constexpr auto DNAN = 0x7ff8000000000000;
741 
742         ScopedTmpRegU64 tmpReg(this);
743         auto tmp = VixlReg(tmpReg);
744 
745         GetMasm()->Fcmp(VixlVReg(src), VixlVReg(src));
746         GetMasm()->Mov(tmp, DNAN);
747         GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
748         GetMasm()->Csel(VixlReg(dst), tmp, VixlReg(dst), vixl::aarch64::Condition::ne);
749     }
750 }
751 
752 void Aarch64Encoder::EncodeMoveBitsRaw(Reg dst, Reg src)
753 {
754     ASSERT((dst.IsFloat() && src.IsScalar()) || (src.IsFloat() && dst.IsScalar()));
755     if (dst.IsScalar()) {
756         ASSERT(src.GetSize() == dst.GetSize());
757         if (dst.GetSize() == WORD_SIZE) {
758             GetMasm()->Umov(VixlReg(dst).W(), VixlVReg(src).S(), 0);
759         } else {
760             GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
761         }
762     } else {
763         ASSERT(dst.GetSize() == src.GetSize());
764         ScopedTmpReg tmpReg(this, src.GetType());
765         auto srcReg = src;
766         auto rzero = GetRegfile()->GetZeroReg();
767         if (src.GetId() == rzero.GetId()) {
768             EncodeMov(tmpReg, Imm(0));
769             srcReg = tmpReg;
770         }
771 
772         if (srcReg.GetSize() == WORD_SIZE) {
773             GetMasm()->Fmov(VixlVReg(dst).S(), VixlReg(srcReg).W());
774         } else {
775             GetMasm()->Fmov(VixlVReg(dst), VixlReg(srcReg));
776         }
777     }
778 }
779 
780 void Aarch64Encoder::EncodeReverseBytes(Reg dst, Reg src)
781 {
782     auto rzero = GetRegfile()->GetZeroReg();
783     if (src.GetId() == rzero.GetId()) {
784         EncodeMov(dst, Imm(0));
785         return;
786     }
787 
788     ASSERT(src.GetSize() > BYTE_SIZE);
789     ASSERT(src.GetSize() == dst.GetSize());
790 
791     if (src.GetSize() == HALF_SIZE) {
792         GetMasm()->Rev16(VixlReg(dst), VixlReg(src));
793         GetMasm()->Sxth(VixlReg(dst), VixlReg(dst));
794     } else {
795         GetMasm()->Rev(VixlReg(dst), VixlReg(src));
796     }
797 }
798 
799 void Aarch64Encoder::EncodeBitCount(Reg dst, Reg src)
800 {
801     auto rzero = GetRegfile()->GetZeroReg();
802     if (src.GetId() == rzero.GetId()) {
803         EncodeMov(dst, Imm(0));
804         return;
805     }
806 
807     ASSERT(dst.GetSize() == WORD_SIZE);
808 
809     ScopedTmpRegF64 tmpReg0(this);
810     vixl::aarch64::VRegister tmpReg;
811     if (src.GetSize() == DOUBLE_WORD_SIZE) {
812         tmpReg = VixlVReg(tmpReg0).D();
813     } else {
814         tmpReg = VixlVReg(tmpReg0).S();
815     }
816 
817     if (src.GetSize() < WORD_SIZE) {
818         int64_t cutValue = (1ULL << src.GetSize()) - 1;
819         EncodeAnd(src, src, Imm(cutValue));
820     }
821 
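    // Cnt computes a per-byte popcount in the vector register; Addv then sums the eight byte lanes
    // into one byte, yielding the total bit count that is moved back to dst.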
822     GetMasm()->Fmov(tmpReg, VixlReg(src));
823     GetMasm()->Cnt(tmpReg.V8B(), tmpReg.V8B());
824     GetMasm()->Addv(tmpReg.B(), tmpReg.V8B());
825     EncodeMov(dst, tmpReg0);
826 }
827 
828 /* Since only ROR is supported on AArch64, we implement
829  * left rotation as ROR(v, -count) */
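/* E.g. for a 32-bit value, a left rotation by n equals ROR(v, (32 - n) & 31), which is what the
 * Neg + Ror sequence below produces (Ror takes the count modulo the register size). */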
830 void Aarch64Encoder::EncodeRotate(Reg dst, Reg src1, Reg src2, bool isRor)
831 {
832     ASSERT(src1.GetSize() == WORD_SIZE || src1.GetSize() == DOUBLE_WORD_SIZE);
833     ASSERT(src1.GetSize() == dst.GetSize());
834     auto rzero = GetRegfile()->GetZeroReg();
835     if (rzero.GetId() == src2.GetId() || rzero.GetId() == src1.GetId()) {
836         EncodeMov(dst, src1);
837         return;
838     }
839     /* As the second parameter is always 32 bits long, we have to
840      * adjust the count register for the case of a 64-bit first operand */
841     if (isRor) {
842         auto count = (dst.GetSize() == WORD_SIZE ? VixlReg(src2) : VixlReg(src2).X());
843         GetMasm()->Ror(VixlReg(dst), VixlReg(src1), count);
844     } else {
845         ScopedTmpReg tmp(this);
846         auto cnt = (dst.GetId() == src1.GetId() ? tmp : dst);
847         auto count = (dst.GetSize() == WORD_SIZE ? VixlReg(cnt).W() : VixlReg(cnt).X());
848         auto source2 = (dst.GetSize() == WORD_SIZE ? VixlReg(src2).W() : VixlReg(src2).X());
849         GetMasm()->Neg(count, source2);
850         GetMasm()->Ror(VixlReg(dst), VixlReg(src1), count);
851     }
852 }
853 
854 void Aarch64Encoder::EncodeSignum(Reg dst, Reg src)
855 {
856     ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
857 
858     ScopedTmpRegU32 tmp(this);
859     auto sign = (dst.GetId() == src.GetId() ? tmp : dst);
860 
861     GetMasm()->Cmp(VixlReg(src), VixlImm(0));
862     GetMasm()->Cset(VixlReg(sign), vixl::aarch64::Condition::gt);
863 
864     constexpr auto SHIFT_WORD_BITS = 31;
865     constexpr auto SHIFT_DWORD_BITS = 63;
866 
867     /* The operation below is "sub dst, dst, src, lsr #reg_size-1";
868      * however, we can only encode up to 32 bits in the lsr field, so
869      * for the 64-bit case we cannot avoid a separate lsr instruction */
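    /* Worked example (32-bit): src = -5 gives cset(gt) = 0 and (src lsr 31) = 1, so dst = 0 - 1 = -1;
     * src = 7 gives 1 - 0 = 1; src = 0 gives 0 - 0 = 0. */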
870     if (src.GetSize() == WORD_SIZE) {
871         auto shift = Shift(src, LSR, SHIFT_WORD_BITS);
872         EncodeSub(dst, sign, shift);
873     } else {
874         ScopedTmpRegU64 shift(this);
875         sign = Reg(sign.GetId(), INT64_TYPE);
876         EncodeShr(shift, src, Imm(SHIFT_DWORD_BITS));
877         EncodeSub(dst, sign, shift);
878     }
879 }
880 
881 void Aarch64Encoder::EncodeCountLeadingZeroBits(Reg dst, Reg src)
882 {
883     auto rzero = GetRegfile()->GetZeroReg();
884     if (rzero.GetId() == src.GetId()) {
885         EncodeMov(dst, Imm(src.GetSize()));
886         return;
887     }
888     GetMasm()->Clz(VixlReg(dst), VixlReg(src));
889 }
890 
891 void Aarch64Encoder::EncodeCountTrailingZeroBits(Reg dst, Reg src)
892 {
893     auto rzero = GetRegfile()->GetZeroReg();
894     if (rzero.GetId() == src.GetId()) {
895         EncodeMov(dst, Imm(src.GetSize()));
896         return;
897     }
898     GetMasm()->Rbit(VixlReg(dst), VixlReg(src));
899     GetMasm()->Clz(VixlReg(dst), VixlReg(dst));
900 }
901 
902 void Aarch64Encoder::EncodeCeil(Reg dst, Reg src)
903 {
904     GetMasm()->Frintp(VixlVReg(dst), VixlVReg(src));
905 }
906 
907 void Aarch64Encoder::EncodeFloor(Reg dst, Reg src)
908 {
909     GetMasm()->Frintm(VixlVReg(dst), VixlVReg(src));
910 }
911 
912 void Aarch64Encoder::EncodeRint(Reg dst, Reg src)
913 {
914     GetMasm()->Frintn(VixlVReg(dst), VixlVReg(src));
915 }
916 
917 void Aarch64Encoder::EncodeTrunc(Reg dst, Reg src)
918 {
919     GetMasm()->Frintz(VixlVReg(dst), VixlVReg(src));
920 }
921 
922 void Aarch64Encoder::EncodeRoundAway(Reg dst, Reg src)
923 {
924     GetMasm()->Frinta(VixlVReg(dst), VixlVReg(src));
925 }
926 
927 void Aarch64Encoder::EncodeRoundToPInf(Reg dst, Reg src)
928 {
929     auto done = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
930     ScopedTmpReg tmp(this, src.GetType());
931     // round to nearest integer, ties away from zero
932     GetMasm()->Fcvtas(VixlReg(dst), VixlVReg(src));
933     // for positive values, zero, and NaN inputs, rounding is already done
934     GetMasm()->Tbz(VixlReg(dst), dst.GetSize() - 1, done);
935     // if input is negative but not a tie, round to nearest is valid
936     // if input is a negative tie, dst += 1
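    // E.g. src = -2.5: Fcvtas yields -3 (ties away from zero); src - Frinta(src) == 0.5, so the
    // Cinc below bumps dst to -2, matching rounding of ties towards +infinity.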
937     GetMasm()->Frinta(VixlVReg(tmp), VixlVReg(src));
938     GetMasm()->Fsub(VixlVReg(tmp), VixlVReg(src), VixlVReg(tmp));
939     // NOLINTNEXTLINE(readability-magic-numbers)
940     GetMasm()->Fcmp(VixlVReg(tmp), 0.5F);
941     GetMasm()->Cinc(VixlReg(dst), VixlReg(dst), vixl::aarch64::Condition::eq);
942     GetMasm()->Bind(done);
943 }
944 
945 void Aarch64Encoder::EncodeCrc32Update(Reg dst, Reg crcReg, Reg valReg)
946 {
947     auto tmp = dst.GetId() != crcReg.GetId() && dst.GetId() != valReg.GetId() ? dst : ScopedTmpReg(this, dst.GetType());
948     GetMasm()->Mvn(VixlReg(tmp), VixlReg(crcReg));
949     GetMasm()->Crc32b(VixlReg(tmp), VixlReg(tmp), VixlReg(valReg));
950     GetMasm()->Mvn(VixlReg(dst), VixlReg(tmp));
951 }
952 
953 void Aarch64Encoder::EncodeCompressEightUtf16ToUtf8CharsUsingSimd(Reg srcAddr, Reg dstAddr)
954 {
955     ScopedTmpReg tmp1(this, FLOAT64_TYPE);
956     ScopedTmpReg tmp2(this, FLOAT64_TYPE);
957     auto vixlVreg1 = vixl::aarch64::VRegister(tmp1.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8B);
958     ASSERT(vixlVreg1.IsValid());
959     auto vixlVreg2 = vixl::aarch64::VRegister(tmp2.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8B);
960     ASSERT(vixlVreg2.IsValid());
961     auto src = vixl::aarch64::MemOperand(VixlReg(srcAddr));
962     auto dst = vixl::aarch64::MemOperand(VixlReg(dstAddr));
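    // Ld2 de-interleaves the eight UTF-16 code units: the low byte of each unit lands in vixlVreg1
    // and the high byte in vixlVreg2, so storing vixlVreg1 alone emits the compressed chars.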
963     GetMasm()->Ld2(vixlVreg1, vixlVreg2, src);
964     GetMasm()->St1(vixlVreg1, dst);
965 }
966 
967 void Aarch64Encoder::EncodeCompressSixteenUtf16ToUtf8CharsUsingSimd(Reg srcAddr, Reg dstAddr)
968 {
969     ScopedTmpReg tmp1(this, FLOAT64_TYPE);
970     ScopedTmpReg tmp2(this, FLOAT64_TYPE);
971     auto vixlVreg1 = vixl::aarch64::VRegister(tmp1.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
972     ASSERT(vixlVreg1.IsValid());
973     auto vixlVreg2 = vixl::aarch64::VRegister(tmp2.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
974     ASSERT(vixlVreg2.IsValid());
975     auto src = vixl::aarch64::MemOperand(VixlReg(srcAddr));
976     auto dst = vixl::aarch64::MemOperand(VixlReg(dstAddr));
977     GetMasm()->Ld2(vixlVreg1, vixlVreg2, src);
978     GetMasm()->St1(vixlVreg1, dst);
979 }
980 
981 void Aarch64Encoder::EncodeMemCharU8X32UsingSimd(Reg dst, Reg ch, Reg srcAddr, Reg tmp)
982 {
983     ScopedTmpReg vTmp0(this, FLOAT64_TYPE);
984     ScopedTmpReg vTmp1(this, FLOAT64_TYPE);
985     auto vReg0 = vixl::aarch64::VRegister(vTmp0.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
986     auto vReg1 = vixl::aarch64::VRegister(vTmp1.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
987     auto vReg2 = vixl::aarch64::VRegister(tmp.GetId(), vixl::aarch64::VectorFormat::kFormat16B);
988     auto xReg0 = vixl::aarch64::Register(dst.GetId(), vixl::aarch64::kXRegSize);
989     auto labelReturn = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
990     auto labelFound = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
991     auto labelSecond16B = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
992     auto labelCheckV0D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
993     auto labelCheckV1D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
994 
995     GetMasm()->Ld1(vReg0, vReg1, vixl::aarch64::MemOperand(VixlReg(srcAddr)));
996     GetMasm()->Dup(vReg2, VixlReg(ch));
997     GetMasm()->Cmeq(vReg0, vReg0, vReg2);
998     GetMasm()->Cmeq(vReg1, vReg1, vReg2);
999     // Give up if char is not there
1000     GetMasm()->Addp(vReg2, vReg0, vReg1);
1001     GetMasm()->Addp(vReg2.V2D(), vReg2.V2D(), vReg2.V2D());
1002     GetMasm()->Mov(xReg0, vReg2.D(), 0);
1003     GetMasm()->Cbz(xReg0, labelReturn);
1004     // Inspect the first 16-byte block
1005     GetMasm()->Mov(xReg0, vReg0.D(), 0);
1006     GetMasm()->Cbz(xReg0, labelCheckV0D1);
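    // Cmeq set all bits of each matching byte lane; Rev makes the lowest-addressed lane the most
    // significant byte, so Clz gives the bit offset of the first match (converted to a byte index
    // by the Lsr #3 at labelFound below).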
1007     GetMasm()->Rev(xReg0, xReg0);
1008     GetMasm()->Clz(xReg0, xReg0);
1009     GetMasm()->B(labelFound);
1010     GetMasm()->Bind(labelCheckV0D1);
1011     GetMasm()->Mov(xReg0, vReg0.D(), 1U);
1012     GetMasm()->Cbz(xReg0, labelSecond16B);
1013     GetMasm()->Rev(xReg0, xReg0);
1014     GetMasm()->Clz(xReg0, xReg0);
1015     GetMasm()->Add(xReg0, xReg0, VixlImm(BITS_PER_UINT64));
1016     GetMasm()->B(labelFound);
1017     // Inspect the second 16-byte block
1018     GetMasm()->Bind(labelSecond16B);
1019     GetMasm()->Mov(xReg0, vReg1.D(), 0);
1020     GetMasm()->Cbz(xReg0, labelCheckV1D1);
1021     GetMasm()->Rev(xReg0, xReg0);
1022     GetMasm()->Clz(xReg0, xReg0);
1023     GetMasm()->Add(xReg0, xReg0, VixlImm(2U * BITS_PER_UINT64));
1024     GetMasm()->B(labelFound);
1025     GetMasm()->Bind(labelCheckV1D1);
1026     GetMasm()->Mov(xReg0, vReg1.D(), 1U);
1027     GetMasm()->Rev(xReg0, xReg0);
1028     GetMasm()->Clz(xReg0, xReg0);
1029     GetMasm()->Add(xReg0, xReg0, VixlImm(3U * BITS_PER_UINT64));
1030 
1031     GetMasm()->Bind(labelFound);
1032     GetMasm()->Lsr(xReg0, xReg0, 3U);
1033     GetMasm()->Add(xReg0, xReg0, VixlReg(srcAddr));
1034     GetMasm()->Bind(labelReturn);
1035 }
1036 
1037 void Aarch64Encoder::EncodeMemCharU16X16UsingSimd(Reg dst, Reg ch, Reg srcAddr, Reg tmp)
1038 {
1039     ScopedTmpReg vTmp0(this, FLOAT64_TYPE);
1040     ScopedTmpReg vTmp1(this, FLOAT64_TYPE);
1041     auto vReg0 = vixl::aarch64::VRegister(vTmp0.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1042     auto vReg1 = vixl::aarch64::VRegister(vTmp1.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1043     auto vReg2 = vixl::aarch64::VRegister(tmp.GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1044     auto xReg0 = vixl::aarch64::Register(dst.GetId(), vixl::aarch64::kXRegSize);
1045     auto labelReturn = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1046     auto labelFound = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1047     auto labelSecond16B = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1048     auto labelCheckV0D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1049     auto labelCheckV1D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1050 
1051     GetMasm()->Ld1(vReg0, vReg1, vixl::aarch64::MemOperand(VixlReg(srcAddr)));
1052     GetMasm()->Dup(vReg2, VixlReg(ch));
1053     GetMasm()->Cmeq(vReg0, vReg0, vReg2);
1054     GetMasm()->Cmeq(vReg1, vReg1, vReg2);
1055     // Give up if char is not there
1056     GetMasm()->Addp(vReg2, vReg0, vReg1);
1057     GetMasm()->Addp(vReg2.V2D(), vReg2.V2D(), vReg2.V2D());
1058     GetMasm()->Mov(xReg0, vReg2.D(), 0);
1059     GetMasm()->Cbz(xReg0, labelReturn);
1060     // Inspect the first 16-byte block
1061     GetMasm()->Mov(xReg0, vReg0.D(), 0);
1062     GetMasm()->Cbz(xReg0, labelCheckV0D1);
1063     GetMasm()->Rev(xReg0, xReg0);
1064     GetMasm()->Clz(xReg0, xReg0);
1065     GetMasm()->B(labelFound);
1066     GetMasm()->Bind(labelCheckV0D1);
1067     GetMasm()->Mov(xReg0, vReg0.D(), 1U);
1068     GetMasm()->Cbz(xReg0, labelSecond16B);
1069     GetMasm()->Rev(xReg0, xReg0);
1070     GetMasm()->Clz(xReg0, xReg0);
1071     GetMasm()->Add(xReg0, xReg0, VixlImm(BITS_PER_UINT64));
1072     GetMasm()->B(labelFound);
1073     // Inspect the second 16-byte block
1074     GetMasm()->Bind(labelSecond16B);
1075     GetMasm()->Mov(xReg0, vReg1.D(), 0);
1076     GetMasm()->Cbz(xReg0, labelCheckV1D1);
1077     GetMasm()->Rev(xReg0, xReg0);
1078     GetMasm()->Clz(xReg0, xReg0);
1079     GetMasm()->Add(xReg0, xReg0, VixlImm(2U * BITS_PER_UINT64));
1080     GetMasm()->B(labelFound);
1081     GetMasm()->Bind(labelCheckV1D1);
1082     GetMasm()->Mov(xReg0, vReg1.D(), 1U);
1083     GetMasm()->Rev(xReg0, xReg0);
1084     GetMasm()->Clz(xReg0, xReg0);
1085     GetMasm()->Add(xReg0, xReg0, VixlImm(3U * BITS_PER_UINT64));
1086 
1087     GetMasm()->Bind(labelFound);
1088     GetMasm()->Lsr(xReg0, xReg0, 4U);
1089     GetMasm()->Lsl(xReg0, xReg0, 1U);
1090     GetMasm()->Add(xReg0, xReg0, VixlReg(srcAddr));
1091     GetMasm()->Bind(labelReturn);
1092 }
1093 
1094 void Aarch64Encoder::EncodeMemCharU8X16UsingSimd(Reg dst, Reg ch, Reg srcAddr, Reg tmp)
1095 {
1096     ScopedTmpReg vTmp0(this, FLOAT64_TYPE);
1097     ScopedTmpReg vTmp1(this, FLOAT64_TYPE);
1098     auto vReg0 = vixl::aarch64::VRegister(vTmp0.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
1099     auto vReg1 = vixl::aarch64::VRegister(tmp.GetId(), vixl::aarch64::VectorFormat::kFormat16B);
1100     auto xReg0 = vixl::aarch64::Register(dst.GetId(), vixl::aarch64::kXRegSize);
1101     auto labelReturn = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1102     auto labelFound = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1103     auto labelCheckV0D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1104 
1105     GetMasm()->Ld1(vReg0, vixl::aarch64::MemOperand(VixlReg(srcAddr)));
1106     GetMasm()->Dup(vReg1, VixlReg(ch));
1107     GetMasm()->Cmeq(vReg0, vReg0, vReg1);
1108     // Give up if char is not there
1109     GetMasm()->Addp(vReg1.V2D(), vReg0.V2D(), vReg0.V2D());
1110     GetMasm()->Mov(xReg0, vReg1.D(), 0);
1111     GetMasm()->Cbz(xReg0, labelReturn);
1112     // Compute a pointer to the char
1113     GetMasm()->Mov(xReg0, vReg0.D(), 0);
1114     GetMasm()->Cbz(xReg0, labelCheckV0D1);
1115     GetMasm()->Rev(xReg0, xReg0);
1116     GetMasm()->Clz(xReg0, xReg0);
1117     GetMasm()->B(labelFound);
1118     GetMasm()->Bind(labelCheckV0D1);
1119     GetMasm()->Mov(xReg0, vReg0.D(), 1U);
1120     GetMasm()->Rev(xReg0, xReg0);
1121     GetMasm()->Clz(xReg0, xReg0);
1122     GetMasm()->Add(xReg0, xReg0, VixlImm(BITS_PER_UINT64));
1123     GetMasm()->Bind(labelFound);
1124     GetMasm()->Lsr(xReg0, xReg0, 3U);  // number of 8-bit chars
1125     GetMasm()->Add(xReg0, xReg0, VixlReg(srcAddr));
1126     GetMasm()->Bind(labelReturn);
1127 }
1128 
1129 void Aarch64Encoder::EncodeMemCharU16X8UsingSimd(Reg dst, Reg ch, Reg srcAddr, Reg tmp)
1130 {
1131     ScopedTmpReg vTmp0(this, FLOAT64_TYPE);
1132     ScopedTmpReg vTmp1(this, FLOAT64_TYPE);
1133     auto vReg0 = vixl::aarch64::VRegister(vTmp0.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1134     auto vReg1 = vixl::aarch64::VRegister(tmp.GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1135     auto xReg0 = vixl::aarch64::Register(dst.GetId(), vixl::aarch64::kXRegSize);
1136     auto labelReturn = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1137     auto labelFound = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1138     auto labelCheckV0D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1139 
1140     GetMasm()->Ld1(vReg0, vixl::aarch64::MemOperand(VixlReg(srcAddr)));
1141     GetMasm()->Dup(vReg1, VixlReg(ch));
1142     GetMasm()->Cmeq(vReg0, vReg0, vReg1);
1143     // Give up if char is not there
1144     GetMasm()->Addp(vReg1.V2D(), vReg0.V2D(), vReg0.V2D());
1145     GetMasm()->Mov(xReg0, vReg1.D(), 0);
1146     GetMasm()->Cbz(xReg0, labelReturn);
1147     // Compute a pointer to the char
1148     GetMasm()->Mov(xReg0, vReg0.D(), 0);
1149     GetMasm()->Cbz(xReg0, labelCheckV0D1);
1150     GetMasm()->Rev(xReg0, xReg0);
1151     GetMasm()->Clz(xReg0, xReg0);
1152     GetMasm()->B(labelFound);
1153     GetMasm()->Bind(labelCheckV0D1);
1154     GetMasm()->Mov(xReg0, vReg0.D(), 1U);
1155     GetMasm()->Rev(xReg0, xReg0);
1156     GetMasm()->Clz(xReg0, xReg0);
1157     GetMasm()->Add(xReg0, xReg0, VixlImm(BITS_PER_UINT64));
1158     GetMasm()->Bind(labelFound);
1159     GetMasm()->Lsr(xReg0, xReg0, 4U);  // number of 16-bit chars
1160     GetMasm()->Lsl(xReg0, xReg0, 1U);  // number of bytes
1161     GetMasm()->Add(xReg0, xReg0, VixlReg(srcAddr));
1162     GetMasm()->Bind(labelReturn);
1163 }
1164 
1165 void Aarch64Encoder::EncodeUnsignedExtendBytesToShorts(Reg dst, Reg src)
1166 {
1167     GetMasm()->Uxtl(VixlVReg(dst).V8H(), VixlVReg(src).V8B());
1168 }
1169 
1170 void Aarch64Encoder::EncodeReverseHalfWords(Reg dst, Reg src)
1171 {
1172     ASSERT(src.GetSize() == dst.GetSize());
1173 
1174     GetMasm()->rev64(VixlVReg(dst).V4H(), VixlVReg(src).V4H());
1175 }
1176 
1177 bool Aarch64Encoder::CanEncodeBitCount()
1178 {
1179     return true;
1180 }
1181 
1182 bool Aarch64Encoder::CanEncodeCompressedStringCharAt()
1183 {
1184     return true;
1185 }
1186 
1187 bool Aarch64Encoder::CanEncodeCompressedStringCharAtI()
1188 {
1189     return true;
1190 }
1191 
1192 bool Aarch64Encoder::CanEncodeMAdd()
1193 {
1194     return true;
1195 }
1196 
1197 bool Aarch64Encoder::CanEncodeMSub()
1198 {
1199     return true;
1200 }
1201 
1202 bool Aarch64Encoder::CanEncodeMNeg()
1203 {
1204     return true;
1205 }
1206 
1207 bool Aarch64Encoder::CanEncodeOrNot()
1208 {
1209     return true;
1210 }
1211 
1212 bool Aarch64Encoder::CanEncodeAndNot()
1213 {
1214     return true;
1215 }
1216 
1217 bool Aarch64Encoder::CanEncodeXorNot()
1218 {
1219     return true;
1220 }
1221 
1222 size_t Aarch64Encoder::GetCursorOffset() const
1223 {
1224     return GetMasm()->GetBuffer()->GetCursorOffset();
1225 }
1226 
1227 void Aarch64Encoder::SetCursorOffset(size_t offset)
1228 {
1229     GetMasm()->GetBuffer()->Rewind(offset);
1230 }
1231 
1232 /* return the power of 2 for the size of the type */
1233 void Aarch64Encoder::EncodeGetTypeSize(Reg size, Reg type)
1234 {
1235     auto sreg = VixlReg(type);
1236     auto dreg = VixlReg(size);
1237     constexpr uint8_t I16 = 0x5;
1238     constexpr uint8_t I32 = 0x7;
1239     constexpr uint8_t F64 = 0xa;
1240     constexpr uint8_t REF = 0xd;
1241     constexpr uint8_t SMALLREF = ark::OBJECT_POINTER_SIZE < sizeof(uint64_t) ? 1 : 0;
1242     auto end = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1243 
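    // dreg accumulates the log2 of the element size: 0 for 8-bit types, then incremented once for
    // each of the I16/I32/F64 thresholds; references get 3 - SMALLREF when object pointers are compressed.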
1244     GetMasm()->Mov(dreg, VixlImm(0));
1245     GetMasm()->Cmp(sreg, VixlImm(I16));
1246     GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1247     GetMasm()->Cmp(sreg, VixlImm(I32));
1248     GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1249     GetMasm()->Cmp(sreg, VixlImm(F64));
1250     GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1251     GetMasm()->Cmp(sreg, VixlImm(REF));
1252     GetMasm()->B(end, vixl::aarch64::Condition::ne);
1253     GetMasm()->Sub(dreg, dreg, VixlImm(SMALLREF));
1254     GetMasm()->Bind(end);
1255 }
1256 
1257 void Aarch64Encoder::EncodeReverseBits(Reg dst, Reg src)
1258 {
1259     auto rzero = GetRegfile()->GetZeroReg();
1260     if (rzero.GetId() == src.GetId()) {
1261         EncodeMov(dst, Imm(0));
1262         return;
1263     }
1264     ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
1265     ASSERT(src.GetSize() == dst.GetSize());
1266 
1267     GetMasm()->Rbit(VixlReg(dst), VixlReg(src));
1268 }
1269 
1270 void Aarch64Encoder::EncodeCompressedStringCharAt(ArgsCompressedStringCharAt &&args)
1271 {
1272     auto [dst, str, idx, length, tmp, dataOffset, shift] = args;
1273     ASSERT(dst.GetSize() == HALF_SIZE);
1274 
1275     auto labelNotCompressed = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1276     auto labelCharLoaded = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1277     auto vixlTmp = VixlReg(tmp, DOUBLE_WORD_SIZE);
1278     auto vixlDst = VixlReg(dst);
1279 
1280     GetMasm()->Tbnz(VixlReg(length), 0, labelNotCompressed);
1281     EncodeAdd(tmp, str, idx);
1282     GetMasm()->ldrb(vixlDst, MemOperand(vixlTmp, dataOffset));
1283     GetMasm()->B(labelCharLoaded);
1284     GetMasm()->Bind(labelNotCompressed);
1285     EncodeAdd(tmp, str, Shift(idx, shift));
1286     GetMasm()->ldrh(vixlDst, MemOperand(vixlTmp, dataOffset));
1287     GetMasm()->Bind(labelCharLoaded);
1288 }
1289 
1290 void Aarch64Encoder::EncodeCompressedStringCharAtI(ArgsCompressedStringCharAtI &&args)
1291 {
1292     auto [dst, str, length, dataOffset, index, shift] = args;
1293     ASSERT(dst.GetSize() == HALF_SIZE);
1294 
1295     auto labelNotCompressed = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1296     auto labelCharLoaded = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1297     auto vixlStr = VixlReg(str);
1298     auto vixlDst = VixlReg(dst);
1299 
1300     auto rzero = GetRegfile()->GetZeroReg().GetId();
1301     if (str.GetId() == rzero) {
1302         return;
1303     }
1304     GetMasm()->Tbnz(VixlReg(length), 0, labelNotCompressed);
1305     GetMasm()->Ldrb(vixlDst, MemOperand(vixlStr, dataOffset + index));
1306     GetMasm()->B(labelCharLoaded);
1307     GetMasm()->Bind(labelNotCompressed);
1308     GetMasm()->Ldrh(vixlDst, MemOperand(vixlStr, dataOffset + (index << shift)));
1309     GetMasm()->Bind(labelCharLoaded);
1310 }
1311 
1312 /* Unsafe builtins implementation */
1313 void Aarch64Encoder::EncodeCompareAndSwap(Reg dst, Reg obj, Reg offset, Reg val, Reg newval)
1314 {
1315     /* Modeled according to the following logic:
1316       .L2:
1317       ldaxr   cur, [addr]
1318       cmp     cur, old
1319       bne     .L3
1320       stlxr   res, new, [addr]
1321       cbnz    res, .L2
1322       .L3:
1323       cset    w0, eq
1324     */
1325     ScopedTmpReg addr(this, true); /* LR is used */
1326     ScopedTmpReg cur(this, val.GetType());
1327     ScopedTmpReg res(this, val.GetType());
1328     auto loop = CreateLabel();
1329     auto exit = CreateLabel();
1330 
1331     /* ldaxr wants [reg]-form of memref (no offset or disp) */
1332     EncodeAdd(addr, obj, offset);
1333 
1334     BindLabel(loop);
1335     EncodeLdrExclusive(cur, addr, true);
1336     EncodeJump(exit, cur, val, Condition::NE);
1337     cur.Release();
1338     EncodeStrExclusive(res, newval, addr, true);
1339     EncodeJump(loop, res, Imm(0), Condition::NE);
1340     BindLabel(exit);
1341 
1342     GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::eq);
1343 }
1344 
1345 void Aarch64Encoder::EncodeUnsafeGetAndSet(Reg dst, Reg obj, Reg offset, Reg val)
1346 {
1347     auto cur = ScopedTmpReg(this, val.GetType());
1348     auto last = ScopedTmpReg(this, val.GetType());
1349     auto addr = ScopedTmpReg(this, true); /* LR is used */
1350     auto mem = MemRef(addr);
1351     auto restart = CreateLabel();
1352     auto retryLdaxr = CreateLabel();
1353 
1354     /* ldaxr wants [reg]-form of memref (no offset or disp) */
1355     EncodeAdd(addr, obj, offset);
1356 
1357     /* Since GetAndSet is defined as a non-faulting operation we
1358      * have to cover two possible faulty cases:
1359      *      1. stlxr failed, we have to retry ldaxr
1360      *      2. the value we got via ldaxr was not the value we initially
1361      *         loaded, we have to start from the very beginning */
1362     BindLabel(restart);
1363     EncodeLdrAcquire(last, false, mem);
1364 
1365     BindLabel(retryLdaxr);
1366     EncodeLdrExclusive(cur, addr, true);
1367     EncodeJump(restart, cur, last, Condition::NE);
1368     last.Release();
1369     EncodeStrExclusive(dst, val, addr, true);
1370     EncodeJump(retryLdaxr, dst, Imm(0), Condition::NE);
1371 
1372     EncodeMov(dst, cur);
1373 }
1374 
1375 void Aarch64Encoder::EncodeUnsafeGetAndAdd(Reg dst, Reg obj, Reg offset, Reg val, Reg tmp)
1376 {
1377     ScopedTmpReg cur(this, val.GetType());
1378     ScopedTmpReg last(this, val.GetType());
1379     auto newval = Reg(tmp.GetId(), val.GetType());
1380 
1381     auto restart = CreateLabel();
1382     auto retryLdaxr = CreateLabel();
1383 
1384     /* addr_reg aliases obj, obj reg will be restored before exit */
1385     auto addr = Reg(obj.GetId(), INT64_TYPE);
1386 
1387     /* ldaxr wants [reg]-form of memref (no offset or disp) */
1388     auto mem = MemRef(addr);
1389     EncodeAdd(addr, obj, offset);
1390 
1391     /* Since GetAndAdd is defined as a non-faulting operation we
1392      * have to cover two possible faulty cases:
1393      *      1. stlxr failed, we have to retry ldaxr
1394      *      2. the value we got via ldaxr was not the value we initially
1395      *         loaded, we have to start from the very beginning */
1396     BindLabel(restart);
1397     EncodeLdrAcquire(last, false, mem);
1398     EncodeAdd(newval, last, val);
1399 
1400     BindLabel(retryLdaxr);
1401     EncodeLdrExclusive(cur, addr, true);
1402     EncodeJump(restart, cur, last, Condition::NE);
1403     last.Release();
1404     EncodeStrExclusive(dst, newval, addr, true);
1405     EncodeJump(retryLdaxr, dst, Imm(0), Condition::NE);
1406 
1407     EncodeSub(obj, addr, offset); /* restore the original value */
1408     EncodeMov(dst, cur);
1409 }
1410 
1411 void Aarch64Encoder::EncodeMemoryBarrier(memory_order::Order order)
1412 {
1413     switch (order) {
1414         case memory_order::ACQUIRE: {
1415             GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierReads);
1416             break;
1417         }
1418         case memory_order::RELEASE: {
1419             GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierWrites);
1420             break;
1421         }
1422         case memory_order::FULL: {
1423             GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierAll);
1424             break;
1425         }
1426         default:
1427             break;
1428     }
1429 }
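/* With the inner-shareable domain used above, the three orders are expected to map to the
 * usual AArch64 barriers (a sketch of the encodings vixl emits for these calls):
 *   ACQUIRE -> dmb ishld    (orders loads)
 *   RELEASE -> dmb ishst    (orders stores)
 *   FULL    -> dmb ish      (orders loads and stores)
 */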
1430 
1431 void Aarch64Encoder::EncodeNot(Reg dst, Reg src)
1432 {
1433     GetMasm()->Mvn(VixlReg(dst), VixlReg(src));
1434 }
1435 
1436 void Aarch64Encoder::EncodeCastFloat(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1437 {
1438     // We DO NOT support casts from float32/64 to int8/16 and bool, because such a cast is not defined in other
1439     // languages or architectures, so we do not know what the behavior should be.
1440     // There is, however, an implementation in another function: "EncodeCastFloatWithSmallDst". Call it from
1441     // "EncodeCast" instead of "EncodeCastFloat" if needed. It works as follows: cast from float32/64 to int32,
1442     // move the sign bit from int32 into the dst type, then extend the number from the dst type back to int32
1443     // (a requirement of the ISA). All work is done in the dst register.
1444     ASSERT(dst.GetSize() >= WORD_SIZE);
1445 
1446     if (src.IsFloat() && dst.IsScalar()) {
1447         if (dstSigned) {
1448             GetMasm()->Fcvtzs(VixlReg(dst), VixlVReg(src));
1449         } else {
1450             GetMasm()->Fcvtzu(VixlReg(dst), VixlVReg(src));
1451         }
1452         return;
1453     }
1454     if (src.IsScalar() && dst.IsFloat()) {
1455         auto rzero = GetRegfile()->GetZeroReg().GetId();
1456         if (src.GetId() == rzero) {
1457             if (dst.GetSize() == WORD_SIZE) {
1458                 GetMasm()->Fmov(VixlVReg(dst), 0.0F);
1459             } else {
1460                 GetMasm()->Fmov(VixlVReg(dst), 0.0);
1461             }
1462         } else if (srcSigned) {
1463             GetMasm()->Scvtf(VixlVReg(dst), VixlReg(src));
1464         } else {
1465             GetMasm()->Ucvtf(VixlVReg(dst), VixlReg(src));
1466         }
1467         return;
1468     }
1469     if (src.IsFloat() && dst.IsFloat()) {
1470         if (src.GetSize() != dst.GetSize()) {
1471             GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
1472             return;
1473         }
1474         GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
1475         return;
1476     }
1477     UNREACHABLE();
1478 }
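/* For reference, a few concrete instances of the cases handled above (a sketch; register
 * names are illustrative):
 *   f64 -> i32 (signed)   : fcvtzs w_dst, d_src
 *   f32 -> u64 (unsigned) : fcvtzu x_dst, s_src
 *   i32 -> f64 (signed)   : scvtf  d_dst, w_src
 *   f32 -> f64            : fcvt   d_dst, s_src
 *   f64 -> f64            : fmov   d_dst, d_src
 */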
1479 
1480 void Aarch64Encoder::EncodeCastFloatWithSmallDst(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1481 {
1482     // A bool dst type is not supported!
1483 
1484     if (src.IsFloat() && dst.IsScalar()) {
1485         if (dstSigned) {
1486             GetMasm()->Fcvtzs(VixlReg(dst), VixlVReg(src));
1487             if (dst.GetSize() < WORD_SIZE) {
1488                 constexpr uint32_t TEST_BIT = (1U << (static_cast<uint32_t>(WORD_SIZE) - 1));
1489                 ScopedTmpReg tmpReg1(this, dst.GetType());
1490                 auto tmp1 = VixlReg(tmpReg1);
1491                 ScopedTmpReg tmpReg2(this, dst.GetType());
1492                 auto tmp2 = VixlReg(tmpReg2);
1493 
1494                 // NOLINTNEXTLINE(hicpp-signed-bitwise)
1495                 int32_t setBit = (dst.GetSize() == BYTE_SIZE) ? (1UL << (BYTE_SIZE - 1)) : (1UL << (HALF_SIZE - 1));
1496                 int32_t remBit = setBit - 1;
1497                 GetMasm()->Ands(tmp1, VixlReg(dst), TEST_BIT);
1498 
1499                 GetMasm()->Orr(tmp1, VixlReg(dst), setBit);
1500                 GetMasm()->And(tmp2, VixlReg(dst), remBit);
1501                 // Select the result: tmp2 if the zero flag is set, otherwise tmp1
1502                 GetMasm()->Csel(VixlReg(dst), tmp2, tmp1, vixl::aarch64::eq);
1503                 EncodeCastScalar(Reg(dst.GetId(), INT32_TYPE), dstSigned, dst, dstSigned);
1504             }
1505             return;
1506         }
1507         GetMasm()->Fcvtzu(VixlReg(dst), VixlVReg(src));
1508         if (dst.GetSize() < WORD_SIZE) {
1509             EncodeCastScalar(Reg(dst.GetId(), INT32_TYPE), dstSigned, dst, dstSigned);
1510         }
1511         return;
1512     }
1513     if (src.IsScalar() && dst.IsFloat()) {
1514         if (srcSigned) {
1515             GetMasm()->Scvtf(VixlVReg(dst), VixlReg(src));
1516         } else {
1517             GetMasm()->Ucvtf(VixlVReg(dst), VixlReg(src));
1518         }
1519         return;
1520     }
1521     if (src.IsFloat() && dst.IsFloat()) {
1522         if (src.GetSize() != dst.GetSize()) {
1523             GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
1524             return;
1525         }
1526         GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
1527         return;
1528     }
1529     UNREACHABLE();
1530 }
1531 
1532 void Aarch64Encoder::EncodeCastSigned(Reg dst, Reg src)
1533 {
1534     size_t srcSize = src.GetSize();
1535     size_t dstSize = dst.GetSize();
1536     auto srcR = Reg(src.GetId(), dst.GetType());
1537     // Else signed extend
1538     if (srcSize > dstSize) {
1539         srcSize = dstSize;
1540     }
1541     switch (srcSize) {
1542         case BYTE_SIZE:
1543             GetMasm()->Sxtb(VixlReg(dst), VixlReg(srcR));
1544             break;
1545         case HALF_SIZE:
1546             GetMasm()->Sxth(VixlReg(dst), VixlReg(srcR));
1547             break;
1548         case WORD_SIZE:
1549             GetMasm()->Sxtw(VixlReg(dst), VixlReg(srcR));
1550             break;
1551         case DOUBLE_WORD_SIZE:
1552             GetMasm()->Mov(VixlReg(dst), VixlReg(srcR));
1553             break;
1554         default:
1555             SetFalseResult();
1556             break;
1557     }
1558 }
1559 
1560 void Aarch64Encoder::EncodeCastUnsigned(Reg dst, Reg src)
1561 {
1562     size_t srcSize = src.GetSize();
1563     size_t dstSize = dst.GetSize();
1564     auto srcR = Reg(src.GetId(), dst.GetType());
1565     if (srcSize > dstSize && dstSize < WORD_SIZE) {
1566         // We need to truncate the value if the dst is narrower than 32 bits; this is required by the ISA.
1567         int64_t cutValue = (1ULL << dstSize) - 1;
1568         GetMasm()->And(VixlReg(dst), VixlReg(src), VixlImm(cutValue));
1569         return;
1570     }
1571     // Else unsigned extend
1572     switch (srcSize) {
1573         case BYTE_SIZE:
1574             GetMasm()->Uxtb(VixlReg(dst), VixlReg(srcR));
1575             return;
1576         case HALF_SIZE:
1577             GetMasm()->Uxth(VixlReg(dst), VixlReg(srcR));
1578             return;
1579         case WORD_SIZE:
1580             GetMasm()->Uxtw(VixlReg(dst), VixlReg(srcR));
1581             return;
1582         case DOUBLE_WORD_SIZE:
1583             GetMasm()->Mov(VixlReg(dst), VixlReg(srcR));
1584             return;
1585         default:
1586             SetFalseResult();
1587             return;
1588     }
1589 }
1590 
1591 void Aarch64Encoder::EncodeCastScalar(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1592 {
1593     size_t srcSize = src.GetSize();
1594     size_t dstSize = dst.GetSize();
1595     // In our ISA the minimal type is 32-bit, so any narrower type
1596     // has to be extended to 32-bit. Hence a single cast may turn into two casts
1597     // (for example, i8->u16 is performed as i8->u16 followed by u16->u32).
1598     if (dstSize < WORD_SIZE) {
1599         if (srcSize > dstSize) {
1600             if (dstSigned) {
1601                 EncodeCastSigned(dst, src);
1602             } else {
1603                 EncodeCastUnsigned(dst, src);
1604             }
1605             return;
1606         }
1607         if (srcSize == dstSize) {
1608             GetMasm()->Mov(VixlReg(dst), VixlReg(src));
1609             if (!(srcSigned || dstSigned) || (srcSigned && dstSigned)) {
1610                 return;
1611             }
1612             if (dstSigned) {
1613                 EncodeCastSigned(Reg(dst.GetId(), INT32_TYPE), dst);
1614             } else {
1615                 EncodeCastUnsigned(Reg(dst.GetId(), INT32_TYPE), dst);
1616             }
1617             return;
1618         }
1619         if (srcSigned) {
1620             EncodeCastSigned(dst, src);
1621             if (!dstSigned) {
1622                 EncodeCastUnsigned(Reg(dst.GetId(), INT32_TYPE), dst);
1623             }
1624         } else {
1625             EncodeCastUnsigned(dst, src);
1626             if (dstSigned) {
1627                 EncodeCastSigned(Reg(dst.GetId(), INT32_TYPE), dst);
1628             }
1629         }
1630     } else {
1631         if (srcSize == dstSize) {
1632             GetMasm()->Mov(VixlReg(dst), VixlReg(src));
1633             return;
1634         }
1635         if (srcSigned) {
1636             EncodeCastSigned(dst, src);
1637         } else {
1638             EncodeCastUnsigned(dst, src);
1639         }
1640     }
1641 }
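/* A worked example of the two-step rule described above (a sketch): for i8 -> u16 the dst is
 * narrower than 32 bits and the signedness differs, so the encoder first sign-extends the i8
 * source into dst (sxtb) and then zero-extends dst as u16 into the 32-bit register (uxth);
 * e.g. the i8 value -1 (0xFF) ends up as 0x0000FFFF.
 */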
1642 
1643 void Aarch64Encoder::EncodeFastPathDynamicCast(Reg dst, Reg src, LabelHolder::LabelId slow)
1644 {
1645     ASSERT(IsJsNumberCast());
1646     ASSERT(src.IsFloat() && dst.IsScalar());
1647 
1648     CHECK_EQ(src.GetSize(), BITS_PER_UINT64);
1649     CHECK_EQ(dst.GetSize(), BITS_PER_UINT32);
1650 
1651     // We use a slow path because the general JS double -> int32 cast is complex; only a few common cases are
1652     // checked here and the remaining checks are moved to the slow path. If the CPU supports the dedicated
1653     // JS double -> int32 instruction, no slow path is needed.
1654     if (!IsLabelValid(slow)) {
1655         // use special JS aarch64 instruction
1656 #ifndef NDEBUG
1657         vixl::CPUFeaturesScope scope(GetMasm(), vixl::CPUFeatures::kFP, vixl::CPUFeatures::kJSCVT);
1658 #endif
1659         GetMasm()->Fjcvtzs(VixlReg(dst), VixlVReg(src));
1660         return;
1661     }
1662 
1663     // Infinities and too-large values overflow here to INT64_MIN or INT64_MAX, while NaN is converted to 0
1664     GetMasm()->Fcvtzs(VixlReg(dst, DOUBLE_WORD_SIZE), VixlVReg(src));
1665     // check INT64_MIN
1666     GetMasm()->Cmp(VixlReg(dst, DOUBLE_WORD_SIZE), VixlImm(1));
1667     // check INT64_MAX
1668     GetMasm()->Ccmp(VixlReg(dst, DOUBLE_WORD_SIZE), VixlImm(-1), vixl::aarch64::StatusFlags::VFlag,
1669                     vixl::aarch64::Condition::vc);
1670     auto slowLabel {static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(slow)};
1671     // jump to slow path in case of overflow
1672     GetMasm()->B(slowLabel, vixl::aarch64::Condition::vs);
1673 }
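/* A sketch of the two paths above. With FEAT_JSCVT available (no valid slow-path label):
 *       fjcvtzs w_dst, d_src              ; JS-semantics double -> int32 in one instruction
 * Otherwise the macro-assembler calls correspond roughly to:
 *       fcvtzs  x_dst, d_src              ; saturates out-of-range to INT64_MIN/INT64_MAX, NaN -> 0
 *       cmp     x_dst, #1                 ; signed overflow (V) only when x_dst == INT64_MIN
 *       ccmp    x_dst, #-1, #V, vc        ; if no overflow yet, V only when x_dst == INT64_MAX
 *       b.vs    slow_path                 ; saturated result -> let the slow path handle it
 */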
1674 
1675 void Aarch64Encoder::EncodeCast(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1676 {
1677     if (src.IsFloat() || dst.IsFloat()) {
1678         EncodeCastFloat(dst, dstSigned, src, srcSigned);
1679         return;
1680     }
1681 
1682     ASSERT(src.IsScalar() && dst.IsScalar());
1683     auto rzero = GetRegfile()->GetZeroReg().GetId();
1684     if (src.GetId() == rzero) {
1685         ASSERT(dst.GetId() != rzero);
1686         EncodeMov(dst, Imm(0));
1687         return;
1688     }
1689     // Scalar part
1690     EncodeCastScalar(dst, dstSigned, src, srcSigned);
1691 }
1692 
1693 void Aarch64Encoder::EncodeCastToBool(Reg dst, Reg src)
1694 {
1695     // The ISA says that we only support the following casts:
1696     // i32tou1, i64tou1, u32tou1, u64tou1
1697     ASSERT(src.IsScalar());
1698     ASSERT(dst.IsScalar());
1699 
1700     GetMasm()->Cmp(VixlReg(src), VixlImm(0));
1701     // In our ISA the minimal type is 32-bit, so bool is kept in a 32-bit register
1702     GetMasm()->Cset(VixlReg(Reg(dst.GetId(), INT32_TYPE)), vixl::aarch64::Condition::ne);
1703 }
1704 
1705 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src0, Shift src1)
1706 {
1707     if (dst.IsFloat()) {
1708         UNREACHABLE();
1709     }
1710     ASSERT(src0.GetSize() <= dst.GetSize());
1711     if (src0.GetSize() < dst.GetSize()) {
1712         auto src0Reg = Reg(src0.GetId(), dst.GetType());
1713         auto src1Reg = Reg(src1.GetBase().GetId(), dst.GetType());
1714         GetMasm()->Add(VixlReg(dst), VixlReg(src0Reg), VixlShift(Shift(src1Reg, src1.GetType(), src1.GetScale())));
1715         return;
1716     }
1717     GetMasm()->Add(VixlReg(dst), VixlReg(src0), VixlShift(src1));
1718 }
1719 
1720 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src0, Reg src1)
1721 {
1722     if (dst.IsFloat()) {
1723         GetMasm()->Fadd(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1724         return;
1725     }
1726 
1727     /* if any of the operands has 64-bits size,
1728      * forcibly do the 64-bits wide operation */
1729     if ((src0.GetSize() | src1.GetSize() | dst.GetSize()) >= DOUBLE_WORD_SIZE) {
1730         GetMasm()->Add(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1731     } else {
1732         /* Otherwise do 32-bits operation as any lesser
1733          * sizes have to be upcasted to 32-bits anyway */
1734         GetMasm()->Add(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1735     }
1736 }
1737 
1738 void Aarch64Encoder::EncodeSub(Reg dst, Reg src0, Shift src1)
1739 {
1740     ASSERT(dst.IsScalar());
1741     GetMasm()->Sub(VixlReg(dst), VixlReg(src0), VixlShift(src1));
1742 }
1743 
1744 void Aarch64Encoder::EncodeSub(Reg dst, Reg src0, Reg src1)
1745 {
1746     if (dst.IsFloat()) {
1747         GetMasm()->Fsub(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1748         return;
1749     }
1750 
1751     /* if any of the operands has 64-bits size,
1752      * forcibly do the 64-bits wide operation */
1753     if ((src0.GetSize() | src1.GetSize() | dst.GetSize()) >= DOUBLE_WORD_SIZE) {
1754         GetMasm()->Sub(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1755     } else {
1756         /* Otherwise do 32-bits operation as any lesser
1757          * sizes have to be upcasted to 32-bits anyway */
1758         GetMasm()->Sub(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1759     }
1760 }
1761 
1762 void Aarch64Encoder::EncodeMul(Reg dst, Reg src0, Reg src1)
1763 {
1764     if (dst.IsFloat()) {
1765         GetMasm()->Fmul(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1766         return;
1767     }
1768     auto rzero = GetRegfile()->GetZeroReg().GetId();
1769     if (src0.GetId() == rzero || src1.GetId() == rzero) {
1770         EncodeMov(dst, Imm(0));
1771         return;
1772     }
1773     GetMasm()->Mul(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1774 }
1775 
1776 void Aarch64Encoder::EncodeAddOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1777 {
1778     ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1779     ASSERT(cc == Condition::VS || cc == Condition::VC);
1780     if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1781         GetMasm()->Adds(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1782     } else {
1783         /* Otherwise do 32-bits operation as any lesser
1784          * sizes have to be upcasted to 32-bits anyway */
1785         GetMasm()->Adds(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1786     }
1787     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
1788     GetMasm()->B(label, Convert(cc));
1789 }
1790 
1791 void Aarch64Encoder::EncodeSubOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1792 {
1793     ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1794     ASSERT(cc == Condition::VS || cc == Condition::VC);
1795     if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1796         GetMasm()->Subs(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1797     } else {
1798         /* Otherwise do 32-bits operation as any lesser
1799          * sizes have to be upcasted to 32-bits anyway */
1800         GetMasm()->Subs(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1801     }
1802     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
1803     GetMasm()->B(label, Convert(cc));
1804 }
1805 
1806 void Aarch64Encoder::EncodeNegOverflowAndZero(compiler::LabelHolder::LabelId id, Reg dst, Reg src)
1807 {
1808     ASSERT(!dst.IsFloat() && !src.IsFloat());
1809     // NOLINTNEXTLINE(readability-magic-numbers)
1810     EncodeJumpTest(id, src, Imm(0x7fffffff), Condition::TST_EQ);
1811     GetMasm()->Neg(VixlReg(dst).W(), VixlReg(src).W());
1812 }
1813 
1814 void Aarch64Encoder::EncodeDiv(Reg dst, bool dstSigned, Reg src0, Reg src1)
1815 {
1816     if (dst.IsFloat()) {
1817         GetMasm()->Fdiv(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1818         return;
1819     }
1820 
1821     auto rzero = GetRegfile()->GetZeroReg().GetId();
1822     if (src1.GetId() == rzero || src0.GetId() == rzero) {
1823         ScopedTmpReg tmpReg(this, src1.GetType());
1824         EncodeMov(tmpReg, Imm(0));
1825         // Denominator is zero-reg
1826         if (src1.GetId() == rzero) {
1827             // Encode Abort
1828             GetMasm()->Udiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(tmpReg));
1829             return;
1830         }
1831 
1832         // But src1 still may be zero
1833         if (src1.GetId() != src0.GetId()) {
1834             if (dstSigned) {
1835                 GetMasm()->Sdiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(src1));
1836             } else {
1837                 GetMasm()->Udiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(src1));
1838             }
1839             return;
1840         }
1841         UNREACHABLE();
1842     }
1843     if (dstSigned) {
1844         GetMasm()->Sdiv(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1845     } else {
1846         GetMasm()->Udiv(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1847     }
1848 }
1849 
1850 void Aarch64Encoder::EncodeMod(Reg dst, bool dstSigned, Reg src0, Reg src1)
1851 {
1852     if (dst.IsScalar()) {
1853         auto rzero = GetRegfile()->GetZeroReg().GetId();
1854         if (src1.GetId() == rzero || src0.GetId() == rzero) {
1855             ScopedTmpReg tmpReg(this, src1.GetType());
1856             EncodeMov(tmpReg, Imm(0));
1857             // Denominator is zero-reg
1858             if (src1.GetId() == rzero) {
1859                 // Encode Abort
1860                 GetMasm()->Udiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(tmpReg));
1861                 return;
1862             }
1863 
1864             if (src1.GetId() == src0.GetId()) {
1865                 SetFalseResult();
1866                 return;
1867             }
1868             // But src1 still may be zero
1869             ScopedTmpRegU64 tmpRegUd(this);
1870             if (dst.GetSize() < DOUBLE_WORD_SIZE) {
1871                 tmpRegUd.ChangeType(INT32_TYPE);
1872             }
1873             auto tmp = VixlReg(tmpRegUd);
1874             if (!dstSigned) {
1875                 GetMasm()->Udiv(tmp, VixlReg(tmpReg), VixlReg(src1));
1876                 GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(tmpReg));
1877                 return;
1878             }
1879             GetMasm()->Sdiv(tmp, VixlReg(tmpReg), VixlReg(src1));
1880             GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(tmpReg));
1881             return;
1882         }
1883 
1884         ScopedTmpRegU64 tmpReg(this);
1885         if (dst.GetSize() < DOUBLE_WORD_SIZE) {
1886             tmpReg.ChangeType(INT32_TYPE);
1887         }
1888         auto tmp = VixlReg(tmpReg);
1889 
1890         if (!dstSigned) {
1891             GetMasm()->Udiv(tmp, VixlReg(src0), VixlReg(src1));
1892             GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(src0));
1893             return;
1894         }
1895         GetMasm()->Sdiv(tmp, VixlReg(src0), VixlReg(src1));
1896         GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(src0));
1897         return;
1898     }
1899 
1900     EncodeFMod(dst, src0, src1);
1901 }
1902 
1903 void Aarch64Encoder::EncodeFMod(Reg dst, Reg src0, Reg src1)
1904 {
1905     ASSERT(dst.IsFloat());
1906 
1907     if (dst.GetType() == FLOAT32_TYPE) {
1908         using Fp = float (*)(float, float);
1909         MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<Fp>(fmodf)));
1910     } else {
1911         using Fp = double (*)(double, double);
1912         MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<Fp>(fmod)));
1913     }
1914 }
1915 
1916 void Aarch64Encoder::EncodeSignedDiv(Reg dst, Reg src0, Imm imm)
1917 {
1918     int64_t divisor = imm.GetAsInt();
1919 
1920     FastConstSignedDivisor fastDivisor(divisor, dst.GetSize());
1921     int64_t magic = fastDivisor.GetMagic();
1922 
1923     ScopedTmpReg tmp(this, dst.GetType());
1924     Reg tmp64 = tmp.GetReg().As(INT64_TYPE);
1925     EncodeMov(tmp, Imm(magic));
1926 
1927     int64_t extraShift = 0;
1928     if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1929         GetMasm()->Smulh(VixlReg(tmp), VixlReg(src0), VixlReg(tmp));
1930     } else {
1931         GetMasm()->Smull(VixlReg(tmp64), VixlReg(src0), VixlReg(tmp));
1932         extraShift = WORD_SIZE;
1933     }
1934 
1935     bool useSignFlag = false;
1936     if (divisor > 0 && magic < 0) {
1937         GetMasm()->Adds(VixlReg(tmp64), VixlReg(tmp64), VixlShift(Shift(src0.As(INT64_TYPE), extraShift)));
1938         useSignFlag = true;
1939     } else if (divisor < 0 && magic > 0) {
1940         GetMasm()->Subs(VixlReg(tmp64), VixlReg(tmp64), VixlShift(Shift(src0.As(INT64_TYPE), extraShift)));
1941         useSignFlag = true;
1942     }
1943 
1944     int64_t shift = fastDivisor.GetShift();
1945     EncodeAShr(dst.As(INT64_TYPE), tmp64, Imm(shift + extraShift));
1946 
1947     // result = (result < 0 ? result + 1 : result)
1948     if (useSignFlag) {
1949         GetMasm()->Cinc(VixlReg(dst), VixlReg(dst), vixl::aarch64::Condition::mi);
1950     } else {
1951         GetMasm()->Add(VixlReg(dst), VixlReg(dst), VixlShift(Shift(dst, ShiftType::LSR, dst.GetSize() - 1U)));
1952     }
1953 }
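/* A minimal sketch of the sequence emitted above for a 32-bit signed division by a constant
 * (magic and shift come from FastConstSignedDivisor; the 64-bit case uses smulh and no extra
 * shift; register names are illustrative):
 *
 *       mov    w_tmp, #magic
 *       smull  x_tmp, w_src, w_tmp            ; 64-bit product, extraShift = 32
 *       adds   x_tmp, x_tmp, x_src, lsl #32   ; only when divisor > 0 and magic < 0
 *       asr    x_dst, x_tmp, #(32 + shift)
 *       cinc   w_dst, w_dst, mi               ; +1 for a negative result (truncate toward zero)
 *
 * When no adds/subs correction was emitted, the rounding fix is
 *       add    w_dst, w_dst, w_dst, lsr #31
 * instead of the cinc.
 */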
1954 
1955 void Aarch64Encoder::EncodeUnsignedDiv(Reg dst, Reg src0, Imm imm)
1956 {
1957     auto divisor = bit_cast<uint64_t>(imm.GetAsInt());
1958 
1959     FastConstUnsignedDivisor fastDivisor(divisor, dst.GetSize());
1960     uint64_t magic = fastDivisor.GetMagic();
1961 
1962     ScopedTmpReg tmp(this, dst.GetType());
1963     Reg tmp64 = tmp.GetReg().As(INT64_TYPE);
1964     EncodeMov(tmp, Imm(magic));
1965 
1966     uint64_t extraShift = 0;
1967     if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1968         GetMasm()->Umulh(VixlReg(tmp), VixlReg(src0), VixlReg(tmp));
1969     } else {
1970         GetMasm()->Umull(VixlReg(tmp64), VixlReg(src0), VixlReg(tmp));
1971         extraShift = WORD_SIZE;
1972     }
1973 
1974     uint64_t shift = fastDivisor.GetShift();
1975     if (!fastDivisor.GetAdd()) {
1976         EncodeShr(dst.As(INT64_TYPE), tmp64, Imm(shift + extraShift));
1977     } else {
1978         ASSERT(shift >= 1U);
1979         if (extraShift > 0U) {
1980             EncodeShr(tmp64, tmp64, Imm(extraShift));
1981         }
1982         EncodeSub(dst, src0, tmp);
1983         GetMasm()->Add(VixlReg(dst), VixlReg(tmp), VixlShift(Shift(dst, ShiftType::LSR, 1U)));
1984         EncodeShr(dst, dst, Imm(shift - 1U));
1985     }
1986 }
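/* A minimal sketch of the 32-bit unsigned case emitted above (the 64-bit case uses umulh and
 * no extra shift; register names are illustrative):
 *
 *       mov    w_tmp, #magic
 *       umull  x_tmp, w_src, w_tmp            ; extraShift = 32
 * without the "add" correction:
 *       lsr    x_dst, x_tmp, #(32 + shift)
 * and with it (fastDivisor.GetAdd() == true):
 *       lsr    x_tmp, x_tmp, #32
 *       sub    w_dst, w_src, w_tmp
 *       add    w_dst, w_tmp, w_dst, lsr #1
 *       lsr    w_dst, w_dst, #(shift - 1)
 */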
1987 
1988 void Aarch64Encoder::EncodeDiv(Reg dst, Reg src0, Imm imm, bool isSigned)
1989 {
1990     ASSERT(dst.IsScalar() && dst.GetSize() >= WORD_SIZE);
1991     ASSERT(CanOptimizeImmDivMod(bit_cast<uint64_t>(imm.GetAsInt()), isSigned));
1992     if (isSigned) {
1993         EncodeSignedDiv(dst, src0, imm);
1994     } else {
1995         EncodeUnsignedDiv(dst, src0, imm);
1996     }
1997 }
1998 
1999 void Aarch64Encoder::EncodeMod(Reg dst, Reg src0, Imm imm, bool isSigned)
2000 {
2001     ASSERT(dst.IsScalar() && dst.GetSize() >= WORD_SIZE);
2002     ASSERT(CanOptimizeImmDivMod(bit_cast<uint64_t>(imm.GetAsInt()), isSigned));
2003     // dst = src0 - imm * (src0 / imm)
2004     ScopedTmpReg tmp(this, dst.GetType());
2005     EncodeDiv(tmp, src0, imm, isSigned);
2006 
2007     ScopedTmpReg immReg(this, dst.GetType());
2008     EncodeMov(immReg, imm);
2009 
2010     GetMasm()->Msub(VixlReg(dst), VixlReg(immReg), VixlReg(tmp), VixlReg(src0));
2011 }
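/* For example (a sketch), a 32-bit signed "src0 % 10" comes out roughly as:
 *       <EncodeSignedDiv: tmp = src0 / 10 via the magic-number path>
 *       mov    w_imm, #10
 *       msub   w_dst, w_imm, w_tmp, w_src0    ; dst = src0 - 10 * (src0 / 10)
 */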
2012 
2013 void Aarch64Encoder::EncodeMin(Reg dst, bool dstSigned, Reg src0, Reg src1)
2014 {
2015     if (dst.IsFloat()) {
2016         GetMasm()->Fmin(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
2017         return;
2018     }
2019     if (dstSigned) {
2020         GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2021         GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::lt);
2022         return;
2023     }
2024     GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2025     GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::ls);
2026 }
2027 
2028 void Aarch64Encoder::EncodeMax(Reg dst, bool dstSigned, Reg src0, Reg src1)
2029 {
2030     if (dst.IsFloat()) {
2031         GetMasm()->Fmax(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
2032         return;
2033     }
2034     if (dstSigned) {
2035         GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2036         GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::gt);
2037         return;
2038     }
2039     GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2040     GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::hi);
2041 }
2042 
2043 void Aarch64Encoder::EncodeShl(Reg dst, Reg src0, Reg src1)
2044 {
2045     auto rzero = GetRegfile()->GetZeroReg().GetId();
2046     ASSERT(dst.GetId() != rzero);
2047     if (src0.GetId() == rzero) {
2048         EncodeMov(dst, Imm(0));
2049         return;
2050     }
2051     if (src1.GetId() == rzero) {
2052         EncodeMov(dst, src0);
2053     }
2054     if (dst.GetSize() < WORD_SIZE) {
2055         GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
2056     }
2057     GetMasm()->Lsl(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2058 }
2059 
2060 void Aarch64Encoder::EncodeShr(Reg dst, Reg src0, Reg src1)
2061 {
2062     auto rzero = GetRegfile()->GetZeroReg().GetId();
2063     ASSERT(dst.GetId() != rzero);
2064     if (src0.GetId() == rzero) {
2065         EncodeMov(dst, Imm(0));
2066         return;
2067     }
2068     if (src1.GetId() == rzero) {
2069         EncodeMov(dst, src0);
2070     }
2071 
2072     if (dst.GetSize() < WORD_SIZE) {
2073         GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
2074     }
2075 
2076     GetMasm()->Lsr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2077 }
2078 
2079 void Aarch64Encoder::EncodeAShr(Reg dst, Reg src0, Reg src1)
2080 {
2081     auto rzero = GetRegfile()->GetZeroReg().GetId();
2082     ASSERT(dst.GetId() != rzero);
2083     if (src0.GetId() == rzero) {
2084         EncodeMov(dst, Imm(0));
2085         return;
2086     }
2087     if (src1.GetId() == rzero) {
2088         EncodeMov(dst, src0);
2089     }
2090 
2091     if (dst.GetSize() < WORD_SIZE) {
2092         GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
2093     }
2094     GetMasm()->Asr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2095 }
2096 
2097 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src0, Reg src1)
2098 {
2099     GetMasm()->And(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2100 }
2101 
2102 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src0, Shift src1)
2103 {
2104     GetMasm()->And(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2105 }
2106 
2107 void Aarch64Encoder::EncodeOr(Reg dst, Reg src0, Reg src1)
2108 {
2109     GetMasm()->Orr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2110 }
2111 
2112 void Aarch64Encoder::EncodeOr(Reg dst, Reg src0, Shift src1)
2113 {
2114     GetMasm()->Orr(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2115 }
2116 
2117 void Aarch64Encoder::EncodeXor(Reg dst, Reg src0, Reg src1)
2118 {
2119     GetMasm()->Eor(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2120 }
2121 
2122 void Aarch64Encoder::EncodeXor(Reg dst, Reg src0, Shift src1)
2123 {
2124     GetMasm()->Eor(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2125 }
2126 
2127 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src, Imm imm)
2128 {
2129     ASSERT(dst.IsScalar() && "UNIMPLEMENTED");
2130     ASSERT(dst.GetSize() >= src.GetSize());
2131     if (dst.GetSize() != src.GetSize()) {
2132         auto srcReg = Reg(src.GetId(), dst.GetType());
2133         GetMasm()->Add(VixlReg(dst), VixlReg(srcReg), VixlImm(imm));
2134         return;
2135     }
2136     GetMasm()->Add(VixlReg(dst), VixlReg(src), VixlImm(imm));
2137 }
2138 
2139 void Aarch64Encoder::EncodeSub(Reg dst, Reg src, Imm imm)
2140 {
2141     ASSERT(dst.IsScalar() && "UNIMPLEMENTED");
2142     GetMasm()->Sub(VixlReg(dst), VixlReg(src), VixlImm(imm));
2143 }
2144 
2145 void Aarch64Encoder::EncodeShl(Reg dst, Reg src, Imm imm)
2146 {
2147     ASSERT(dst.IsScalar() && "Invalid operand type");
2148     auto rzero = GetRegfile()->GetZeroReg().GetId();
2149     ASSERT(dst.GetId() != rzero);
2150     if (src.GetId() == rzero) {
2151         EncodeMov(dst, Imm(0));
2152         return;
2153     }
2154 
2155     GetMasm()->Lsl(VixlReg(dst), VixlReg(src), imm.GetAsInt());
2156 }
2157 
2158 void Aarch64Encoder::EncodeShr(Reg dst, Reg src, Imm imm)
2159 {
2160     int64_t immValue = static_cast<uint64_t>(imm.GetAsInt()) & (dst.GetSize() - 1);
2161 
2162     ASSERT(dst.IsScalar() && "Invalid operand type");
2163     auto rzero = GetRegfile()->GetZeroReg().GetId();
2164     ASSERT(dst.GetId() != rzero);
2165     if (src.GetId() == rzero) {
2166         EncodeMov(dst, Imm(0));
2167         return;
2168     }
2169 
2170     GetMasm()->Lsr(VixlReg(dst), VixlReg(src), immValue);
2171 }
2172 
2173 void Aarch64Encoder::EncodeAShr(Reg dst, Reg src, Imm imm)
2174 {
2175     ASSERT(dst.IsScalar() && "Invalid operand type");
2176     GetMasm()->Asr(VixlReg(dst), VixlReg(src), imm.GetAsInt());
2177 }
2178 
2179 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src, Imm imm)
2180 {
2181     ASSERT(dst.IsScalar() && "Invalid operand type");
2182     GetMasm()->And(VixlReg(dst), VixlReg(src), VixlImm(imm));
2183 }
2184 
2185 void Aarch64Encoder::EncodeOr(Reg dst, Reg src, Imm imm)
2186 {
2187     ASSERT(dst.IsScalar() && "Invalid operand type");
2188     GetMasm()->Orr(VixlReg(dst), VixlReg(src), VixlImm(imm));
2189 }
2190 
2191 void Aarch64Encoder::EncodeXor(Reg dst, Reg src, Imm imm)
2192 {
2193     ASSERT(dst.IsScalar() && "Invalid operand type");
2194     GetMasm()->Eor(VixlReg(dst), VixlReg(src), VixlImm(imm));
2195 }
2196 
2197 void Aarch64Encoder::EncodeMov(Reg dst, Imm src)
2198 {
2199     if (dst.IsFloat()) {
2200         if (dst.GetSize() == WORD_SIZE) {
2201             GetMasm()->Fmov(VixlVReg(dst), src.GetAsFloat());
2202         } else {
2203             GetMasm()->Fmov(VixlVReg(dst), src.GetAsDouble());
2204         }
2205         return;
2206     }
2207     if (dst.GetSize() > WORD_SIZE) {
2208         GetMasm()->Mov(VixlReg(dst), VixlImm(src));
2209     } else {
2210         GetMasm()->Mov(VixlReg(dst), VixlImm(static_cast<int32_t>(src.GetAsInt())));
2211     }
2212 }
2213 
2214 void Aarch64Encoder::EncodeLdr(Reg dst, bool dstSigned, MemRef mem)
2215 {
2216     auto rzero = GetRegfile()->GetZeroReg().GetId();
2217     if (!ConvertMem(mem).IsValid() || (dst.GetId() == rzero && dst.IsScalar())) {
2218         // Try to move the zero reg into dst (to avoid creating a temp reg)
2219         // Check: dst is not a vector, not the index, and not rzero
2220         [[maybe_unused]] auto baseReg = mem.GetBase();
2221         auto indexReg = mem.GetIndex();
2222 
2223         // Invalid == base is rzero or invalid
2224         ASSERT(baseReg.GetId() == rzero || !baseReg.IsValid());
2225         // Checks whether the dst register itself can be used
2226         if (dst.IsScalar() && dst.IsValid() &&    // not float
2227             (indexReg.GetId() != dst.GetId()) &&  // not index
2228             (dst.GetId() != rzero)) {             // not rzero
2229             // dst may be used in place of rzero
2230             EncodeMov(dst, Imm(0));
2231 
2232             auto fixMem = MemRef(dst, indexReg, mem.GetScale(), mem.GetDisp());
2233             ASSERT(ConvertMem(fixMem).IsValid());
2234             EncodeLdr(dst, dstSigned, fixMem);
2235         } else {
2236             // Use tmp-reg
2237             ScopedTmpReg tmpReg(this);
2238             EncodeMov(tmpReg, Imm(0));
2239 
2240             auto fixMem = MemRef(tmpReg, indexReg, mem.GetScale(), mem.GetDisp());
2241             ASSERT(ConvertMem(fixMem).IsValid());
2242             // Used for zero-dst
2243             EncodeLdr(tmpReg, dstSigned, fixMem);
2244         }
2245         return;
2246     }
2247     ASSERT(ConvertMem(mem).IsValid());
2248     if (dst.IsFloat()) {
2249         GetMasm()->Ldr(VixlVReg(dst), ConvertMem(mem));
2250         return;
2251     }
2252     if (dstSigned) {
2253         if (dst.GetSize() == BYTE_SIZE) {
2254             GetMasm()->Ldrsb(VixlReg(dst, DOUBLE_WORD_SIZE), ConvertMem(mem));
2255             return;
2256         }
2257         if (dst.GetSize() == HALF_SIZE) {
2258             GetMasm()->Ldrsh(VixlReg(dst), ConvertMem(mem));
2259             return;
2260         }
2261     } else {
2262         if (dst.GetSize() == BYTE_SIZE) {
2263             GetMasm()->Ldrb(VixlReg(dst, WORD_SIZE), ConvertMem(mem));
2264             return;
2265         }
2266         if (dst.GetSize() == HALF_SIZE) {
2267             GetMasm()->Ldrh(VixlReg(dst), ConvertMem(mem));
2268             return;
2269         }
2270     }
2271     GetMasm()->Ldr(VixlReg(dst), ConvertMem(mem));
2272 }
2273 
2274 void Aarch64Encoder::EncodeLdrAcquireInvalid(Reg dst, bool dstSigned, MemRef mem)
2275 {
2276     // Try to move the zero reg into dst (to avoid creating a temp reg)
2277     // Check: dst is not a vector, not the index, and not rzero
2278     [[maybe_unused]] auto baseReg = mem.GetBase();
2279     auto rzero = GetRegfile()->GetZeroReg().GetId();
2280 
2281     auto indexReg = mem.GetIndex();
2282 
2283     // Invalid == base is rzero or invalid
2284     ASSERT(baseReg.GetId() == rzero || !baseReg.IsValid());
2285     // Checks whether the dst register itself can be used
2286     if (dst.IsScalar() && dst.IsValid() &&    // not float
2287         (indexReg.GetId() != dst.GetId()) &&  // not index
2288         (dst.GetId() != rzero)) {             // not rzero
2289         // dst may be used in place of rzero
2290         EncodeMov(dst, Imm(0));
2291 
2292         auto fixMem = MemRef(dst, indexReg, mem.GetScale(), mem.GetDisp());
2293         ASSERT(ConvertMem(fixMem).IsValid());
2294         EncodeLdrAcquire(dst, dstSigned, fixMem);
2295     } else {
2296         // Use tmp-reg
2297         ScopedTmpReg tmpReg(this);
2298         EncodeMov(tmpReg, Imm(0));
2299 
2300         auto fixMem = MemRef(tmpReg, indexReg, mem.GetScale(), mem.GetDisp());
2301         ASSERT(ConvertMem(fixMem).IsValid());
2302         // Used for zero-dst
2303         EncodeLdrAcquire(tmpReg, dstSigned, fixMem);
2304     }
2305 }
2306 
2307 void Aarch64Encoder::EncodeLdrAcquireScalar(Reg dst, bool dstSigned, MemRef mem)
2308 {
2309 #ifndef NDEBUG
2310     CheckAlignment(mem, dst.GetSize());
2311 #endif  // NDEBUG
2312     if (dstSigned) {
2313         if (dst.GetSize() == BYTE_SIZE) {
2314             GetMasm()->Ldarb(VixlReg(dst), ConvertMem(mem));
2315             GetMasm()->Sxtb(VixlReg(dst), VixlReg(dst));
2316             return;
2317         }
2318         if (dst.GetSize() == HALF_SIZE) {
2319             GetMasm()->Ldarh(VixlReg(dst), ConvertMem(mem));
2320             GetMasm()->Sxth(VixlReg(dst), VixlReg(dst));
2321             return;
2322         }
2323         if (dst.GetSize() == WORD_SIZE) {
2324             GetMasm()->Ldar(VixlReg(dst), ConvertMem(mem));
2325             GetMasm()->Sxtw(VixlReg(dst), VixlReg(dst));
2326             return;
2327         }
2328     } else {
2329         if (dst.GetSize() == BYTE_SIZE) {
2330             GetMasm()->Ldarb(VixlReg(dst, WORD_SIZE), ConvertMem(mem));
2331             return;
2332         }
2333         if (dst.GetSize() == HALF_SIZE) {
2334             GetMasm()->Ldarh(VixlReg(dst), ConvertMem(mem));
2335             return;
2336         }
2337     }
2338     GetMasm()->Ldar(VixlReg(dst), ConvertMem(mem));
2339 }
2340 
2341 void Aarch64Encoder::CheckAlignment(MemRef mem, size_t size)
2342 {
2343     ASSERT(size == WORD_SIZE || size == BYTE_SIZE || size == HALF_SIZE || size == DOUBLE_WORD_SIZE);
2344     if (size == BYTE_SIZE) {
2345         return;
2346     }
2347     size_t alignmentMask = (size >> 3U) - 1;
2348     ASSERT(!mem.HasIndex() && !mem.HasScale());
2349     if (mem.HasDisp()) {
2350         // We would need an additional tmp register to check base + offset together.
2351         // The case where the base and the offset are individually unaligned but their sum is aligned is very rare,
2352         // so the alignment of the base and of the offset is checked separately.
2353         [[maybe_unused]] size_t offset = mem.GetDisp();
2354         ASSERT((offset & alignmentMask) == 0);
2355     }
2356     auto baseReg = mem.GetBase();
2357     auto end = CreateLabel();
2358     EncodeJumpTest(end, baseReg, Imm(alignmentMask), Condition::TST_EQ);
2359     EncodeAbort();
2360     BindLabel(end);
2361 }
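/* Sizes here are in bits, so alignmentMask == (size / 8) - 1; e.g. a 32-bit access gives a
 * mask of 3, and the run-time part of the check is roughly (a sketch; the abort is whatever
 * EncodeAbort() emits):
 *       tst    x_base, #3
 *       b.eq   end
 *       <abort>
 *   end:
 */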
2362 
2363 void Aarch64Encoder::EncodeLdrAcquire(Reg dst, bool dstSigned, MemRef mem)
2364 {
2365     if (mem.HasIndex()) {
2366         ScopedTmpRegU64 tmpReg(this);
2367         if (mem.HasScale()) {
2368             EncodeAdd(tmpReg, mem.GetBase(), Shift(mem.GetIndex(), mem.GetScale()));
2369         } else {
2370             EncodeAdd(tmpReg, mem.GetBase(), mem.GetIndex());
2371         }
2372         mem = MemRef(tmpReg, mem.GetDisp());
2373     }
2374 
2375     auto rzero = GetRegfile()->GetZeroReg().GetId();
2376     if (!ConvertMem(mem).IsValid() || (dst.GetId() == rzero && dst.IsScalar())) {
2377         EncodeLdrAcquireInvalid(dst, dstSigned, mem);
2378         return;
2379     }
2380 
2381     ASSERT(!mem.HasIndex() && !mem.HasScale());
2382     if (dst.IsFloat()) {
2383         ScopedTmpRegU64 tmpReg(this);
2384         auto memLdar = mem;
2385         if (mem.HasDisp()) {
2386             if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2387                 EncodeAdd(tmpReg, mem.GetBase(), Imm(mem.GetDisp()));
2388             } else {
2389                 EncodeMov(tmpReg, Imm(mem.GetDisp()));
2390                 EncodeAdd(tmpReg, mem.GetBase(), tmpReg);
2391             }
2392             memLdar = MemRef(tmpReg);
2393         }
2394 #ifndef NDEBUG
2395         CheckAlignment(memLdar, dst.GetSize());
2396 #endif  // NDEBUG
2397         auto tmp = VixlReg(tmpReg, dst.GetSize());
2398         GetMasm()->Ldar(tmp, ConvertMem(memLdar));
2399         GetMasm()->Fmov(VixlVReg(dst), tmp);
2400         return;
2401     }
2402 
2403     if (!mem.HasDisp()) {
2404         EncodeLdrAcquireScalar(dst, dstSigned, mem);
2405         return;
2406     }
2407 
2408     Reg dst64(dst.GetId(), INT64_TYPE);
2409     if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2410         EncodeAdd(dst64, mem.GetBase(), Imm(mem.GetDisp()));
2411     } else {
2412         EncodeMov(dst64, Imm(mem.GetDisp()));
2413         EncodeAdd(dst64, mem.GetBase(), dst64);
2414     }
2415     EncodeLdrAcquireScalar(dst, dstSigned, MemRef(dst64));
2416 }
2417 
2418 void Aarch64Encoder::EncodeStr(Reg src, MemRef mem)
2419 {
2420     if (!ConvertMem(mem).IsValid()) {
2421         auto indexReg = mem.GetIndex();
2422         auto rzero = GetRegfile()->GetZeroReg().GetId();
2423         // Invalid == base is rzero or invalid
2424         ASSERT(mem.GetBase().GetId() == rzero || !mem.GetBase().IsValid());
2425         // Use tmp-reg
2426         ScopedTmpReg tmpReg(this);
2427         EncodeMov(tmpReg, Imm(0));
2428 
2429         auto fixMem = MemRef(tmpReg, indexReg, mem.GetScale(), mem.GetDisp());
2430         ASSERT(ConvertMem(fixMem).IsValid());
2431         if (src.GetId() != rzero) {
2432             EncodeStr(src, fixMem);
2433         } else {
2434             EncodeStr(tmpReg, fixMem);
2435         }
2436         return;
2437     }
2438     ASSERT(ConvertMem(mem).IsValid());
2439     if (src.IsFloat()) {
2440         GetMasm()->Str(VixlVReg(src), ConvertMem(mem));
2441         return;
2442     }
2443     if (src.GetSize() == BYTE_SIZE) {
2444         GetMasm()->Strb(VixlReg(src), ConvertMem(mem));
2445         return;
2446     }
2447     if (src.GetSize() == HALF_SIZE) {
2448         GetMasm()->Strh(VixlReg(src), ConvertMem(mem));
2449         return;
2450     }
2451     GetMasm()->Str(VixlReg(src), ConvertMem(mem));
2452 }
2453 
2454 void Aarch64Encoder::EncodeStrRelease(Reg src, MemRef mem)
2455 {
2456     ScopedTmpRegLazy base(this);
2457     MemRef fixedMem;
2458     bool memWasFixed = false;
2459     if (mem.HasDisp()) {
2460         if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2461             base.AcquireIfInvalid();
2462             EncodeAdd(base, mem.GetBase(), Imm(mem.GetDisp()));
2463         } else {
2464             base.AcquireIfInvalid();
2465             EncodeMov(base, Imm(mem.GetDisp()));
2466             EncodeAdd(base, mem.GetBase(), base);
2467         }
2468         memWasFixed = true;
2469     }
2470     if (mem.HasIndex()) {
2471         base.AcquireIfInvalid();
2472         if (mem.HasScale()) {
2473             EncodeAdd(base, memWasFixed ? base : mem.GetBase(), Shift(mem.GetIndex(), mem.GetScale()));
2474         } else {
2475             EncodeAdd(base, memWasFixed ? base : mem.GetBase(), mem.GetIndex());
2476         }
2477         memWasFixed = true;
2478     }
2479 
2480     if (memWasFixed) {
2481         fixedMem = MemRef(base);
2482     } else {
2483         fixedMem = mem;
2484     }
2485 
2486 #ifndef NDEBUG
2487     CheckAlignment(fixedMem, src.GetSize());
2488 #endif  // NDEBUG
2489     if (src.IsFloat()) {
2490         ScopedTmpRegU64 tmpReg(this);
2491         auto tmp = VixlReg(tmpReg, src.GetSize());
2492         GetMasm()->Fmov(tmp, VixlVReg(src));
2493         GetMasm()->Stlr(tmp, ConvertMem(fixedMem));
2494         return;
2495     }
2496     if (src.GetSize() == BYTE_SIZE) {
2497         GetMasm()->Stlrb(VixlReg(src), ConvertMem(fixedMem));
2498         return;
2499     }
2500     if (src.GetSize() == HALF_SIZE) {
2501         GetMasm()->Stlrh(VixlReg(src), ConvertMem(fixedMem));
2502         return;
2503     }
2504     GetMasm()->Stlr(VixlReg(src), ConvertMem(fixedMem));
2505 }
2506 
2507 void Aarch64Encoder::EncodeLdrExclusive(Reg dst, Reg addr, bool acquire)
2508 {
2509     ASSERT(dst.IsScalar());
2510     auto dstReg = VixlReg(dst);
2511     auto memCvt = ConvertMem(MemRef(addr));
2512 #ifndef NDEBUG
2513     CheckAlignment(MemRef(addr), dst.GetSize());
2514 #endif  // NDEBUG
2515     if (dst.GetSize() == BYTE_SIZE) {
2516         if (acquire) {
2517             GetMasm()->Ldaxrb(dstReg, memCvt);
2518             return;
2519         }
2520         GetMasm()->Ldxrb(dstReg, memCvt);
2521         return;
2522     }
2523     if (dst.GetSize() == HALF_SIZE) {
2524         if (acquire) {
2525             GetMasm()->Ldaxrh(dstReg, memCvt);
2526             return;
2527         }
2528         GetMasm()->Ldxrh(dstReg, memCvt);
2529         return;
2530     }
2531     if (acquire) {
2532         GetMasm()->Ldaxr(dstReg, memCvt);
2533         return;
2534     }
2535     GetMasm()->Ldxr(dstReg, memCvt);
2536 }
2537 
2538 void Aarch64Encoder::EncodeStrExclusive(Reg dst, Reg src, Reg addr, bool release)
2539 {
2540     ASSERT(dst.IsScalar() && src.IsScalar());
2541 
2542     bool copyDst = dst.GetId() == src.GetId() || dst.GetId() == addr.GetId();
2543     ScopedTmpReg tmp(this);
2544     auto srcReg = VixlReg(src);
2545     auto memCvt = ConvertMem(MemRef(addr));
2546     auto dstReg = copyDst ? VixlReg(tmp) : VixlReg(dst);
2547 #ifndef NDEBUG
2548     CheckAlignment(MemRef(addr), src.GetSize());
2549 #endif  // NDEBUG
2550 
2551     if (src.GetSize() == BYTE_SIZE) {
2552         if (release) {
2553             GetMasm()->Stlxrb(dstReg, srcReg, memCvt);
2554         } else {
2555             GetMasm()->Stxrb(dstReg, srcReg, memCvt);
2556         }
2557     } else if (src.GetSize() == HALF_SIZE) {
2558         if (release) {
2559             GetMasm()->Stlxrh(dstReg, srcReg, memCvt);
2560         } else {
2561             GetMasm()->Stxrh(dstReg, srcReg, memCvt);
2562         }
2563     } else {
2564         if (release) {
2565             GetMasm()->Stlxr(dstReg, srcReg, memCvt);
2566         } else {
2567             GetMasm()->Stxr(dstReg, srcReg, memCvt);
2568         }
2569     }
2570     if (copyDst) {
2571         EncodeMov(dst, tmp);
2572     }
2573 }
2574 
2575 void Aarch64Encoder::EncodeStrz(Reg src, MemRef mem)
2576 {
2577     if (!ConvertMem(mem).IsValid()) {
2578         EncodeStr(src, mem);
2579         return;
2580     }
2581     ASSERT(ConvertMem(mem).IsValid());
2582     // The upper half of the register must be zeroed by default
2583     if (src.IsFloat()) {
2584         EncodeStr(src.As(FLOAT64_TYPE), mem);
2585         return;
2586     }
2587     if (src.GetSize() < WORD_SIZE) {
2588         EncodeCast(src, false, src.As(INT64_TYPE), false);
2589     }
2590     GetMasm()->Str(VixlReg(src.As(INT64_TYPE)), ConvertMem(mem));
2591 }
2592 
2593 void Aarch64Encoder::EncodeSti(int64_t src, uint8_t srcSizeBytes, MemRef mem)
2594 {
2595     if (mem.IsValid() && mem.IsOffsetMem() && src == 0 && srcSizeBytes == 1) {
2596         auto rzero = GetRegfile()->GetZeroReg();
2597         GetMasm()->Strb(VixlReg(rzero), ConvertMem(mem));
2598         return;
2599     }
2600     if (!ConvertMem(mem).IsValid()) {
2601         auto rzero = GetRegfile()->GetZeroReg();
2602         EncodeStr(rzero, mem);
2603         return;
2604     }
2605 
2606     ScopedTmpRegU64 tmpReg(this);
2607     auto tmp = VixlReg(tmpReg);
2608     GetMasm()->Mov(tmp, VixlImm(src));
2609     if (srcSizeBytes == 1U) {
2610         GetMasm()->Strb(tmp, ConvertMem(mem));
2611         return;
2612     }
2613     if (srcSizeBytes == HALF_WORD_SIZE_BYTES) {
2614         GetMasm()->Strh(tmp, ConvertMem(mem));
2615         return;
2616     }
2617     ASSERT((srcSizeBytes == WORD_SIZE_BYTES) || (srcSizeBytes == DOUBLE_WORD_SIZE_BYTES));
2618     GetMasm()->Str(tmp, ConvertMem(mem));
2619 }
2620 
2621 void Aarch64Encoder::EncodeSti(float src, MemRef mem)
2622 {
2623     if (!ConvertMem(mem).IsValid()) {
2624         auto rzero = GetRegfile()->GetZeroReg();
2625         EncodeStr(rzero, mem);
2626         return;
2627     }
2628     ScopedTmpRegF32 tmpReg(this);
2629     GetMasm()->Fmov(VixlVReg(tmpReg).S(), src);
2630     EncodeStr(tmpReg, mem);
2631 }
2632 
2633 void Aarch64Encoder::EncodeSti(double src, MemRef mem)
2634 {
2635     if (!ConvertMem(mem).IsValid()) {
2636         auto rzero = GetRegfile()->GetZeroReg();
2637         EncodeStr(rzero, mem);
2638         return;
2639     }
2640     ScopedTmpRegF64 tmpReg(this);
2641     GetMasm()->Fmov(VixlVReg(tmpReg).D(), src);
2642     EncodeStr(tmpReg, mem);
2643 }
2644 
2645 void Aarch64Encoder::EncodeMemCopy(MemRef memFrom, MemRef memTo, size_t size)
2646 {
2647     if (!ConvertMem(memFrom).IsValid() || !ConvertMem(memTo).IsValid()) {
2648         auto rzero = GetRegfile()->GetZeroReg();
2649         if (!ConvertMem(memFrom).IsValid()) {
2650             // Encode one load - will fix inside
2651             EncodeLdr(rzero, false, memFrom);
2652         } else {
2653             ASSERT(!ConvertMem(memTo).IsValid());
2654             // Encode one store - will fix inside
2655             EncodeStr(rzero, memTo);
2656         }
2657         return;
2658     }
2659     ASSERT(ConvertMem(memFrom).IsValid());
2660     ASSERT(ConvertMem(memTo).IsValid());
2661     ScopedTmpRegU64 tmpReg(this);
2662     auto tmp = VixlReg(tmpReg, std::min(size, static_cast<size_t>(DOUBLE_WORD_SIZE)));
2663     if (size == BYTE_SIZE) {
2664         GetMasm()->Ldrb(tmp, ConvertMem(memFrom));
2665         GetMasm()->Strb(tmp, ConvertMem(memTo));
2666     } else if (size == HALF_SIZE) {
2667         GetMasm()->Ldrh(tmp, ConvertMem(memFrom));
2668         GetMasm()->Strh(tmp, ConvertMem(memTo));
2669     } else {
2670         ASSERT(size == WORD_SIZE || size == DOUBLE_WORD_SIZE);
2671         GetMasm()->Ldr(tmp, ConvertMem(memFrom));
2672         GetMasm()->Str(tmp, ConvertMem(memTo));
2673     }
2674 }
2675 
2676 void Aarch64Encoder::EncodeMemCopyz(MemRef memFrom, MemRef memTo, size_t size)
2677 {
2678     if (!ConvertMem(memFrom).IsValid() || !ConvertMem(memTo).IsValid()) {
2679         auto rzero = GetRegfile()->GetZeroReg();
2680         if (!ConvertMem(memFrom).IsValid()) {
2681             // Encode one load - will fix inside
2682             EncodeLdr(rzero, false, memFrom);
2683         } else {
2684             ASSERT(!ConvertMem(memTo).IsValid());
2685             // Encode one store - will fix inside
2686             EncodeStr(rzero, memTo);
2687         }
2688         return;
2689     }
2690     ASSERT(ConvertMem(memFrom).IsValid());
2691     ASSERT(ConvertMem(memTo).IsValid());
2692     ScopedTmpRegU64 tmpReg(this);
2693     auto tmp = VixlReg(tmpReg, std::min(size, static_cast<size_t>(DOUBLE_WORD_SIZE)));
2694     auto zero = VixlReg(GetRegfile()->GetZeroReg(), WORD_SIZE);
2695     if (size == BYTE_SIZE) {
2696         GetMasm()->Ldrb(tmp, ConvertMem(memFrom));
2697         GetMasm()->Stp(tmp, zero, ConvertMem(memTo));
2698     } else if (size == HALF_SIZE) {
2699         GetMasm()->Ldrh(tmp, ConvertMem(memFrom));
2700         GetMasm()->Stp(tmp, zero, ConvertMem(memTo));
2701     } else {
2702         ASSERT(size == WORD_SIZE || size == DOUBLE_WORD_SIZE);
2703         GetMasm()->Ldr(tmp, ConvertMem(memFrom));
2704         if (size == WORD_SIZE) {
2705             GetMasm()->Stp(tmp, zero, ConvertMem(memTo));
2706         } else {
2707             GetMasm()->Str(tmp, ConvertMem(memTo));
2708         }
2709     }
2710 }
2711 
2712 void Aarch64Encoder::EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc)
2713 {
2714     ASSERT(src0.IsFloat() == src1.IsFloat());
2715     if (src0.IsFloat()) {
2716         GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
2717     } else {
2718         GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2719     }
2720     GetMasm()->Cset(VixlReg(dst), Convert(cc));
2721 }
2722 
2723 void Aarch64Encoder::EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc)
2724 {
2725     ASSERT(src0.IsScalar() && src1.IsScalar());
2726 
2727     GetMasm()->Tst(VixlReg(src0), VixlReg(src1));
2728     GetMasm()->Cset(VixlReg(dst), ConvertTest(cc));
2729 }
2730 
2731 void Aarch64Encoder::EncodeAtomicByteOr(Reg addr, Reg value, bool fastEncoding)
2732 {
2733     if (fastEncoding) {
2734 #ifndef NDEBUG
2735         vixl::CPUFeaturesScope scope(GetMasm(), vixl::CPUFeatures::kAtomics);
2736 #endif
2737         GetMasm()->Stsetb(VixlReg(value, BYTE_SIZE), MemOperand(VixlReg(addr)));
2738         return;
2739     }
2740 
2741     // Slow encoding, should not be used in production code!!!
2742     auto linkReg = GetTarget().GetLinkReg();
2743     auto frameReg = GetTarget().GetFrameReg();
2744     static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTES;
2745 
2746     ScopedTmpRegLazy tmp1(this);
2747     ScopedTmpRegLazy tmp2(this);
2748     Reg orValue;
2749     Reg storeResult;
2750     bool hasTemps = GetScratchRegistersWithLrCount() >= 2U;
2751     if (hasTemps) {
2752         tmp1.AcquireWithLr();
2753         tmp2.AcquireWithLr();
2754         orValue = tmp1.GetReg().As(INT32_TYPE);
2755         storeResult = tmp2.GetReg().As(INT32_TYPE);
2756     } else {
2757         GetMasm()->stp(VixlReg(frameReg), VixlReg(linkReg),
2758                        MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
2759         orValue = frameReg.As(INT32_TYPE);
2760         storeResult = linkReg.As(INT32_TYPE);
2761     }
2762 
2763     auto *loop = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
2764     GetMasm()->Bind(loop);
2765     GetMasm()->Ldxrb(VixlReg(orValue), MemOperand(VixlReg(addr)));
2766     GetMasm()->Orr(VixlReg(orValue), VixlReg(orValue), VixlReg(value, WORD_SIZE));
2767     GetMasm()->Stxrb(VixlReg(storeResult), VixlReg(orValue), MemOperand(VixlReg(addr)));
2768     GetMasm()->Cbnz(VixlReg(storeResult), loop);
2769     if (!hasTemps) {
2770         GetMasm()->ldp(VixlReg(frameReg), VixlReg(linkReg),
2771                        MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
2772     }
2773 }
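/* The fast path above relies on the LSE atomics extension: stsetb performs the byte-wide
 * atomic OR into memory as a single instruction. A sketch of both variants:
 *   fast:       stsetb w_value, [x_addr]
 *   slow: loop: ldxrb  w_or, [x_addr]
 *               orr    w_or, w_or, w_value
 *               stxrb  w_status, w_or, [x_addr]
 *               cbnz   w_status, loop
 * The slow variant additionally spills fp/lr around the loop (stp/ldp on sp) when fewer than
 * two scratch registers are available.
 */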
2774 
2775 void Aarch64Encoder::EncodeCmp(Reg dst, Reg src0, Reg src1, Condition cc)
2776 {
2777     if (src0.IsFloat()) {
2778         ASSERT(src1.IsFloat());
2779         ASSERT(cc == Condition::MI || cc == Condition::LT);
2780         GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
2781     } else {
2782         ASSERT(src0.IsScalar() && src1.IsScalar());
2783         ASSERT(cc == Condition::LO || cc == Condition::LT);
2784         GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2785     }
2786     GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::ne);
2787     GetMasm()->Cneg(VixlReg(Promote(dst)), VixlReg(Promote(dst)), Convert(cc));
2788 }
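/* EncodeCmp produces the three-way -1/0/+1 "cmp" result. A sketch of the scalar case:
 *       cmp    w_src0, w_src1
 *       cset   w_dst, ne                  ; dst = (src0 != src1) ? 1 : 0
 *       cneg   w_dst, w_dst, lt           ; negate when src0 < src1 (lo unsigned, mi for floats)
 * so dst ends up as -1, 0 or +1.
 */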
2789 
2790 void Aarch64Encoder::EncodeSelect(ArgsSelect &&args)
2791 {
2792     auto [dst, src0, src1, src2, src3, cc] = args;
2793     if (src2.IsScalar()) {
2794         GetMasm()->Cmp(VixlReg(src2), VixlReg(src3));
2795     } else {
2796         GetMasm()->Fcmp(VixlVReg(src2), VixlVReg(src3));
2797     }
2798     if (dst.IsFloat()) {
2799         GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), Convert(cc));
2800     } else {
2801         GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), Convert(cc));
2802     }
2803 }
2804 
2805 void Aarch64Encoder::EncodeSelect(ArgsSelectImm &&args)
2806 {
2807     auto [dst, src0, src1, src2, imm, cc] = args;
2808     if (src2.IsScalar()) {
2809         GetMasm()->Cmp(VixlReg(src2), VixlImm(imm));
2810     } else {
2811         GetMasm()->Fcmp(VixlVReg(src2), imm.GetAsDouble());
2812     }
2813     if (dst.IsFloat()) {
2814         GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), Convert(cc));
2815     } else {
2816         GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), Convert(cc));
2817     }
2818 }
2819 
2820 void Aarch64Encoder::EncodeSelectTest(ArgsSelect &&args)
2821 {
2822     auto [dst, src0, src1, src2, src3, cc] = args;
2823     ASSERT(!src2.IsFloat() && !src3.IsFloat());
2824     GetMasm()->Tst(VixlReg(src2), VixlReg(src3));
2825     if (dst.IsFloat()) {
2826         GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), ConvertTest(cc));
2827     } else {
2828         GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), ConvertTest(cc));
2829     }
2830 }
2831 
2832 void Aarch64Encoder::EncodeSelectTest(ArgsSelectImm &&args)
2833 {
2834     auto [dst, src0, src1, src2, imm, cc] = args;
2835     ASSERT(!src2.IsFloat());
2836     ASSERT(CanEncodeImmLogical(imm.GetAsInt(), src2.GetSize() > WORD_SIZE ? DOUBLE_WORD_SIZE : WORD_SIZE));
2837     GetMasm()->Tst(VixlReg(src2), VixlImm(imm));
2838     if (dst.IsFloat()) {
2839         GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), ConvertTest(cc));
2840     } else {
2841         GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), ConvertTest(cc));
2842     }
2843 }
2844 
2845 void Aarch64Encoder::EncodeLdp(Reg dst0, Reg dst1, bool dstSigned, MemRef mem)
2846 {
2847     ASSERT(dst0.IsFloat() == dst1.IsFloat());
2848     ASSERT(dst0.GetSize() == dst1.GetSize());
2849     if (!ConvertMem(mem).IsValid()) {
2850         // The memory operand cannot be encoded directly; fall back to EncodeLdr, which fixes up the address internally
2851         EncodeLdr(dst0, dstSigned, mem);
2852         return;
2853     }
2854 
2855     if (dst0.IsFloat()) {
2856         GetMasm()->Ldp(VixlVReg(dst0), VixlVReg(dst1), ConvertMem(mem));
2857         return;
2858     }
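         // A signed 32-bit pair load uses Ldpsw, which sign-extends both words into 64-bit registers.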
2859     if (dstSigned && dst0.GetSize() == WORD_SIZE) {
2860         GetMasm()->Ldpsw(VixlReg(dst0, DOUBLE_WORD_SIZE), VixlReg(dst1, DOUBLE_WORD_SIZE), ConvertMem(mem));
2861         return;
2862     }
2863     GetMasm()->Ldp(VixlReg(dst0), VixlReg(dst1), ConvertMem(mem));
2864 }
2865 
2866 void Aarch64Encoder::EncodeStp(Reg src0, Reg src1, MemRef mem)
2867 {
2868     ASSERT(src0.IsFloat() == src1.IsFloat());
2869     ASSERT(src0.GetSize() == src1.GetSize());
2870     if (!ConvertMem(mem).IsValid()) {
2871         // The memory operand cannot be encoded directly; fall back to EncodeStr, which fixes up the address internally
2872         EncodeStr(src0, mem);
2873         return;
2874     }
2875 
2876     if (src0.IsFloat()) {
2877         GetMasm()->Stp(VixlVReg(src0), VixlVReg(src1), ConvertMem(mem));
2878         return;
2879     }
2880     GetMasm()->Stp(VixlReg(src0), VixlReg(src1), ConvertMem(mem));
2881 }
2882 
2883 void Aarch64Encoder::EncodeMAdd(Reg dst, Reg src0, Reg src1, Reg src2)
2884 {
2885     ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize() && dst.GetSize() == src2.GetSize());
2886     ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar() && dst.IsScalar() == src2.IsScalar());
2887 
2888     ASSERT(!GetRegfile()->IsZeroReg(dst));
2889 
2890     if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
2891         EncodeMov(dst, src2);
2892         return;
2893     }
2894 
2895     if (GetRegfile()->IsZeroReg(src2)) {
2896         EncodeMul(dst, src0, src1);
2897         return;
2898     }
2899 
2900     if (dst.IsScalar()) {
2901         GetMasm()->Madd(VixlReg(dst), VixlReg(src0), VixlReg(src1), VixlReg(src2));
2902     } else {
2903         GetMasm()->Fmadd(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), VixlVReg(src2));
2904     }
2905 }
2906 
2907 void Aarch64Encoder::EncodeMSub(Reg dst, Reg src0, Reg src1, Reg src2)
2908 {
2909     ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize() && dst.GetSize() == src2.GetSize());
2910     ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar() && dst.IsScalar() == src2.IsScalar());
2911 
2912     ASSERT(!GetRegfile()->IsZeroReg(dst));
2913 
2914     if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
2915         EncodeMov(dst, src2);
2916         return;
2917     }
2918 
2919     if (GetRegfile()->IsZeroReg(src2)) {
2920         EncodeMNeg(dst, src0, src1);
2921         return;
2922     }
2923 
2924     if (dst.IsScalar()) {
2925         GetMasm()->Msub(VixlReg(dst), VixlReg(src0), VixlReg(src1), VixlReg(src2));
2926     } else {
2927         GetMasm()->Fmsub(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), VixlVReg(src2));
2928     }
2929 }
2930 
2931 void Aarch64Encoder::EncodeMNeg(Reg dst, Reg src0, Reg src1)
2932 {
2933     ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
2934     ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar());
2935 
2936     ASSERT(!GetRegfile()->IsZeroReg(dst));
2937 
2938     if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
2939         EncodeMov(dst, Imm(0U));
2940         return;
2941     }
2942 
2943     if (dst.IsScalar()) {
2944         GetMasm()->Mneg(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2945     } else {
2946         GetMasm()->Fnmul(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
2947     }
2948 }
2949 
2950 void Aarch64Encoder::EncodeOrNot(Reg dst, Reg src0, Reg src1)
2951 {
2952     ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
2953     ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
2954     GetMasm()->Orn(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2955 }
2956 
2957 void Aarch64Encoder::EncodeOrNot(Reg dst, Reg src0, Shift src1)
2958 {
2959     ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
2960     ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
2961     GetMasm()->Orn(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2962 }
2963 
2964 void Aarch64Encoder::EncodeExtractBits(Reg dst, Reg src0, Imm imm1, Imm imm2)
2965 {
2966     GetMasm()->Ubfx(VixlReg(dst), VixlReg(src0), imm1.GetAsInt(), imm2.GetAsInt());
2967 }
2968 
2969 void Aarch64Encoder::EncodeAndNot(Reg dst, Reg src0, Reg src1)
2970 {
2971     ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
2972     ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
2973     GetMasm()->Bic(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2974 }
2975 
2976 void Aarch64Encoder::EncodeAndNot(Reg dst, Reg src0, Shift src1)
2977 {
2978     ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
2979     ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
2980     GetMasm()->Bic(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2981 }
2982 
2983 void Aarch64Encoder::EncodeXorNot(Reg dst, Reg src0, Reg src1)
2984 {
2985     ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
2986     ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
2987     GetMasm()->Eon(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2988 }
2989 
2990 void Aarch64Encoder::EncodeXorNot(Reg dst, Reg src0, Shift src1)
2991 {
2992     ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
2993     ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
2994     GetMasm()->Eon(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2995 }
2996 
2997 void Aarch64Encoder::EncodeNeg(Reg dst, Shift src)
2998 {
2999     ASSERT(dst.GetSize() == src.GetBase().GetSize());
3000     ASSERT(dst.IsScalar() && src.GetBase().IsScalar());
3001     GetMasm()->Neg(VixlReg(dst), VixlShift(src));
3002 }
3003 
3004 void Aarch64Encoder::EncodeStackOverflowCheck(ssize_t offset)
3005 {
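         // Stack probe: load from sp + offset so that crossing the stack limit faults early (assuming a guard region below the stack).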
3006     ScopedTmpReg tmp(this);
3007     EncodeAdd(tmp, GetTarget().GetStackReg(), Imm(offset));
3008     EncodeLdr(tmp, false, MemRef(tmp));
3009 }
3010 
3011 bool Aarch64Encoder::CanEncodeImmAddSubCmp(int64_t imm, [[maybe_unused]] uint32_t size,
3012                                            [[maybe_unused]] bool signedCompare)
3013 {
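         // Add/sub immediates are unsigned 12-bit values, optionally shifted left by 12, so only the magnitude matters; INT64_MIN cannot be negated.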
3014     if (imm == INT64_MIN) {
3015         return false;
3016     }
3017     if (imm < 0) {
3018         imm = -imm;
3019     }
3020     return vixl::aarch64::Assembler::IsImmAddSub(imm);
3021 }
3022 
3023 bool Aarch64Encoder::CanEncodeImmLogical(uint64_t imm, uint32_t size)
3024 {
3025 #ifndef NDEBUG
3026     if (size < DOUBLE_WORD_SIZE) {
3027         // Check that the bits above 'size' are consistent (all zeros or all ones):
3028         ASSERT(((imm >> size) == 0) || (((~imm) >> size) == 0));
3029     }
3030 #endif  // NDEBUG
3031     return vixl::aarch64::Assembler::IsImmLogical(imm, size);
3032 }
3033 
3034 bool Aarch64Encoder::CanOptimizeImmDivMod(uint64_t imm, bool isSigned) const
3035 {
3036     return CanOptimizeImmDivModCommon(imm, isSigned);
3037 }
3038 
3039 /*
3040  * From aarch64 instruction set
3041  *
3042  * ========================================================
3043  * Syntax
3044  *
3045  * LDR  Wt, [Xn|SP, Rm{, extend {amount}}]    ; 32-bit general registers
3046  *
3047  * LDR  Xt, [Xn|SP, Rm{, extend {amount}}]    ; 64-bit general registers
3048  *
3049  * amount
3050  * Is the index shift amount, optional and defaulting to #0 when extend is not LSL:
3051  *
3052  * 32-bit general registers
3053  * Can be one of #0 or #2.
3054  *
3055  * 64-bit general registers
3056  * Can be one of #0 or #3.
3057  * ========================================================
3058  * Syntax
3059  *
3060  * LDRH  Wt, [Xn|SP, Rm{, extend {amount}}]
3061  *
3062  * amount
3063  * Is the index shift amount, optional and defaulting to #0 when extend is not LSL, and can be either #0 or #1.
3064  * ========================================================
3065  *
3066  * Scale can always be 0; otherwise it is 1 for a half-word load, 2 for a word load and 3 for a double-word load
3067  */
3068 bool Aarch64Encoder::CanEncodeScale(uint64_t imm, uint32_t size)
3069 {
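         // A scale is encodable when it is 0 or equals log2 of the access size in bytes, e.g. 3 for a 64-bit access.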
3070     return (imm == 0) || ((1U << imm) == (size >> 3U));
3071 }
3072 
3073 bool Aarch64Encoder::CanEncodeShiftedOperand(ShiftOpcode opcode, ShiftType shiftType)
3074 {
3075     switch (opcode) {
3076         case ShiftOpcode::NEG_SR:
3077         case ShiftOpcode::ADD_SR:
3078         case ShiftOpcode::SUB_SR:
3079             return shiftType == ShiftType::LSL || shiftType == ShiftType::LSR || shiftType == ShiftType::ASR;
3080         case ShiftOpcode::AND_SR:
3081         case ShiftOpcode::OR_SR:
3082         case ShiftOpcode::XOR_SR:
3083         case ShiftOpcode::AND_NOT_SR:
3084         case ShiftOpcode::OR_NOT_SR:
3085         case ShiftOpcode::XOR_NOT_SR:
3086             return shiftType != ShiftType::INVALID_SHIFT;
3087         default:
3088             return false;
3089     }
3090 }
3091 
3092 bool Aarch64Encoder::CanEncodeFloatSelect()
3093 {
3094     return true;
3095 }
3096 
3097 Reg Aarch64Encoder::AcquireScratchRegister(TypeInfo type)
3098 {
3099     ASSERT(GetMasm()->GetCurrentScratchRegisterScope() == nullptr);
3100     auto reg = type.IsFloat() ? GetMasm()->GetScratchVRegisterList()->PopLowestIndex()
3101                               : GetMasm()->GetScratchRegisterList()->PopLowestIndex();
3102     ASSERT(reg.IsValid());
3103     return Reg(reg.GetCode(), type);
3104 }
3105 
3106 void Aarch64Encoder::AcquireScratchRegister(Reg reg)
3107 {
3108     ASSERT(GetMasm()->GetCurrentScratchRegisterScope() == nullptr);
3109     if (reg == GetTarget().GetLinkReg()) {
3110         ASSERT_PRINT(!lrAcquired_, "Trying to acquire LR, which hasn't been released before");
3111         lrAcquired_ = true;
3112         return;
3113     }
3114     auto type = reg.GetType();
3115     auto regId = reg.GetId();
3116 
3117     if (type.IsFloat()) {
3118         ASSERT(GetMasm()->GetScratchVRegisterList()->IncludesAliasOf(VixlVReg(reg)));
3119         GetMasm()->GetScratchVRegisterList()->Remove(regId);
3120     } else {
3121         ASSERT(GetMasm()->GetScratchRegisterList()->IncludesAliasOf(VixlReg(reg)));
3122         GetMasm()->GetScratchRegisterList()->Remove(regId);
3123     }
3124 }
3125 
3126 void Aarch64Encoder::ReleaseScratchRegister(Reg reg)
3127 {
3128     if (reg == GetTarget().GetLinkReg()) {
3129         ASSERT_PRINT(lrAcquired_, "Trying to release LR, which hasn't been acquired before");
3130         lrAcquired_ = false;
3131     } else if (reg.IsFloat()) {
3132         GetMasm()->GetScratchVRegisterList()->Combine(reg.GetId());
3133     } else if (reg.GetId() != GetTarget().GetLinkReg().GetId()) {
3134         GetMasm()->GetScratchRegisterList()->Combine(reg.GetId());
3135     }
3136 }
3137 
3138 bool Aarch64Encoder::IsScratchRegisterReleased(Reg reg) const
3139 {
3140     if (reg == GetTarget().GetLinkReg()) {
3141         return !lrAcquired_;
3142     }
3143     if (reg.IsFloat()) {
3144         return GetMasm()->GetScratchVRegisterList()->IncludesAliasOf(VixlVReg(reg));
3145     }
3146     return GetMasm()->GetScratchRegisterList()->IncludesAliasOf(VixlReg(reg));
3147 }
3148 
3149 RegMask Aarch64Encoder::GetScratchRegistersMask() const
3150 {
3151     return RegMask(GetMasm()->GetScratchRegisterList()->GetList());
3152 }
3153 
3154 RegMask Aarch64Encoder::GetScratchFpRegistersMask() const
3155 {
3156     return RegMask(GetMasm()->GetScratchVRegisterList()->GetList());
3157 }
3158 
3159 RegMask Aarch64Encoder::GetAvailableScratchRegisters() const
3160 {
3161     return RegMask(GetMasm()->GetScratchRegisterList()->GetList());
3162 }
3163 
3164 VRegMask Aarch64Encoder::GetAvailableScratchFpRegisters() const
3165 {
3166     return VRegMask(GetMasm()->GetScratchVRegisterList()->GetList());
3167 }
3168 
3169 TypeInfo Aarch64Encoder::GetRefType()
3170 {
3171     return INT64_TYPE;
3172 }
3173 
3174 void *Aarch64Encoder::BufferData() const
3175 {
3176     return GetMasm()->GetBuffer()->GetStartAddress<void *>();
3177 }
3178 
3179 size_t Aarch64Encoder::BufferSize() const
3180 {
3181     return GetMasm()->GetBuffer()->GetSizeInBytes();
3182 }
3183 
3184 void Aarch64Encoder::MakeLibCall(Reg dst, Reg src0, Reg src1, const void *entryPoint)
3185 {
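         // Marshal the operands into the AAPCS64 FP argument registers (s0/s1 or d0/d1), call entryPoint, and copy the result out of s0/d0 if dst differs.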
3186     if (!dst.IsFloat()) {
3187         SetFalseResult();
3188         return;
3189     }
3190     if (dst.GetType() == FLOAT32_TYPE) {
3191         if (!src0.IsFloat() || !src1.IsFloat()) {
3192             SetFalseResult();
3193             return;
3194         }
3195 
3196         if (src0.GetId() != vixl::aarch64::s0.GetCode() || src1.GetId() != vixl::aarch64::s1.GetCode()) {
3197             ScopedTmpRegF32 tmp(this);
3198             GetMasm()->Fmov(VixlVReg(tmp), VixlVReg(src1));
3199             GetMasm()->Fmov(vixl::aarch64::s0, VixlVReg(src0));
3200             GetMasm()->Fmov(vixl::aarch64::s1, VixlVReg(tmp));
3201         }
3202 
3203         MakeCall(entryPoint);
3204 
3205         if (dst.GetId() != vixl::aarch64::s0.GetCode()) {
3206             GetMasm()->Fmov(VixlVReg(dst), vixl::aarch64::s0);
3207         }
3208     } else if (dst.GetType() == FLOAT64_TYPE) {
3209         if (!src0.IsFloat() || !src1.IsFloat()) {
3210             SetFalseResult();
3211             return;
3212         }
3213 
3214         if (src0.GetId() != vixl::aarch64::d0.GetCode() || src1.GetId() != vixl::aarch64::d1.GetCode()) {
3215             ScopedTmpRegF64 tmp(this);
3216             GetMasm()->Fmov(VixlVReg(tmp), VixlVReg(src1));
3217 
3218             GetMasm()->Fmov(vixl::aarch64::d0, VixlVReg(src0));
3219             GetMasm()->Fmov(vixl::aarch64::d1, VixlVReg(tmp));
3220         }
3221 
3222         MakeCall(entryPoint);
3223 
3224         if (dst.GetId() != vixl::aarch64::d0.GetCode()) {
3225             GetMasm()->Fmov(VixlVReg(dst), vixl::aarch64::d0);
3226         }
3227     } else {
3228         UNREACHABLE();
3229     }
3230 }
3231 
3232 template <bool IS_STORE>
3233 void Aarch64Encoder::LoadStoreRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
3234 {
3235     if (registers.none()) {
3236         return;
3237     }
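         // Find the highest set register: its slot determines the largest offset that must be addressable.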
3238     int32_t lastReg = registers.size() - 1;
3239     for (; lastReg >= 0; --lastReg) {
3240         if (registers.test(lastReg)) {
3241             break;
3242         }
3243     }
3244     // For offsets that do not fit the LDP/STP immediate, precompute sp + offset with a single Add into a temporary
3245     size_t spOffset;
3246     auto lastOffset = (slot + lastReg - startReg) * DOUBLE_WORD_SIZE_BYTES;
3247 
3248     if (!vixl::aarch64::Assembler::IsImmLSPair(lastOffset, vixl::aarch64::kXRegSizeInBytesLog2)) {
3249         ScopedTmpReg lrReg(this, true);
3250         auto tmp = VixlReg(lrReg);
3251         spOffset = slot * DOUBLE_WORD_SIZE_BYTES;
3252         slot = 0;
3253         if (vixl::aarch64::Assembler::IsImmAddSub(spOffset)) {
3254             GetMasm()->Add(tmp, vixl::aarch64::sp, VixlImm(spOffset));
3255         } else {
3256             GetMasm()->Mov(tmp, VixlImm(spOffset));
3257             GetMasm()->Add(tmp, vixl::aarch64::sp, tmp);
3258         }
3259         LoadStoreRegistersLoop<IS_STORE>(registers, slot, startReg, isFp, tmp);
3260     } else {
3261         LoadStoreRegistersLoop<IS_STORE>(registers, slot, startReg, isFp, vixl::aarch64::sp);
3262     }
3263 }
3264 
3265 template <bool IS_STORE>
3266 static void LoadStorePair(vixl::aarch64::MacroAssembler *masm, CPURegister lastReg, CPURegister reg, Reg base,
3267                           int32_t idx)
3268 {
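         // idx already counts both registers of the pair, so the pair's first slot is idx - 2.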
3269     auto baseReg = VixlReg(base);
3270     static constexpr int32_t OFFSET = 2;
3271     if constexpr (IS_STORE) {  // NOLINT
3272         masm->Stp(lastReg, reg, MemOperand(baseReg, (idx - OFFSET) * DOUBLE_WORD_SIZE_BYTES));
3273     } else {  // NOLINT
3274         masm->Ldp(lastReg, reg, MemOperand(baseReg, (idx - OFFSET) * DOUBLE_WORD_SIZE_BYTES));
3275     }
3276 }
3277 
3278 template <bool IS_STORE>
3279 static void LoadStoreReg(vixl::aarch64::MacroAssembler *masm, CPURegister lastReg, Reg base, int32_t idx)
3280 {
3281     auto baseReg = VixlReg(base);
3282     if constexpr (IS_STORE) {  // NOLINT
3283         masm->Str(lastReg, MemOperand(baseReg, (idx - 1) * DOUBLE_WORD_SIZE_BYTES));
3284     } else {  // NOLINT
3285         masm->Ldr(lastReg, MemOperand(baseReg, (idx - 1) * DOUBLE_WORD_SIZE_BYTES));
3286     }
3287 }
3288 
3289 template <bool IS_STORE>
3290 void Aarch64Encoder::LoadStoreRegistersMainLoop(RegMask registers, bool isFp, int32_t slot, Reg base, RegMask mask)
3291 {
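         // Pair saved registers into Stp/Ldp when their stack slots are contiguous; otherwise fall back to single Str/Ldr accesses.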
3292     bool hasMask = mask.any();
3293     int32_t index = hasMask ? static_cast<int32_t>(mask.GetMinRegister()) : 0;
3294     int32_t lastIndex = -1;
3295     ssize_t lastId = -1;
3296 
3297     slot -= index;
3298     for (ssize_t id = index; id < helpers::ToSigned(registers.size()); id++) {
3299         if (hasMask) {
3300             if (!mask.test(id)) {
3301                 continue;
3302             }
3303             index++;
3304         }
3305         if (!registers.test(id)) {
3306             continue;
3307         }
3308         if (!hasMask) {
3309             index++;
3310         }
3311         if (lastId == -1) {
3312             lastId = id;
3313             lastIndex = index;
3314             continue;
3315         }
3316 
3317         auto lastReg =
3318             CPURegister(lastId, vixl::aarch64::kXRegSize, isFp ? CPURegister::kVRegister : CPURegister::kRegister);
3319         if (!hasMask || lastId + 1 == id) {
3320             auto reg =
3321                 CPURegister(id, vixl::aarch64::kXRegSize, isFp ? CPURegister::kVRegister : CPURegister::kRegister);
3322             LoadStorePair<IS_STORE>(GetMasm(), lastReg, reg, base, slot + index);
3323             lastId = -1;
3324         } else {
3325             LoadStoreReg<IS_STORE>(GetMasm(), lastReg, base, slot + lastIndex);
3326             lastId = id;
3327             lastIndex = index;
3328         }
3329     }
3330     if (lastId != -1) {
3331         auto lastReg =
3332             CPURegister(lastId, vixl::aarch64::kXRegSize, isFp ? CPURegister::kVRegister : CPURegister::kRegister);
3333         LoadStoreReg<IS_STORE>(GetMasm(), lastReg, base, slot + lastIndex);
3334     }
3335 }
3336 
3337 template <bool IS_STORE>
3338 void Aarch64Encoder::LoadStoreRegisters(RegMask registers, bool isFp, int32_t slot, Reg base, RegMask mask)
3339 {
3340     if (registers.none()) {
3341         return;
3342     }
3343 
3344     int32_t maxOffset = (slot + helpers::ToSigned(registers.GetMaxRegister())) * DOUBLE_WORD_SIZE_BYTES;
3345     int32_t minOffset = (slot + helpers::ToSigned(registers.GetMinRegister())) * DOUBLE_WORD_SIZE_BYTES;
3346 
3347     ScopedTmpRegLazy tmpReg(this, true);
3348     // For offsets that do not fit the LDP/STP immediate, precompute base + offset with a single Add into a temporary
3349     if (!vixl::aarch64::Assembler::IsImmLSPair(minOffset, vixl::aarch64::kXRegSizeInBytesLog2) ||
3350         !vixl::aarch64::Assembler::IsImmLSPair(maxOffset, vixl::aarch64::kXRegSizeInBytesLog2)) {
3351         tmpReg.AcquireWithLr();
3352         auto lrReg = VixlReg(tmpReg);
3353         ssize_t spOffset = slot * DOUBLE_WORD_SIZE_BYTES;
3354         if (vixl::aarch64::Assembler::IsImmAddSub(spOffset)) {
3355             GetMasm()->Add(lrReg, VixlReg(base), VixlImm(spOffset));
3356         } else {
3357             GetMasm()->Mov(lrReg, VixlImm(spOffset));
3358             GetMasm()->Add(lrReg, VixlReg(base), lrReg);
3359         }
3360         // The temporary now holds base + offset, so continue with slot 0 and the temporary as the base register
3361         slot = 0;
3362         base = tmpReg;
3363     }
3364 
3365     LoadStoreRegistersMainLoop<IS_STORE>(registers, isFp, slot, base, mask);
3366 }
3367 
3368 template <bool IS_STORE>
3369 void Aarch64Encoder::LoadStoreRegistersLoop(RegMask registers, ssize_t slot, size_t startReg, bool isFp,
3370                                             const vixl::aarch64::Register &baseReg)
3371 {
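         // Emit Stp/Ldp for runs of consecutive register codes and Str/Ldr for isolated ones, addressing slots relative to baseReg.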
3372     size_t i = 0;
3373     const auto getNextReg = [&registers, &i, isFp]() {
3374         for (; i < registers.size(); i++) {
3375             if (registers.test(i)) {
3376                 return CPURegister(i++, vixl::aarch64::kXRegSize,
3377                                    isFp ? CPURegister::kVRegister : CPURegister::kRegister);
3378             }
3379         }
3380         return CPURegister();
3381     };
3382 
3383     for (CPURegister nextReg = getNextReg(); nextReg.IsValid();) {
3384         const CPURegister currReg = nextReg;
3385         nextReg = getNextReg();
3386         if (nextReg.IsValid() && (nextReg.GetCode() - 1 == currReg.GetCode())) {
3387             if constexpr (IS_STORE) {  // NOLINT
3388                 GetMasm()->Stp(currReg, nextReg,
3389                                MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
3390             } else {  // NOLINT
3391                 GetMasm()->Ldp(currReg, nextReg,
3392                                MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
3393             }
3394             nextReg = getNextReg();
3395         } else {
3396             if constexpr (IS_STORE) {  // NOLINT
3397                 GetMasm()->Str(currReg,
3398                                MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
3399             } else {  // NOLINT
3400                 GetMasm()->Ldr(currReg,
3401                                MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
3402             }
3403         }
3404     }
3405 }
3406 
3407 void Aarch64Encoder::SaveRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
3408 {
3409     LoadStoreRegisters<true>(registers, slot, startReg, isFp);
3410 }
3411 
3412 void Aarch64Encoder::LoadRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
3413 {
3414     LoadStoreRegisters<false>(registers, slot, startReg, isFp);
3415 }
3416 
3417 void Aarch64Encoder::SaveRegisters(RegMask registers, bool isFp, ssize_t slot, Reg base, RegMask mask)
3418 {
3419     LoadStoreRegisters<true>(registers, isFp, slot, base, mask);
3420 }
3421 
3422 void Aarch64Encoder::LoadRegisters(RegMask registers, bool isFp, ssize_t slot, Reg base, RegMask mask)
3423 {
3424     LoadStoreRegisters<false>(registers, isFp, slot, base, mask);
3425 }
3426 
3427 void Aarch64Encoder::PushRegisters(RegMask registers, bool isFp)
3428 {
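         // Push registers pairwise with pre-indexed Stp; a leftover single register still takes a full 16-byte slot, keeping sp 16-byte aligned.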
3429     static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTES;
3430     Register lastReg = INVALID_REG;
3431     for (size_t i = 0; i < registers.size(); i++) {
3432         if (registers[i]) {
3433             if (lastReg == INVALID_REG) {
3434                 lastReg = i;
3435                 continue;
3436             }
3437             if (isFp) {
3438                 GetMasm()->stp(vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
3439                                vixl::aarch64::VRegister(i, DOUBLE_WORD_SIZE),
3440                                MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
3441             } else {
3442                 GetMasm()->stp(vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
3443                                vixl::aarch64::Register(i, DOUBLE_WORD_SIZE),
3444                                MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
3445             }
3446             lastReg = INVALID_REG;
3447         }
3448     }
3449     if (lastReg != INVALID_REG) {
3450         if (isFp) {
3451             GetMasm()->str(vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
3452                            MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
3453         } else {
3454             GetMasm()->str(vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
3455                            MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
3456         }
3457     }
3458 }
3459 
3460 void Aarch64Encoder::PopRegisters(RegMask registers, bool isFp)
3461 {
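         // Mirror of PushRegisters: with an odd count the highest register was pushed last on its own, so pop it first, then pop the remaining pairs with post-indexed Ldp.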
3462     static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTES;
3463     Register lastReg;
3464     if ((registers.count() & 1U) != 0) {
3465         lastReg = registers.GetMaxRegister();
3466         if (isFp) {
3467             GetMasm()->ldr(vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
3468                            MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
3469         } else {
3470             GetMasm()->ldr(vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
3471                            MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
3472         }
3473         registers.reset(lastReg);
3474     }
3475     lastReg = INVALID_REG;
3476     for (ssize_t i = registers.size() - 1; i >= 0; i--) {
3477         if (registers[i]) {
3478             if (lastReg == INVALID_REG) {
3479                 lastReg = i;
3480                 continue;
3481             }
3482             if (isFp) {
3483                 GetMasm()->ldp(vixl::aarch64::VRegister(i, DOUBLE_WORD_SIZE),
3484                                vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
3485                                MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
3486             } else {
3487                 GetMasm()->ldp(vixl::aarch64::Register(i, DOUBLE_WORD_SIZE),
3488                                vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
3489                                MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
3490             }
3491             lastReg = INVALID_REG;
3492         }
3493     }
3494 }
3495 
3496 vixl::aarch64::MacroAssembler *Aarch64Encoder::GetMasm() const
3497 {
3498     ASSERT(masm_ != nullptr);
3499     return masm_;
3500 }
3501 
3502 size_t Aarch64Encoder::GetLabelAddress(LabelHolder::LabelId label)
3503 {
3504     auto plabel = labels_->GetLabel(label);
3505     ASSERT(plabel->IsBound());
3506     return GetMasm()->GetLabelAddress<size_t>(plabel);
3507 }
3508 
3509 bool Aarch64Encoder::LabelHasLinks(LabelHolder::LabelId label)
3510 {
3511     auto plabel = labels_->GetLabel(label);
3512     return plabel->IsLinked();
3513 }
3514 
3515 #ifndef PANDA_MINIMAL_VIXL
3516 vixl::aarch64::Decoder &Aarch64Encoder::GetDecoder() const
3517 {
3518     if (!decoder_) {
3519         decoder_.emplace(GetAllocator());
3520         decoder_->visitors()->push_back(&GetDisasm());
3521     }
3522     return *decoder_;
3523 }
3524 
3525 vixl::aarch64::Disassembler &Aarch64Encoder::GetDisasm() const
3526 {
3527     if (!disasm_) {
3528         disasm_.emplace(GetAllocator());
3529     }
3530     return *disasm_;
3531 }
3532 #endif
3533 
3534 size_t Aarch64Encoder::DisasmInstr([[maybe_unused]] std::ostream &stream, size_t pc,
3535                                    [[maybe_unused]] ssize_t codeOffset) const
3536 {
3537 #ifndef PANDA_MINIMAL_VIXL
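         // Decode the instruction at pc with the lazily created VIXL decoder; when codeOffset is non-negative, prefix the output with the instruction offset in hex.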
3538     auto bufferStart = GetMasm()->GetBuffer()->GetOffsetAddress<uintptr_t>(0);
3539     auto instr = GetMasm()->GetBuffer()->GetOffsetAddress<vixl::aarch64::Instruction *>(pc);
3540     GetDecoder().Decode(instr);
3541     if (codeOffset < 0) {
3542         stream << GetDisasm().GetOutput();
3543     } else {
3544         stream << std::setw(0x4) << std::right << std::setfill('0') << std::hex
3545                << reinterpret_cast<uintptr_t>(instr) - bufferStart + codeOffset << ": " << GetDisasm().GetOutput()
3546                << std::setfill(' ') << std::dec;
3547     }
3548 
3549 #endif
3550     return pc + vixl::aarch64::kInstructionSize;
3551 }
3552 }  // namespace ark::compiler::aarch64
3553