1 /*
2  * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16 Encoder (implementation of the math and memory low-level emitters)
17 */
18 
19 #include <aarch64/macro-assembler-aarch64.h>
20 #include <cstddef>
21 #include "compiler/optimizer/code_generator/target/aarch64/target.h"
22 #include "compiler/optimizer/code_generator/encode.h"
23 #include "compiler/optimizer/code_generator/fast_divisor.h"
24 #include "scoped_tmp_reg.h"
25 #include "compiler/optimizer/code_generator/relocations.h"
26 
27 #if defined(USE_VIXL_ARM64) && !defined(PANDA_MINIMAL_VIXL)
28 #include "aarch64/disasm-aarch64.h"
29 #endif
30 
31 #include <iomanip>
32 
33 #include "lib_helpers.inl"
34 
35 #ifndef PANDA_TARGET_MACOS
36 #include "elf.h"
37 #endif  // PANDA_TARGET_MACOS
38 
39 namespace ark::compiler::aarch64 {
40 using vixl::aarch64::CPURegister;
41 using vixl::aarch64::MemOperand;
42 
43 /// Converters
44 static vixl::aarch64::Condition Convert(const Condition cc)
45 {
46     switch (cc) {
47         case Condition::EQ:
48             return vixl::aarch64::Condition::eq;
49         case Condition::NE:
50             return vixl::aarch64::Condition::ne;
51         case Condition::LT:
52             return vixl::aarch64::Condition::lt;
53         case Condition::GT:
54             return vixl::aarch64::Condition::gt;
55         case Condition::LE:
56             return vixl::aarch64::Condition::le;
57         case Condition::GE:
58             return vixl::aarch64::Condition::ge;
59         case Condition::LO:
60             return vixl::aarch64::Condition::lo;
61         case Condition::LS:
62             return vixl::aarch64::Condition::ls;
63         case Condition::HI:
64             return vixl::aarch64::Condition::hi;
65         case Condition::HS:
66             return vixl::aarch64::Condition::hs;
67         // NOTE(igorban) : Remove them
68         case Condition::MI:
69             return vixl::aarch64::Condition::mi;
70         case Condition::PL:
71             return vixl::aarch64::Condition::pl;
72         case Condition::VS:
73             return vixl::aarch64::Condition::vs;
74         case Condition::VC:
75             return vixl::aarch64::Condition::vc;
76         case Condition::AL:
77             return vixl::aarch64::Condition::al;
78         case Condition::NV:
79             return vixl::aarch64::Condition::nv;
80         default:
81             UNREACHABLE();
82             return vixl::aarch64::Condition::eq;
83     }
84 }
85 
86 static vixl::aarch64::Condition ConvertTest(const Condition cc)
87 {
88     ASSERT(cc == Condition::TST_EQ || cc == Condition::TST_NE);
89     return cc == Condition::TST_EQ ? vixl::aarch64::Condition::eq : vixl::aarch64::Condition::ne;
90 }
91 
92 static vixl::aarch64::Shift Convert(const ShiftType type)
93 {
94     switch (type) {
95         case ShiftType::LSL:
96             return vixl::aarch64::Shift::LSL;
97         case ShiftType::LSR:
98             return vixl::aarch64::Shift::LSR;
99         case ShiftType::ASR:
100             return vixl::aarch64::Shift::ASR;
101         case ShiftType::ROR:
102             return vixl::aarch64::Shift::ROR;
103         default:
104             UNREACHABLE();
105     }
106 }
107 
108 static vixl::aarch64::VRegister VixlVReg(Reg reg)
109 {
110     ASSERT(reg.IsValid());
111     auto vixlVreg = vixl::aarch64::VRegister(reg.GetId(), reg.GetSize());
112     ASSERT(vixlVreg.IsValid());
113     return vixlVreg;
114 }
115 
116 static vixl::aarch64::Operand VixlShift(Shift shift)
117 {
118     Reg reg = shift.GetBase();
119     ASSERT(reg.IsValid());
120     if (reg.IsScalar()) {
121         ASSERT(reg.IsScalar());
122         size_t regSize = reg.GetSize();
123         if (regSize < WORD_SIZE) {
124             regSize = WORD_SIZE;
125         }
126         auto vixlReg = vixl::aarch64::Register(reg.GetId(), regSize);
127         ASSERT(vixlReg.IsValid());
128 
129         return vixl::aarch64::Operand(vixlReg, Convert(shift.GetType()), shift.GetScale());
130     }
131 
132     // Invalid register type
133     UNREACHABLE();
134 }
135 
136 static vixl::aarch64::MemOperand ConvertMem(MemRef mem)
137 {
138     bool base = mem.HasBase() && (mem.GetBase().GetId() != vixl::aarch64::xzr.GetCode());
139     bool hasIndex = mem.HasIndex();
140     bool shift = mem.HasScale();
141     bool offset = mem.HasDisp();
142     auto baseReg = Reg(mem.GetBase().GetId(), INT64_TYPE);
143     if (base && !hasIndex && !shift) {
144         // Memory address = x_reg(base) + imm(offset)
145         if (mem.GetDisp() != 0) {
146             auto disp = mem.GetDisp();
147             return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlImm(disp));
148         }
149         // Memory address = x_reg(base)
150         return vixl::aarch64::MemOperand(VixlReg(mem.GetBase(), DOUBLE_WORD_SIZE));
151     }
152     if (base && hasIndex && !offset) {
153         auto scale = mem.GetScale();
154         auto indexReg = mem.GetIndex();
155         // Memory address = x_reg(base) + (SXTW(w_reg(index)) << scale)
156         if (indexReg.GetSize() == WORD_SIZE) {
157             // Sign-extend and shift w-register in offset-position (signed because index always has signed type)
158             return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlReg(indexReg), vixl::aarch64::Extend::SXTW, scale);
159         }
160         // Memory address = x_reg(base) + (x_reg(index) << scale)
161         if (scale != 0) {
162             ASSERT(indexReg.GetSize() == DOUBLE_WORD_SIZE);
163             return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlReg(indexReg), vixl::aarch64::LSL, scale);
164         }
165         // Memory address = x_reg(base) + x_reg(index)
166         return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlReg(indexReg));
167     }
168     // Wrong memRef
169     // Return invalid memory operand
170     auto tmp = vixl::aarch64::MemOperand();
171     ASSERT(!tmp.IsValid());
172     return tmp;
173 }
174 
175 static Reg Promote(Reg reg)
176 {
177     if (reg.GetType() == INT8_TYPE) {
178         return Reg(reg.GetId(), INT16_TYPE);
179     }
180     return reg;
181 }
182 
183 Aarch64LabelHolder::LabelId Aarch64LabelHolder::CreateLabel()
184 {
185     ++id_;
186     auto allocator = GetEncoder()->GetAllocator();
187     auto *label = allocator->New<LabelType>(allocator);
188     labels_.push_back(label);
189     ASSERT(labels_.size() == id_);
190     return id_ - 1;
191 }
192 
193 void Aarch64LabelHolder::CreateLabels(LabelId size)
194 {
195     for (LabelId i = 0; i <= size; ++i) {
196         CreateLabel();
197     }
198 }
199 
200 void Aarch64LabelHolder::BindLabel(LabelId id)
201 {
202     static_cast<Aarch64Encoder *>(GetEncoder())->GetMasm()->Bind(labels_[id]);
203 }
204 
205 Aarch64LabelHolder::LabelType *Aarch64LabelHolder::GetLabel(LabelId id) const
206 {
207     ASSERT(labels_.size() > id);
208     return labels_[id];
209 }
210 
211 Aarch64LabelHolder::LabelId Aarch64LabelHolder::Size()
212 {
213     return labels_.size();
214 }
215 
216 Aarch64Encoder::Aarch64Encoder(ArenaAllocator *allocator) : Encoder(allocator, Arch::AARCH64)
217 {
218     labels_ = allocator->New<Aarch64LabelHolder>(this);
219     if (labels_ == nullptr) {
220         SetFalseResult();
221     }
222     // We enable LR tmp reg by default in Aarch64
223     EnableLrAsTempReg(true);
224 }
225 
226 Aarch64Encoder::~Aarch64Encoder()
227 {
228     auto labels = static_cast<Aarch64LabelHolder *>(GetLabels())->labels_;
229     for (auto label : labels) {
230         label->~Label();
231     }
232     if (masm_ != nullptr) {
233         masm_->~MacroAssembler();
234         masm_ = nullptr;
235     }
236 }
237 
238 LabelHolder *Aarch64Encoder::GetLabels() const
239 {
240     ASSERT(labels_ != nullptr);
241     return labels_;
242 }
243 
244 bool Aarch64Encoder::IsValid() const
245 {
246     return true;
247 }
248 
249 constexpr auto Aarch64Encoder::GetTarget()
250 {
251     return ark::compiler::Target(Arch::AARCH64);
252 }
253 
254 void Aarch64Encoder::SetMaxAllocatedBytes(size_t size)
255 {
256     GetMasm()->GetBuffer()->SetMmapMaxBytes(size);
257 }
258 
259 bool Aarch64Encoder::InitMasm()
260 {
261     if (masm_ == nullptr) {
262         // Initialize Masm
263         masm_ = GetAllocator()->New<vixl::aarch64::MacroAssembler>(GetAllocator());
264         if (masm_ == nullptr || !masm_->IsValid()) {
265             SetFalseResult();
266             return false;
267         }
268         ASSERT(GetMasm());
269 
270         // Make sure that the compiler uses the same scratch registers as the assembler
271         CHECK_EQ(RegMask(GetMasm()->GetScratchRegisterList()->GetList()), GetTarget().GetTempRegsMask());
272         CHECK_EQ(RegMask(GetMasm()->GetScratchVRegisterList()->GetList()), GetTarget().GetTempVRegsMask());
273     }
274     return true;
275 }
276 
277 void Aarch64Encoder::Finalize()
278 {
279     GetMasm()->FinalizeCode();
280 }
281 
282 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id)
283 {
284     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
285     GetMasm()->B(label);
286 }
287 
288 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
289 {
290     if (src1.GetId() == GetRegfile()->GetZeroReg().GetId()) {
291         EncodeJump(id, src0, cc);
292         return;
293     }
294 
295     if (src0.IsScalar()) {
296         GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
297     } else {
298         GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
299     }
300 
301     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
302     GetMasm()->B(label, Convert(cc));
303 }
304 
305 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
306 {
307     auto value = imm.GetAsInt();
308     if (value == 0) {
309         EncodeJump(id, src, cc);
310         return;
311     }
312 
313     if (value < 0) {
314         GetMasm()->Cmn(VixlReg(src), VixlImm(-value));
315     } else {  // if (value > 0)
316         GetMasm()->Cmp(VixlReg(src), VixlImm(value));
317     }
318 
319     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
320     GetMasm()->B(label, Convert(cc));
321 }
322 
323 void Aarch64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
324 {
325     ASSERT(src0.IsScalar() && src1.IsScalar());
326 
327     GetMasm()->Tst(VixlReg(src0), VixlReg(src1));
328     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
329     GetMasm()->B(label, ConvertTest(cc));
330 }
331 
332 void Aarch64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
333 {
334     ASSERT(src.IsScalar());
335 
336     auto value = imm.GetAsInt();
337     if (CanEncodeImmLogical(value, src.GetSize() > WORD_SIZE ? DOUBLE_WORD_SIZE : WORD_SIZE)) {
338         GetMasm()->Tst(VixlReg(src), VixlImm(value));
339         auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
340         GetMasm()->B(label, ConvertTest(cc));
341     } else {
342         ScopedTmpReg tmpReg(this, src.GetType());
343         EncodeMov(tmpReg, imm);
344         EncodeJumpTest(id, src, tmpReg, cc);
345     }
346 }
347 
348 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Condition cc)
349 {
350     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
351     ASSERT(src.IsScalar());
352     auto rzero = Reg(GetRegfile()->GetZeroReg().GetId(), src.GetType());
353 
354     switch (cc) {
355         case Condition::LO:
356             // Always false
357             return;
358         case Condition::HS:
359             // Always true
360             GetMasm()->B(label);
361             return;
362         case Condition::EQ:
363         case Condition::LS:
364             if (src.GetId() == rzero.GetId()) {
365                 GetMasm()->B(label);
366                 return;
367             }
368             // True only when zero
369             GetMasm()->Cbz(VixlReg(src), label);
370             return;
371         case Condition::NE:
372         case Condition::HI:
373             if (src.GetId() == rzero.GetId()) {
374                 // Do nothing
375                 return;
376             }
377             // True only when non-zero
378             GetMasm()->Cbnz(VixlReg(src), label);
379             return;
380         default:
381             break;
382     }
383 
384     ASSERT(rzero.IsValid());
385     GetMasm()->Cmp(VixlReg(src), VixlReg(rzero));
386     GetMasm()->B(label, Convert(cc));
387 }
388 
389 void Aarch64Encoder::EncodeJump(Reg dst)
390 {
391     GetMasm()->Br(VixlReg(dst));
392 }
393 
394 void Aarch64Encoder::EncodeJump([[maybe_unused]] RelocationInfo *relocation)
395 {
396 #ifdef PANDA_TARGET_MACOS
397     LOG(FATAL, COMPILER) << "Not supported in macOS build";
398 #else
399     auto buffer = GetMasm()->GetBuffer();
400     relocation->offset = GetCursorOffset();
401     relocation->addend = 0;
402     relocation->type = R_AARCH64_CALL26;
403     static constexpr uint32_t CALL_WITH_ZERO_OFFSET = 0x14000000;
404     buffer->Emit32(CALL_WITH_ZERO_OFFSET);
405 #endif
406 }
407 
408 void Aarch64Encoder::EncodeBitTestAndBranch(LabelHolder::LabelId id, compiler::Reg reg, uint32_t bitPos, bool bitValue)
409 {
410     ASSERT(reg.IsScalar() && reg.GetSize() > bitPos);
411     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
412     if (bitValue) {
413         GetMasm()->Tbnz(VixlReg(reg), bitPos, label);
414     } else {
415         GetMasm()->Tbz(VixlReg(reg), bitPos, label);
416     }
417 }
418 
419 void Aarch64Encoder::EncodeNop()
420 {
421     GetMasm()->Nop();
422 }
423 
424 void Aarch64Encoder::MakeCall([[maybe_unused]] compiler::RelocationInfo *relocation)
425 {
426 #ifdef PANDA_TARGET_MACOS
427     LOG(FATAL, COMPILER) << "Not supported in macOS build";
428 #else
429     auto buffer = GetMasm()->GetBuffer();
430     relocation->offset = GetCursorOffset();
431     relocation->addend = 0;
432     relocation->type = R_AARCH64_CALL26;
433     static constexpr uint32_t CALL_WITH_ZERO_OFFSET = 0x94000000;
434     buffer->Emit32(CALL_WITH_ZERO_OFFSET);
435 #endif
436 }
437 
438 void Aarch64Encoder::MakeCall(const void *entryPoint)
439 {
440     ScopedTmpReg tmp(this, true);
441     EncodeMov(tmp, Imm(reinterpret_cast<uintptr_t>(entryPoint)));
442     GetMasm()->Blr(VixlReg(tmp));
443 }
444 
445 void Aarch64Encoder::MakeCall(MemRef entryPoint)
446 {
447     ScopedTmpReg tmp(this, true);
448     EncodeLdr(tmp, false, entryPoint);
449     GetMasm()->Blr(VixlReg(tmp));
450 }
451 
452 void Aarch64Encoder::MakeCall(Reg reg)
453 {
454     GetMasm()->Blr(VixlReg(reg));
455 }
456 
457 void Aarch64Encoder::MakeCall(LabelHolder::LabelId id)
458 {
459     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
460     GetMasm()->Bl(label);
461 }
462 
463 void Aarch64Encoder::LoadPcRelative(Reg reg, intptr_t offset, Reg regAddr)
464 {
465     ASSERT(GetCodeOffset() != Encoder::INVALID_OFFSET);
466     ASSERT(reg.IsValid() || regAddr.IsValid());
467 
468     if (!regAddr.IsValid()) {
469         regAddr = reg.As(INT64_TYPE);
470     }
471 
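    // A pc-relative offset that fits into 21 bits can be materialized with a single adr;
    // larger offsets are split below into a page delta for adrp plus the in-page offset
    // (addr & (kPageSize - 1)), which is then added or folded into the load displacement.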
472     if (vixl::IsInt21(offset)) {
473         GetMasm()->adr(VixlReg(regAddr), offset);
474         if (reg != INVALID_REGISTER) {
475             EncodeLdr(reg, false, MemRef(regAddr));
476         }
477     } else {
478         size_t pc = GetCodeOffset() + GetCursorOffset();
479         size_t addr;
480         if (auto res = static_cast<intptr_t>(helpers::ToSigned(pc) + offset); res < 0) {
481             // Make both pc and addr positive
482             ssize_t extend = RoundUp(std::abs(res), vixl::aarch64::kPageSize);
483             addr = static_cast<size_t>(res + extend);
484             pc += static_cast<size_t>(extend);
485         } else {
486             addr = res;
487         }
488 
489         ssize_t adrpImm = (addr >> vixl::aarch64::kPageSizeLog2) - (pc >> vixl::aarch64::kPageSizeLog2);
490 
491         GetMasm()->adrp(VixlReg(regAddr), adrpImm);
492 
493         offset = ark::helpers::ToUnsigned(addr) & (vixl::aarch64::kPageSize - 1);
494         if (reg.GetId() != regAddr.GetId()) {
495             EncodeAdd(regAddr, regAddr, Imm(offset));
496             if (reg != INVALID_REGISTER) {
497                 EncodeLdr(reg, true, MemRef(regAddr));
498             }
499         } else {
500             EncodeLdr(reg, true, MemRef(regAddr, offset));
501         }
502     }
503 }
504 
505 void Aarch64Encoder::MakeCallAot(intptr_t offset)
506 {
507     ScopedTmpReg tmp(this, true);
508     LoadPcRelative(tmp, offset);
509     GetMasm()->Blr(VixlReg(tmp));
510 }
511 
512 bool Aarch64Encoder::CanMakeCallByOffset(intptr_t offset)
513 {
514     // NOLINTNEXTLINE(hicpp-signed-bitwise)
515     auto off = (static_cast<uintptr_t>(offset) >> vixl::aarch64::kInstructionSizeLog2);
516     return vixl::aarch64::Instruction::IsValidImmPCOffset(vixl::aarch64::ImmBranchType::UncondBranchType, off);
517 }
518 
519 void Aarch64Encoder::MakeCallByOffset(intptr_t offset)
520 {
521     GetMasm()->Bl(offset);
522 }
523 
524 void Aarch64Encoder::MakeLoadAotTable(intptr_t offset, Reg reg)
525 {
526     LoadPcRelative(reg, offset);
527 }
528 
529 void Aarch64Encoder::MakeLoadAotTableAddr(intptr_t offset, Reg addr, Reg val)
530 {
531     LoadPcRelative(val, offset, addr);
532 }
533 
534 void Aarch64Encoder::EncodeAbort()
535 {
536     GetMasm()->Brk();
537 }
538 
539 void Aarch64Encoder::EncodeReturn()
540 {
541     GetMasm()->Ret();
542 }
543 
544 void Aarch64Encoder::EncodeMul([[maybe_unused]] Reg unused1, [[maybe_unused]] Reg unused2, [[maybe_unused]] Imm unused3)
545 {
546     SetFalseResult();
547 }
548 
549 void Aarch64Encoder::EncodeMov(Reg dst, Reg src)
550 {
551     if (dst == src) {
552         return;
553     }
554     if (src.IsFloat() && dst.IsFloat()) {
555         if (src.GetSize() != dst.GetSize()) {
556             GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
557             return;
558         }
559         GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
560         return;
561     }
562     if (src.IsFloat() && !dst.IsFloat()) {
563         GetMasm()->Fmov(VixlReg(dst, src.GetSize()), VixlVReg(src));
564         return;
565     }
566     if (dst.IsFloat()) {
567         ASSERT(src.IsScalar());
568         GetMasm()->Fmov(VixlVReg(dst), VixlReg(src));
569         return;
570     }
571     // DiscardForSameWReg below means we would drop "mov w0, w0", but it is guarded by "dst == src" above anyway.
572     // NOTE: "mov w0, w0" is not equivalent to "nop", as it clears the upper bits of x0.
573     // Keeping the option here helps to generate nothing when e.g. src is x0 and dst is w0.
574     // Probably, a better solution is system-wide checking of register sizes at the Encoder level.
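    // Example: with x0 == 0xffffffff00000001, "mov w0, w0" leaves x0 == 0x0000000000000001
    // (a 32-bit register write zeroes the upper half), whereas "nop" leaves x0 unchanged.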
575     if (src.GetSize() != dst.GetSize()) {
576         auto srcReg = Reg(src.GetId(), dst.GetType());
577         GetMasm()->Mov(VixlReg(dst), VixlReg(srcReg), vixl::aarch64::DiscardMoveMode::kDiscardForSameWReg);
578         return;
579     }
580     GetMasm()->Mov(VixlReg(dst), VixlReg(src), vixl::aarch64::DiscardMoveMode::kDiscardForSameWReg);
581 }
582 
583 void Aarch64Encoder::EncodeNeg(Reg dst, Reg src)
584 {
585     if (dst.IsFloat()) {
586         GetMasm()->Fneg(VixlVReg(dst), VixlVReg(src));
587         return;
588     }
589     GetMasm()->Neg(VixlReg(dst), VixlReg(src));
590 }
591 
592 void Aarch64Encoder::EncodeAbs(Reg dst, Reg src)
593 {
594     if (dst.IsFloat()) {
595         GetMasm()->Fabs(VixlVReg(dst), VixlVReg(src));
596         return;
597     }
598 
599     ASSERT(!GetRegfile()->IsZeroReg(dst));
600     if (GetRegfile()->IsZeroReg(src)) {
601         EncodeMov(dst, src);
602         return;
603     }
604 
605     if (src.GetSize() == DOUBLE_WORD_SIZE) {
606         GetMasm()->Cmp(VixlReg(src), vixl::aarch64::xzr);
607     } else {
608         GetMasm()->Cmp(VixlReg(src), vixl::aarch64::wzr);
609     }
610     GetMasm()->Cneg(VixlReg(Promote(dst)), VixlReg(Promote(src)), vixl::aarch64::Condition::lt);
611 }
612 
613 void Aarch64Encoder::EncodeSqrt(Reg dst, Reg src)
614 {
615     ASSERT(dst.IsFloat());
616     GetMasm()->Fsqrt(VixlVReg(dst), VixlVReg(src));
617 }
618 
619 void Aarch64Encoder::EncodeIsInf(Reg dst, Reg src)
620 {
621     ASSERT(dst.IsScalar() && src.IsFloat());
622 
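    // Inf has an all-ones exponent and a zero mantissa; shifting the raw bits left by one
    // drops the sign bit, so both +Inf and -Inf compare equal to INF_MASK
    // (0x7f800000 << 1 == 0xff000000, resp. 0x7ff0000000000000 << 1 == 0xffe0000000000000).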
623     if (src.GetSize() == WORD_SIZE) {
624         constexpr uint32_t INF_MASK = 0xff000000;
625 
626         ScopedTmpRegU32 tmpReg(this);
627         auto tmp = VixlReg(tmpReg);
628         GetMasm()->Fmov(tmp, VixlVReg(src));
629         GetMasm()->Mov(VixlReg(dst).W(), INF_MASK);
630         GetMasm()->Lsl(tmp, tmp, 1);
631         GetMasm()->Cmp(tmp, VixlReg(dst, WORD_SIZE));
632     } else {
633         constexpr uint64_t INF_MASK = 0xffe0000000000000;
634 
635         ScopedTmpRegU64 tmpReg(this);
636         auto tmp = VixlReg(tmpReg);
637         GetMasm()->Fmov(tmp, VixlVReg(src));
638         GetMasm()->Mov(VixlReg(dst).X(), INF_MASK);
639         GetMasm()->Lsl(tmp, tmp, 1);
640         GetMasm()->Cmp(tmp, VixlReg(dst, DOUBLE_WORD_SIZE));
641     }
642 
643     GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::eq);
644 }
645 
646 void Aarch64Encoder::EncodeCmpFracWithDelta(Reg src)
647 {
648     ASSERT(src.IsFloat());
649     ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
650 
651     // Encode (fabs(src - trunc(src)) <= DELTA)
652     if (src.GetSize() == WORD_SIZE) {
653         ScopedTmpRegF32 tmp(this);
654         GetMasm()->Frintz(VixlVReg(tmp), VixlVReg(src));
655         EncodeSub(tmp, src, tmp);
656         EncodeAbs(tmp, tmp);
657         GetMasm()->Fcmp(VixlVReg(tmp), std::numeric_limits<float>::epsilon());
658     } else {
659         ScopedTmpRegF64 tmp(this);
660         GetMasm()->Frintz(VixlVReg(tmp), VixlVReg(src));
661         EncodeSub(tmp, src, tmp);
662         EncodeAbs(tmp, tmp);
663         GetMasm()->Fcmp(VixlVReg(tmp), std::numeric_limits<double>::epsilon());
664     }
665 }
666 
667 void Aarch64Encoder::EncodeIsInteger(Reg dst, Reg src)
668 {
669     ASSERT(dst.IsScalar() && src.IsFloat());
670     ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
671 
672     auto labelExit = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
673     auto labelInfOrNan = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
674 
675     EncodeCmpFracWithDelta(src);
676     GetMasm()->B(labelInfOrNan, vixl::aarch64::Condition::vs);  // Inf or NaN
677     GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::le);
678     GetMasm()->B(labelExit);
679 
680     // IsInteger returns false if src is Inf or NaN
681     GetMasm()->Bind(labelInfOrNan);
682     EncodeMov(dst, Imm(false));
683 
684     GetMasm()->Bind(labelExit);
685 }
686 
687 void Aarch64Encoder::EncodeIsSafeInteger(Reg dst, Reg src)
688 {
689     ASSERT(dst.IsScalar() && src.IsFloat());
690     ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
691 
692     auto labelExit = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
693     auto labelFalse = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
694 
695     // Check if IsInteger
696     EncodeCmpFracWithDelta(src);
697     GetMasm()->B(labelFalse, vixl::aarch64::Condition::vs);  // Inf or NaN
698     GetMasm()->B(labelFalse, vixl::aarch64::Condition::gt);
699 
700     // Check if it is safe, i.e. src can be represented in float/double without losing precision
701     if (src.GetSize() == WORD_SIZE) {
702         ScopedTmpRegF32 tmp(this);
703         EncodeAbs(tmp, src);
704         GetMasm()->Fcmp(VixlVReg(tmp), MaxIntAsExactFloat());
705     } else {
706         ScopedTmpRegF64 tmp(this);
707         EncodeAbs(tmp, src);
708         GetMasm()->Fcmp(VixlVReg(tmp), MaxIntAsExactDouble());
709     }
710     GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::le);
711     GetMasm()->B(labelExit);
712 
713     // Return false if src !IsInteger
714     GetMasm()->Bind(labelFalse);
715     EncodeMov(dst, Imm(false));
716 
717     GetMasm()->Bind(labelExit);
718 }
719 
720 /* NaN values need to be canonicalized */
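/* Sketch of the intended mapping (all NaN payloads collapse to one canonical pattern):
 *   bits(1.5f) -> 0x3fc00000,          bits(any float NaN)  -> 0x7fc00000
 *   bits(1.5)  -> 0x3ff8000000000000,  bits(any double NaN) -> 0x7ff8000000000000 */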
721 void Aarch64Encoder::EncodeFpToBits(Reg dst, Reg src)
722 {
723     ASSERT(dst.IsScalar() && src.IsFloat());
724     ASSERT(dst.GetSize() == WORD_SIZE || dst.GetSize() == DOUBLE_WORD_SIZE);
725 
726     if (dst.GetSize() == WORD_SIZE) {
727         ASSERT(src.GetSize() == WORD_SIZE);
728 
729         constexpr auto FNAN = 0x7fc00000;
730 
731         ScopedTmpRegU32 tmp(this);
732 
733         GetMasm()->Fcmp(VixlVReg(src), VixlVReg(src));
734         GetMasm()->Mov(VixlReg(tmp), FNAN);
735         GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
736         GetMasm()->Csel(VixlReg(dst), VixlReg(tmp), VixlReg(dst), vixl::aarch64::Condition::ne);
737     } else {
738         ASSERT(src.GetSize() == DOUBLE_WORD_SIZE);
739 
740         constexpr auto DNAN = 0x7ff8000000000000;
741 
742         ScopedTmpRegU64 tmpReg(this);
743         auto tmp = VixlReg(tmpReg);
744 
745         GetMasm()->Fcmp(VixlVReg(src), VixlVReg(src));
746         GetMasm()->Mov(tmp, DNAN);
747         GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
748         GetMasm()->Csel(VixlReg(dst), tmp, VixlReg(dst), vixl::aarch64::Condition::ne);
749     }
750 }
751 
752 void Aarch64Encoder::EncodeMoveBitsRaw(Reg dst, Reg src)
753 {
754     ASSERT((dst.IsFloat() && src.IsScalar()) || (src.IsFloat() && dst.IsScalar()));
755     if (dst.IsScalar()) {
756         ASSERT(src.GetSize() == dst.GetSize());
757         if (dst.GetSize() == WORD_SIZE) {
758             GetMasm()->Umov(VixlReg(dst).W(), VixlVReg(src).S(), 0);
759         } else {
760             GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
761         }
762     } else {
763         ASSERT(dst.GetSize() == src.GetSize());
764         ScopedTmpReg tmpReg(this, src.GetType());
765         auto srcReg = src;
766         auto rzero = GetRegfile()->GetZeroReg();
767         if (src.GetId() == rzero.GetId()) {
768             EncodeMov(tmpReg, Imm(0));
769             srcReg = tmpReg;
770         }
771 
772         if (srcReg.GetSize() == WORD_SIZE) {
773             GetMasm()->Fmov(VixlVReg(dst).S(), VixlReg(srcReg).W());
774         } else {
775             GetMasm()->Fmov(VixlVReg(dst), VixlReg(srcReg));
776         }
777     }
778 }
779 
780 void Aarch64Encoder::EncodeReverseBytes(Reg dst, Reg src)
781 {
782     auto rzero = GetRegfile()->GetZeroReg();
783     if (src.GetId() == rzero.GetId()) {
784         EncodeMov(dst, Imm(0));
785         return;
786     }
787 
788     ASSERT(src.GetSize() > BYTE_SIZE);
789     ASSERT(src.GetSize() == dst.GetSize());
790 
791     if (src.GetSize() == HALF_SIZE) {
792         GetMasm()->Rev16(VixlReg(dst), VixlReg(src));
793         GetMasm()->Sxth(VixlReg(dst), VixlReg(dst));
794     } else {
795         GetMasm()->Rev(VixlReg(dst), VixlReg(src));
796     }
797 }
798 
799 void Aarch64Encoder::EncodeBitCount(Reg dst, Reg src)
800 {
801     auto rzero = GetRegfile()->GetZeroReg();
802     if (src.GetId() == rzero.GetId()) {
803         EncodeMov(dst, Imm(0));
804         return;
805     }
806 
807     ASSERT(dst.GetSize() == WORD_SIZE);
808 
809     ScopedTmpRegF64 tmpReg0(this);
810     vixl::aarch64::VRegister tmpReg;
811     if (src.GetSize() == DOUBLE_WORD_SIZE) {
812         tmpReg = VixlVReg(tmpReg0).D();
813     } else {
814         tmpReg = VixlVReg(tmpReg0).S();
815     }
816 
817     if (src.GetSize() < WORD_SIZE) {
818         int64_t cutValue = (1ULL << src.GetSize()) - 1;
819         EncodeAnd(src, src, Imm(cutValue));
820     }
821 
822     GetMasm()->Fmov(tmpReg, VixlReg(src));
823     GetMasm()->Cnt(tmpReg.V8B(), tmpReg.V8B());
824     GetMasm()->Addv(tmpReg.B(), tmpReg.V8B());
825     EncodeMov(dst, tmpReg0);
826 }
827 
828 /* Since only ROR is supported on AArch64 we do
829  * left rotation as ROR(v, -count) */
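/* Worked example (assuming 32-bit operands): rol(v, n) == ror(v, 32 - n) == ror(v, -n & 31),
 * so the non-ROR branch below simply negates the count and reuses Ror. */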
830 void Aarch64Encoder::EncodeRotate(Reg dst, Reg src1, Reg src2, bool isRor)
831 {
832     ASSERT(src1.GetSize() == WORD_SIZE || src1.GetSize() == DOUBLE_WORD_SIZE);
833     ASSERT(src1.GetSize() == dst.GetSize());
834     auto rzero = GetRegfile()->GetZeroReg();
835     if (rzero.GetId() == src2.GetId() || rzero.GetId() == src1.GetId()) {
836         EncodeMov(dst, src1);
837         return;
838     }
839     /* as the second parameter is always 32 bits long we have to
840      * adjust the counter register for the case of a 64-bit first operand */
841     if (isRor) {
842         auto count = (dst.GetSize() == WORD_SIZE ? VixlReg(src2) : VixlReg(src2).X());
843         GetMasm()->Ror(VixlReg(dst), VixlReg(src1), count);
844     } else {
845         ScopedTmpReg tmp(this);
846         auto cnt = (dst.GetId() == src1.GetId() ? tmp : dst);
847         auto count = (dst.GetSize() == WORD_SIZE ? VixlReg(cnt).W() : VixlReg(cnt).X());
848         auto source2 = (dst.GetSize() == WORD_SIZE ? VixlReg(src2).W() : VixlReg(src2).X());
849         GetMasm()->Neg(count, source2);
850         GetMasm()->Ror(VixlReg(dst), VixlReg(src1), count);
851     }
852 }
853 
854 void Aarch64Encoder::EncodeSignum(Reg dst, Reg src)
855 {
856     ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
857 
858     ScopedTmpRegU32 tmp(this);
859     auto sign = (dst.GetId() == src.GetId() ? tmp : dst);
860 
861     GetMasm()->Cmp(VixlReg(src), VixlImm(0));
862     GetMasm()->Cset(VixlReg(sign), vixl::aarch64::Condition::gt);
863 
864     constexpr auto SHIFT_WORD_BITS = 31;
865     constexpr auto SHIFT_DWORD_BITS = 63;
866 
867     /* The operation below is "sub dst, dst, src, lsr #reg_size-1";
868      * however, we can encode at most 32 bits in the lsr field, so
869      * for the 64-bit case we cannot avoid a separate lsr instruction */
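    /* Worked example for a 32-bit src:
     *   src > 0:  sign = 1, src lsr 31 = 0  ->  dst = 1
     *   src == 0: sign = 0, src lsr 31 = 0  ->  dst = 0
     *   src < 0:  sign = 0, src lsr 31 = 1  ->  dst = -1 */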
870     if (src.GetSize() == WORD_SIZE) {
871         auto shift = Shift(src, LSR, SHIFT_WORD_BITS);
872         EncodeSub(dst, sign, shift);
873     } else {
874         ScopedTmpRegU64 shift(this);
875         sign = Reg(sign.GetId(), INT64_TYPE);
876         EncodeShr(shift, src, Imm(SHIFT_DWORD_BITS));
877         EncodeSub(dst, sign, shift);
878     }
879 }
880 
881 void Aarch64Encoder::EncodeCountLeadingZeroBits(Reg dst, Reg src)
882 {
883     auto rzero = GetRegfile()->GetZeroReg();
884     if (rzero.GetId() == src.GetId()) {
885         EncodeMov(dst, Imm(src.GetSize()));
886         return;
887     }
888     GetMasm()->Clz(VixlReg(dst), VixlReg(src));
889 }
890 
891 void Aarch64Encoder::EncodeCountTrailingZeroBits(Reg dst, Reg src)
892 {
893     auto rzero = GetRegfile()->GetZeroReg();
894     if (rzero.GetId() == src.GetId()) {
895         EncodeMov(dst, Imm(src.GetSize()));
896         return;
897     }
898     GetMasm()->Rbit(VixlReg(dst), VixlReg(src));
899     GetMasm()->Clz(VixlReg(dst), VixlReg(dst));
900 }
901 
902 void Aarch64Encoder::EncodeCeil(Reg dst, Reg src)
903 {
904     GetMasm()->Frintp(VixlVReg(dst), VixlVReg(src));
905 }
906 
907 void Aarch64Encoder::EncodeFloor(Reg dst, Reg src)
908 {
909     GetMasm()->Frintm(VixlVReg(dst), VixlVReg(src));
910 }
911 
912 void Aarch64Encoder::EncodeRint(Reg dst, Reg src)
913 {
914     GetMasm()->Frintn(VixlVReg(dst), VixlVReg(src));
915 }
916 
917 void Aarch64Encoder::EncodeTrunc(Reg dst, Reg src)
918 {
919     GetMasm()->Frintz(VixlVReg(dst), VixlVReg(src));
920 }
921 
922 void Aarch64Encoder::EncodeRoundAway(Reg dst, Reg src)
923 {
924     GetMasm()->Frinta(VixlVReg(dst), VixlVReg(src));
925 }
926 
927 void Aarch64Encoder::EncodeRoundToPInfReturnScalar(Reg dst, Reg src)
928 {
929     auto done = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
930     ScopedTmpReg tmp(this, src.GetType());
931     // round to nearest integer, ties away from zero
932     GetMasm()->Fcvtas(VixlReg(dst), VixlVReg(src));
933     // for positive values, zero, and NaN inputs, rounding is already done
934     GetMasm()->Tbz(VixlReg(dst), dst.GetSize() - 1, done);
935     // if input is negative but not a tie, round to nearest is valid
936     // if input is a negative tie, dst += 1
937     GetMasm()->Frinta(VixlVReg(tmp), VixlVReg(src));
938     GetMasm()->Fsub(VixlVReg(tmp), VixlVReg(src), VixlVReg(tmp));
939     // NOLINTNEXTLINE(readability-magic-numbers)
940     GetMasm()->Fcmp(VixlVReg(tmp), 0.5F);
941     GetMasm()->Cinc(VixlReg(dst), VixlReg(dst), vixl::aarch64::Condition::eq);
942     GetMasm()->Bind(done);
943 }
944 
945 void Aarch64Encoder::EncodeRoundToPInfReturnFloat(Reg dst, Reg src)
946 {
947     ASSERT(src.GetType() == FLOAT64_TYPE);
948     ASSERT(dst.GetType() == FLOAT64_TYPE);
949 
950     // CC-OFFNXT(G.NAM.03-CPP) project code style
951     constexpr double HALF = 0.5;
952     // CC-OFFNXT(G.NAM.03-CPP) project code style
953     constexpr double ONE = 1.0;
954 
955     ScopedTmpRegF64 ceil(this);
956 
957     // calculate ceil(val)
958     GetMasm()->Frintp(VixlVReg(ceil), VixlVReg(src));
959 
960     // compare ceil(val) - val with 0.5
961     GetMasm()->Fsub(VixlVReg(dst), VixlVReg(ceil), VixlVReg(src));
962     GetMasm()->Fcmp(VixlVReg(dst), HALF);
963 
964     // calculate ceil(val) - 1
965     GetMasm()->Fmov(VixlVReg(dst), ONE);
966     GetMasm()->Fsub(VixlVReg(dst), VixlVReg(ceil), VixlVReg(dst));
967 
968     // select final value based on comparison result
969     GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(dst), VixlVReg(ceil), vixl::aarch64::Condition::gt);
970 }
971 
972 void Aarch64Encoder::EncodeCrc32Update(Reg dst, Reg crcReg, Reg valReg)
973 {
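    // The crc accumulator is bit-inverted before and after the crc32b step, which matches
    // a CRC-32 value that is kept in its finalized (XOR-ed with 0xffffffff) form between calls.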
974     auto tmp = dst.GetId() != crcReg.GetId() && dst.GetId() != valReg.GetId() ? dst : ScopedTmpReg(this, dst.GetType());
975     GetMasm()->Mvn(VixlReg(tmp), VixlReg(crcReg));
976     GetMasm()->Crc32b(VixlReg(tmp), VixlReg(tmp), VixlReg(valReg));
977     GetMasm()->Mvn(VixlReg(dst), VixlReg(tmp));
978 }
979 
980 void Aarch64Encoder::EncodeCompressEightUtf16ToUtf8CharsUsingSimd(Reg srcAddr, Reg dstAddr)
981 {
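    // Ld2 de-interleaves the 8 UTF-16 code units into a low-byte vector and a high-byte
    // vector; St1 then stores only the low-byte vector, i.e. the 8 compressed one-byte chars.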
982     ScopedTmpReg tmp1(this, FLOAT64_TYPE);
983     ScopedTmpReg tmp2(this, FLOAT64_TYPE);
984     auto vixlVreg1 = vixl::aarch64::VRegister(tmp1.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8B);
985     ASSERT(vixlVreg1.IsValid());
986     auto vixlVreg2 = vixl::aarch64::VRegister(tmp2.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8B);
987     ASSERT(vixlVreg2.IsValid());
988     auto src = vixl::aarch64::MemOperand(VixlReg(srcAddr));
989     auto dst = vixl::aarch64::MemOperand(VixlReg(dstAddr));
990     GetMasm()->Ld2(vixlVreg1, vixlVreg2, src);
991     GetMasm()->St1(vixlVreg1, dst);
992 }
993 
994 void Aarch64Encoder::EncodeCompressSixteenUtf16ToUtf8CharsUsingSimd(Reg srcAddr, Reg dstAddr)
995 {
996     ScopedTmpReg tmp1(this, FLOAT64_TYPE);
997     ScopedTmpReg tmp2(this, FLOAT64_TYPE);
998     auto vixlVreg1 = vixl::aarch64::VRegister(tmp1.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
999     ASSERT(vixlVreg1.IsValid());
1000     auto vixlVreg2 = vixl::aarch64::VRegister(tmp2.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
1001     ASSERT(vixlVreg2.IsValid());
1002     auto src = vixl::aarch64::MemOperand(VixlReg(srcAddr));
1003     auto dst = vixl::aarch64::MemOperand(VixlReg(dstAddr));
1004     GetMasm()->Ld2(vixlVreg1, vixlVreg2, src);
1005     GetMasm()->St1(vixlVreg1, dst);
1006 }
1007 
1008 void Aarch64Encoder::EncodeMemCharU8X32UsingSimd(Reg dst, Reg ch, Reg srcAddr, Reg tmp)
1009 {
1010     ScopedTmpReg vTmp0(this, FLOAT64_TYPE);
1011     ScopedTmpReg vTmp1(this, FLOAT64_TYPE);
1012     auto vReg0 = vixl::aarch64::VRegister(vTmp0.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
1013     auto vReg1 = vixl::aarch64::VRegister(vTmp1.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
1014     auto vReg2 = vixl::aarch64::VRegister(tmp.GetId(), vixl::aarch64::VectorFormat::kFormat16B);
1015     auto xReg0 = vixl::aarch64::Register(dst.GetId(), vixl::aarch64::kXRegSize);
1016     auto labelReturn = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1017     auto labelFound = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1018     auto labelSecond16B = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1019     auto labelCheckV0D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1020     auto labelCheckV1D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1021 
1022     GetMasm()->Ld1(vReg0, vReg1, vixl::aarch64::MemOperand(VixlReg(srcAddr)));
1023     GetMasm()->Dup(vReg2, VixlReg(ch));
1024     GetMasm()->Cmeq(vReg0, vReg0, vReg2);
1025     GetMasm()->Cmeq(vReg1, vReg1, vReg2);
1026     // Give up if char is not there
1027     GetMasm()->Addp(vReg2, vReg0, vReg1);
1028     GetMasm()->Addp(vReg2.V2D(), vReg2.V2D(), vReg2.V2D());
1029     GetMasm()->Mov(xReg0, vReg2.D(), 0);
1030     GetMasm()->Cbz(xReg0, labelReturn);
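    // In each 64-bit lane a matching byte shows up as 0xff, so Rev + Clz yields the bit index
    // of the first match within the lane, and the Lsr by 3 at labelFound turns the accumulated
    // bit index into a byte offset that is added to srcAddr.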
1031     // Inspect the first 16-byte block
1032     GetMasm()->Mov(xReg0, vReg0.D(), 0);
1033     GetMasm()->Cbz(xReg0, labelCheckV0D1);
1034     GetMasm()->Rev(xReg0, xReg0);
1035     GetMasm()->Clz(xReg0, xReg0);
1036     GetMasm()->B(labelFound);
1037     GetMasm()->Bind(labelCheckV0D1);
1038     GetMasm()->Mov(xReg0, vReg0.D(), 1U);
1039     GetMasm()->Cbz(xReg0, labelSecond16B);
1040     GetMasm()->Rev(xReg0, xReg0);
1041     GetMasm()->Clz(xReg0, xReg0);
1042     GetMasm()->Add(xReg0, xReg0, VixlImm(BITS_PER_UINT64));
1043     GetMasm()->B(labelFound);
1044     // Inspect the second 16-byte block
1045     GetMasm()->Bind(labelSecond16B);
1046     GetMasm()->Mov(xReg0, vReg1.D(), 0);
1047     GetMasm()->Cbz(xReg0, labelCheckV1D1);
1048     GetMasm()->Rev(xReg0, xReg0);
1049     GetMasm()->Clz(xReg0, xReg0);
1050     GetMasm()->Add(xReg0, xReg0, VixlImm(2U * BITS_PER_UINT64));
1051     GetMasm()->B(labelFound);
1052     GetMasm()->Bind(labelCheckV1D1);
1053     GetMasm()->Mov(xReg0, vReg1.D(), 1U);
1054     GetMasm()->Rev(xReg0, xReg0);
1055     GetMasm()->Clz(xReg0, xReg0);
1056     GetMasm()->Add(xReg0, xReg0, VixlImm(3U * BITS_PER_UINT64));
1057 
1058     GetMasm()->Bind(labelFound);
1059     GetMasm()->Lsr(xReg0, xReg0, 3U);
1060     GetMasm()->Add(xReg0, xReg0, VixlReg(srcAddr));
1061     GetMasm()->Bind(labelReturn);
1062 }
1063 
1064 void Aarch64Encoder::EncodeMemCharU16X16UsingSimd(Reg dst, Reg ch, Reg srcAddr, Reg tmp)
1065 {
1066     ScopedTmpReg vTmp0(this, FLOAT64_TYPE);
1067     ScopedTmpReg vTmp1(this, FLOAT64_TYPE);
1068     auto vReg0 = vixl::aarch64::VRegister(vTmp0.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1069     auto vReg1 = vixl::aarch64::VRegister(vTmp1.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1070     auto vReg2 = vixl::aarch64::VRegister(tmp.GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1071     auto xReg0 = vixl::aarch64::Register(dst.GetId(), vixl::aarch64::kXRegSize);
1072     auto labelReturn = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1073     auto labelFound = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1074     auto labelSecond16B = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1075     auto labelCheckV0D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1076     auto labelCheckV1D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1077 
1078     GetMasm()->Ld1(vReg0, vReg1, vixl::aarch64::MemOperand(VixlReg(srcAddr)));
1079     GetMasm()->Dup(vReg2, VixlReg(ch));
1080     GetMasm()->Cmeq(vReg0, vReg0, vReg2);
1081     GetMasm()->Cmeq(vReg1, vReg1, vReg2);
1082     // Give up if char is not there
1083     GetMasm()->Addp(vReg2, vReg0, vReg1);
1084     GetMasm()->Addp(vReg2.V2D(), vReg2.V2D(), vReg2.V2D());
1085     GetMasm()->Mov(xReg0, vReg2.D(), 0);
1086     GetMasm()->Cbz(xReg0, labelReturn);
1087     // Inspect the first 16-byte block
1088     GetMasm()->Mov(xReg0, vReg0.D(), 0);
1089     GetMasm()->Cbz(xReg0, labelCheckV0D1);
1090     GetMasm()->Rev(xReg0, xReg0);
1091     GetMasm()->Clz(xReg0, xReg0);
1092     GetMasm()->B(labelFound);
1093     GetMasm()->Bind(labelCheckV0D1);
1094     GetMasm()->Mov(xReg0, vReg0.D(), 1U);
1095     GetMasm()->Cbz(xReg0, labelSecond16B);
1096     GetMasm()->Rev(xReg0, xReg0);
1097     GetMasm()->Clz(xReg0, xReg0);
1098     GetMasm()->Add(xReg0, xReg0, VixlImm(BITS_PER_UINT64));
1099     GetMasm()->B(labelFound);
1100     // Inspect the second 16-byte block
1101     GetMasm()->Bind(labelSecond16B);
1102     GetMasm()->Mov(xReg0, vReg1.D(), 0);
1103     GetMasm()->Cbz(xReg0, labelCheckV1D1);
1104     GetMasm()->Rev(xReg0, xReg0);
1105     GetMasm()->Clz(xReg0, xReg0);
1106     GetMasm()->Add(xReg0, xReg0, VixlImm(2U * BITS_PER_UINT64));
1107     GetMasm()->B(labelFound);
1108     GetMasm()->Bind(labelCheckV1D1);
1109     GetMasm()->Mov(xReg0, vReg1.D(), 1U);
1110     GetMasm()->Rev(xReg0, xReg0);
1111     GetMasm()->Clz(xReg0, xReg0);
1112     GetMasm()->Add(xReg0, xReg0, VixlImm(3U * BITS_PER_UINT64));
1113 
1114     GetMasm()->Bind(labelFound);
1115     GetMasm()->Lsr(xReg0, xReg0, 4U);
1116     GetMasm()->Lsl(xReg0, xReg0, 1U);
1117     GetMasm()->Add(xReg0, xReg0, VixlReg(srcAddr));
1118     GetMasm()->Bind(labelReturn);
1119 }
1120 
1121 void Aarch64Encoder::EncodeMemCharU8X16UsingSimd(Reg dst, Reg ch, Reg srcAddr, Reg tmp)
1122 {
1123     ScopedTmpReg vTmp0(this, FLOAT64_TYPE);
1124     ScopedTmpReg vTmp1(this, FLOAT64_TYPE);
1125     auto vReg0 = vixl::aarch64::VRegister(vTmp0.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
1126     auto vReg1 = vixl::aarch64::VRegister(tmp.GetId(), vixl::aarch64::VectorFormat::kFormat16B);
1127     auto xReg0 = vixl::aarch64::Register(dst.GetId(), vixl::aarch64::kXRegSize);
1128     auto labelReturn = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1129     auto labelFound = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1130     auto labelCheckV0D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1131 
1132     GetMasm()->Ld1(vReg0, vixl::aarch64::MemOperand(VixlReg(srcAddr)));
1133     GetMasm()->Dup(vReg1, VixlReg(ch));
1134     GetMasm()->Cmeq(vReg0, vReg0, vReg1);
1135     // Give up if char is not there
1136     GetMasm()->Addp(vReg1.V2D(), vReg0.V2D(), vReg0.V2D());
1137     GetMasm()->Mov(xReg0, vReg1.D(), 0);
1138     GetMasm()->Cbz(xReg0, labelReturn);
1139     // Compute a pointer to the char
1140     GetMasm()->Mov(xReg0, vReg0.D(), 0);
1141     GetMasm()->Cbz(xReg0, labelCheckV0D1);
1142     GetMasm()->Rev(xReg0, xReg0);
1143     GetMasm()->Clz(xReg0, xReg0);
1144     GetMasm()->B(labelFound);
1145     GetMasm()->Bind(labelCheckV0D1);
1146     GetMasm()->Mov(xReg0, vReg0.D(), 1U);
1147     GetMasm()->Rev(xReg0, xReg0);
1148     GetMasm()->Clz(xReg0, xReg0);
1149     GetMasm()->Add(xReg0, xReg0, VixlImm(BITS_PER_UINT64));
1150     GetMasm()->Bind(labelFound);
1151     GetMasm()->Lsr(xReg0, xReg0, 3U);  // number of 8-bit chars
1152     GetMasm()->Add(xReg0, xReg0, VixlReg(srcAddr));
1153     GetMasm()->Bind(labelReturn);
1154 }
1155 
1156 void Aarch64Encoder::EncodeMemCharU16X8UsingSimd(Reg dst, Reg ch, Reg srcAddr, Reg tmp)
1157 {
1158     ScopedTmpReg vTmp0(this, FLOAT64_TYPE);
1159     ScopedTmpReg vTmp1(this, FLOAT64_TYPE);
1160     auto vReg0 = vixl::aarch64::VRegister(vTmp0.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1161     auto vReg1 = vixl::aarch64::VRegister(tmp.GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1162     auto xReg0 = vixl::aarch64::Register(dst.GetId(), vixl::aarch64::kXRegSize);
1163     auto labelReturn = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1164     auto labelFound = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1165     auto labelCheckV0D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1166 
1167     GetMasm()->Ld1(vReg0, vixl::aarch64::MemOperand(VixlReg(srcAddr)));
1168     GetMasm()->Dup(vReg1, VixlReg(ch));
1169     GetMasm()->Cmeq(vReg0, vReg0, vReg1);
1170     // Give up if char is not there
1171     GetMasm()->Addp(vReg1.V2D(), vReg0.V2D(), vReg0.V2D());
1172     GetMasm()->Mov(xReg0, vReg1.D(), 0);
1173     GetMasm()->Cbz(xReg0, labelReturn);
1174     // Compute a pointer to the char
1175     GetMasm()->Mov(xReg0, vReg0.D(), 0);
1176     GetMasm()->Cbz(xReg0, labelCheckV0D1);
1177     GetMasm()->Rev(xReg0, xReg0);
1178     GetMasm()->Clz(xReg0, xReg0);
1179     GetMasm()->B(labelFound);
1180     GetMasm()->Bind(labelCheckV0D1);
1181     GetMasm()->Mov(xReg0, vReg0.D(), 1U);
1182     GetMasm()->Rev(xReg0, xReg0);
1183     GetMasm()->Clz(xReg0, xReg0);
1184     GetMasm()->Add(xReg0, xReg0, VixlImm(BITS_PER_UINT64));
1185     GetMasm()->Bind(labelFound);
1186     GetMasm()->Lsr(xReg0, xReg0, 4U);  // number of 16-bit chars
1187     GetMasm()->Lsl(xReg0, xReg0, 1U);  // number of bytes
1188     GetMasm()->Add(xReg0, xReg0, VixlReg(srcAddr));
1189     GetMasm()->Bind(labelReturn);
1190 }
1191 
1192 void Aarch64Encoder::EncodeUnsignedExtendBytesToShorts(Reg dst, Reg src)
1193 {
1194     GetMasm()->Uxtl(VixlVReg(dst).V8H(), VixlVReg(src).V8B());
1195 }
1196 
1197 void Aarch64Encoder::EncodeReverseHalfWords(Reg dst, Reg src)
1198 {
1199     ASSERT(src.GetSize() == dst.GetSize());
1200 
1201     GetMasm()->rev64(VixlVReg(dst).V4H(), VixlVReg(src).V4H());
1202 }
1203 
1204 bool Aarch64Encoder::CanEncodeBitCount()
1205 {
1206     return true;
1207 }
1208 
1209 bool Aarch64Encoder::CanEncodeCompressedStringCharAt()
1210 {
1211     return true;
1212 }
1213 
1214 bool Aarch64Encoder::CanEncodeCompressedStringCharAtI()
1215 {
1216     return true;
1217 }
1218 
1219 bool Aarch64Encoder::CanEncodeMAdd()
1220 {
1221     return true;
1222 }
1223 
1224 bool Aarch64Encoder::CanEncodeMSub()
1225 {
1226     return true;
1227 }
1228 
1229 bool Aarch64Encoder::CanEncodeMNeg()
1230 {
1231     return true;
1232 }
1233 
1234 bool Aarch64Encoder::CanEncodeOrNot()
1235 {
1236     return true;
1237 }
1238 
1239 bool Aarch64Encoder::CanEncodeAndNot()
1240 {
1241     return true;
1242 }
1243 
1244 bool Aarch64Encoder::CanEncodeXorNot()
1245 {
1246     return true;
1247 }
1248 
1249 size_t Aarch64Encoder::GetCursorOffset() const
1250 {
1251     return GetMasm()->GetBuffer()->GetCursorOffset();
1252 }
1253 
1254 void Aarch64Encoder::SetCursorOffset(size_t offset)
1255 {
1256     GetMasm()->GetBuffer()->Rewind(offset);
1257 }
1258 
1259 /* return the power of 2 for the size of the type */
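/* Sketch of the cascade below: dreg counts how many of the thresholds I16, I32 and F64 the
 * type id reaches, giving log2(size) = 0 for one-byte types and 1, 2 or 3 for wider ones,
 * with a final adjustment when references are compressed (32-bit) object pointers. */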
1260 void Aarch64Encoder::EncodeGetTypeSize(Reg size, Reg type)
1261 {
1262     auto sreg = VixlReg(type);
1263     auto dreg = VixlReg(size);
1264     constexpr uint8_t I16 = 0x5;
1265     constexpr uint8_t I32 = 0x7;
1266     constexpr uint8_t F64 = 0xa;
1267     constexpr uint8_t REF = 0xd;
1268     constexpr uint8_t SMALLREF = ark::OBJECT_POINTER_SIZE < sizeof(uint64_t) ? 1 : 0;
1269     auto end = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1270 
1271     GetMasm()->Mov(dreg, VixlImm(0));
1272     GetMasm()->Cmp(sreg, VixlImm(I16));
1273     GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1274     GetMasm()->Cmp(sreg, VixlImm(I32));
1275     GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1276     GetMasm()->Cmp(sreg, VixlImm(F64));
1277     GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1278     GetMasm()->Cmp(sreg, VixlImm(REF));
1279     GetMasm()->B(end, vixl::aarch64::Condition::ne);
1280     GetMasm()->Sub(dreg, dreg, VixlImm(SMALLREF));
1281     GetMasm()->Bind(end);
1282 }
1283 
1284 void Aarch64Encoder::EncodeReverseBits(Reg dst, Reg src)
1285 {
1286     auto rzero = GetRegfile()->GetZeroReg();
1287     if (rzero.GetId() == src.GetId()) {
1288         EncodeMov(dst, Imm(0));
1289         return;
1290     }
1291     ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
1292     ASSERT(src.GetSize() == dst.GetSize());
1293 
1294     GetMasm()->Rbit(VixlReg(dst), VixlReg(src));
1295 }
1296 
1297 void Aarch64Encoder::EncodeCompressedStringCharAt(ArgsCompressedStringCharAt &&args)
1298 {
1299     auto [dst, str, idx, length, tmp, dataOffset, shift] = args;
1300     ASSERT(dst.GetSize() == HALF_SIZE);
1301 
1302     auto labelNotCompressed = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1303     auto labelCharLoaded = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1304     auto vixlTmp = VixlReg(tmp, DOUBLE_WORD_SIZE);
1305     auto vixlDst = VixlReg(dst);
1306 
1307     GetMasm()->Tbnz(VixlReg(length), 0, labelNotCompressed);
1308     EncodeAdd(tmp, str, idx);
1309     GetMasm()->ldrb(vixlDst, MemOperand(vixlTmp, dataOffset));
1310     GetMasm()->B(labelCharLoaded);
1311     GetMasm()->Bind(labelNotCompressed);
1312     EncodeAdd(tmp, str, Shift(idx, shift));
1313     GetMasm()->ldrh(vixlDst, MemOperand(vixlTmp, dataOffset));
1314     GetMasm()->Bind(labelCharLoaded);
1315 }
1316 
1317 void Aarch64Encoder::EncodeCompressedStringCharAtI(ArgsCompressedStringCharAtI &&args)
1318 {
1319     auto [dst, str, length, dataOffset, index, shift] = args;
1320     ASSERT(dst.GetSize() == HALF_SIZE);
1321 
1322     auto labelNotCompressed = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1323     auto labelCharLoaded = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1324     auto vixlStr = VixlReg(str);
1325     auto vixlDst = VixlReg(dst);
1326 
1327     auto rzero = GetRegfile()->GetZeroReg().GetId();
1328     if (str.GetId() == rzero) {
1329         return;
1330     }
1331     GetMasm()->Tbnz(VixlReg(length), 0, labelNotCompressed);
1332     GetMasm()->Ldrb(vixlDst, MemOperand(vixlStr, dataOffset + index));
1333     GetMasm()->B(labelCharLoaded);
1334     GetMasm()->Bind(labelNotCompressed);
1335     GetMasm()->Ldrh(vixlDst, MemOperand(vixlStr, dataOffset + (index << shift)));
1336     GetMasm()->Bind(labelCharLoaded);
1337 }
1338 
1339 /* Unsafe builtins implementation */
1340 void Aarch64Encoder::EncodeCompareAndSwap(Reg dst, Reg obj, Reg offset, Reg val, Reg newval)
1341 {
1342     /* Modeled according to the following logic:
1343       .L2:
1344       ldaxr   cur, [addr]
1345       cmp     cur, old
1346       bne     .L3
1347       stlxr   res, new, [addr]
1348       cbnz    res, .L2
1349       .L3:
1350       cset    w0, eq
1351     */
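    /* Pseudo-code sketch (load_acquire_exclusive/store_release_exclusive are hypothetical
     * helpers standing in for ldaxr/stlxr):
     *   do {
     *       cur = load_acquire_exclusive(addr);
     *       if (cur != val) break;
     *   } while (store_release_exclusive(addr, newval) != 0);
     *   dst = (cur == val);
     */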
1352     ScopedTmpReg addr(this, true); /* LR is used */
1353     ScopedTmpReg cur(this, val.GetType());
1354     ScopedTmpReg res(this, val.GetType());
1355     auto loop = CreateLabel();
1356     auto exit = CreateLabel();
1357 
1358     /* ldaxr wants [reg]-form of memref (no offset or disp) */
1359     EncodeAdd(addr, obj, offset);
1360 
1361     BindLabel(loop);
1362     EncodeLdrExclusive(cur, addr, true);
1363     EncodeJump(exit, cur, val, Condition::NE);
1364     cur.Release();
1365     EncodeStrExclusive(res, newval, addr, true);
1366     EncodeJump(loop, res, Imm(0), Condition::NE);
1367     BindLabel(exit);
1368 
1369     GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::eq);
1370 }
1371 
1372 void Aarch64Encoder::EncodeUnsafeGetAndSet(Reg dst, Reg obj, Reg offset, Reg val)
1373 {
1374     auto cur = ScopedTmpReg(this, val.GetType());
1375     auto last = ScopedTmpReg(this, val.GetType());
1376     auto addr = ScopedTmpReg(this, true); /* LR is used */
1377     auto mem = MemRef(addr);
1378     auto restart = CreateLabel();
1379     auto retryLdaxr = CreateLabel();
1380 
1381     /* ldaxr wants [reg]-form of memref (no offset or disp) */
1382     EncodeAdd(addr, obj, offset);
1383 
1384     /* Since GetAndSet is defined as a non-faulting operation we
1385      * have to cover two possible faulty cases:
1386      *      1. stlxr failed, we have to retry ldaxr
1387      *      2. the value we got via ldaxr was not the value we initially
1388      *         loaded, we have to start from the very beginning */
1389     BindLabel(restart);
1390     EncodeLdrAcquire(last, false, mem);
1391 
1392     BindLabel(retryLdaxr);
1393     EncodeLdrExclusive(cur, addr, true);
1394     EncodeJump(restart, cur, last, Condition::NE);
1395     last.Release();
1396     EncodeStrExclusive(dst, val, addr, true);
1397     EncodeJump(retryLdaxr, dst, Imm(0), Condition::NE);
1398 
1399     EncodeMov(dst, cur);
1400 }
1401 
1402 void Aarch64Encoder::EncodeUnsafeGetAndAdd(Reg dst, Reg obj, Reg offset, Reg val, Reg tmp)
1403 {
1404     ScopedTmpReg cur(this, val.GetType());
1405     ScopedTmpReg last(this, val.GetType());
1406     auto newval = Reg(tmp.GetId(), val.GetType());
1407 
1408     auto restart = CreateLabel();
1409     auto retryLdaxr = CreateLabel();
1410 
1411     /* addr_reg aliases obj, obj reg will be restored before exit */
1412     auto addr = Reg(obj.GetId(), INT64_TYPE);
1413 
1414     /* ldaxr wants [reg]-form of memref (no offset or disp) */
1415     auto mem = MemRef(addr);
1416     EncodeAdd(addr, obj, offset);
1417 
1418     /* Since GetAndAdd is defined as a non-faulting operation we
1419      * have to cover two possible failure cases:
1420      *      1. stlxr failed, so we have to retry the ldaxr
1421      *      2. the value we got via ldaxr was not the value we initially
1422      *         loaded, so we have to start from the very beginning */
1423     BindLabel(restart);
1424     EncodeLdrAcquire(last, false, mem);
1425     EncodeAdd(newval, last, val);
1426 
1427     BindLabel(retryLdaxr);
1428     EncodeLdrExclusive(cur, addr, true);
1429     EncodeJump(restart, cur, last, Condition::NE);
1430     last.Release();
1431     EncodeStrExclusive(dst, newval, addr, true);
1432     EncodeJump(retryLdaxr, dst, Imm(0), Condition::NE);
1433 
1434     EncodeSub(obj, addr, offset); /* restore the original value */
1435     EncodeMov(dst, cur);
1436 }
1437 
1438 void Aarch64Encoder::EncodeMemoryBarrier(memory_order::Order order)
1439 {
1440     switch (order) {
1441         case memory_order::ACQUIRE: {
1442             GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierReads);
1443             break;
1444         }
1445         case memory_order::RELEASE: {
1446             GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierWrites);
1447             break;
1448         }
1449         case memory_order::FULL: {
1450             GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierAll);
1451             break;
1452         }
1453         default:
1454             break;
1455     }
1456 }
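/* For reference (assuming vixl's usual DMB encodings), the orders above map roughly to:
 *      ACQUIRE -> dmb ishld   (load barrier)
 *      RELEASE -> dmb ishst   (store barrier)
 *      FULL    -> dmb ish     (full inner-shareable barrier)
 */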
1457 
1458 void Aarch64Encoder::EncodeNot(Reg dst, Reg src)
1459 {
1460     GetMasm()->Mvn(VixlReg(dst), VixlReg(src));
1461 }
1462 
1463 void Aarch64Encoder::EncodeCastFloat(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1464 {
1465     // We DON'T support casts from float32/64 to int8/16 or bool, because such a cast is not defined in other
1466     // languages or architectures, so we do not know what the behavior should be.
1467     // However, there is an implementation in another function: "EncodeCastFloatWithSmallDst". Call it in
1468     // "EncodeCast" instead of "EncodeCastFloat". It works as follows: cast from float32/64 to int32, move the sign
1469     // bit from int32 into the dst type, then extend the number from the dst type back to int32 (a requirement of
1470     // our ISA). All work is done in the dst register.
1471     ASSERT(dst.GetSize() >= WORD_SIZE);
1472 
1473     if (src.IsFloat() && dst.IsScalar()) {
1474         if (dstSigned) {
1475             GetMasm()->Fcvtzs(VixlReg(dst), VixlVReg(src));
1476         } else {
1477             GetMasm()->Fcvtzu(VixlReg(dst), VixlVReg(src));
1478         }
1479         return;
1480     }
1481     if (src.IsScalar() && dst.IsFloat()) {
1482         auto rzero = GetRegfile()->GetZeroReg().GetId();
1483         if (src.GetId() == rzero) {
1484             if (dst.GetSize() == WORD_SIZE) {
1485                 GetMasm()->Fmov(VixlVReg(dst), 0.0F);
1486             } else {
1487                 GetMasm()->Fmov(VixlVReg(dst), 0.0);
1488             }
1489         } else if (srcSigned) {
1490             GetMasm()->Scvtf(VixlVReg(dst), VixlReg(src));
1491         } else {
1492             GetMasm()->Ucvtf(VixlVReg(dst), VixlReg(src));
1493         }
1494         return;
1495     }
1496     if (src.IsFloat() && dst.IsFloat()) {
1497         if (src.GetSize() != dst.GetSize()) {
1498             GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
1499             return;
1500         }
1501         GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
1502         return;
1503     }
1504     UNREACHABLE();
1505 }
1506 
1507 void Aarch64Encoder::EncodeCastFloatWithSmallDst(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1508 {
1509     // A bool dst type is not supported!
1510 
1511     if (src.IsFloat() && dst.IsScalar()) {
1512         if (dstSigned) {
1513             GetMasm()->Fcvtzs(VixlReg(dst), VixlVReg(src));
1514             if (dst.GetSize() < WORD_SIZE) {
1515                 constexpr uint32_t TEST_BIT = (1U << (static_cast<uint32_t>(WORD_SIZE) - 1));
1516                 ScopedTmpReg tmpReg1(this, dst.GetType());
1517                 auto tmp1 = VixlReg(tmpReg1);
1518                 ScopedTmpReg tmpReg2(this, dst.GetType());
1519                 auto tmp2 = VixlReg(tmpReg2);
1520 
1521                 // NOLINTNEXTLINE(hicpp-signed-bitwise)
1522                 int32_t setBit = (dst.GetSize() == BYTE_SIZE) ? (1UL << (BYTE_SIZE - 1)) : (1UL << (HALF_SIZE - 1));
1523                 int32_t remBit = setBit - 1;
1524                 GetMasm()->Ands(tmp1, VixlReg(dst), TEST_BIT);
1525 
1526                 GetMasm()->Orr(tmp1, VixlReg(dst), setBit);
1527                 GetMasm()->And(tmp2, VixlReg(dst), remBit);
1528                 // Select result: if the zero flag is set, take tmp2, otherwise tmp1
1529                 GetMasm()->Csel(VixlReg(dst), tmp2, tmp1, vixl::aarch64::eq);
1530                 EncodeCastScalar(Reg(dst.GetId(), INT32_TYPE), dstSigned, dst, dstSigned);
1531             }
1532             return;
1533         }
1534         GetMasm()->Fcvtzu(VixlReg(dst), VixlVReg(src));
1535         if (dst.GetSize() < WORD_SIZE) {
1536             EncodeCastScalar(Reg(dst.GetId(), INT32_TYPE), dstSigned, dst, dstSigned);
1537         }
1538         return;
1539     }
1540     if (src.IsScalar() && dst.IsFloat()) {
1541         if (srcSigned) {
1542             GetMasm()->Scvtf(VixlVReg(dst), VixlReg(src));
1543         } else {
1544             GetMasm()->Ucvtf(VixlVReg(dst), VixlReg(src));
1545         }
1546         return;
1547     }
1548     if (src.IsFloat() && dst.IsFloat()) {
1549         if (src.GetSize() != dst.GetSize()) {
1550             GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
1551             return;
1552         }
1553         GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
1554         return;
1555     }
1556     UNREACHABLE();
1557 }
1558 
1559 void Aarch64Encoder::EncodeCastSigned(Reg dst, Reg src)
1560 {
1561     size_t srcSize = src.GetSize();
1562     size_t dstSize = dst.GetSize();
1563     auto srcR = Reg(src.GetId(), dst.GetType());
1564     // Else signed extend
1565     if (srcSize > dstSize) {
1566         srcSize = dstSize;
1567     }
1568     switch (srcSize) {
1569         case BYTE_SIZE:
1570             GetMasm()->Sxtb(VixlReg(dst), VixlReg(srcR));
1571             break;
1572         case HALF_SIZE:
1573             GetMasm()->Sxth(VixlReg(dst), VixlReg(srcR));
1574             break;
1575         case WORD_SIZE:
1576             GetMasm()->Sxtw(VixlReg(dst), VixlReg(srcR));
1577             break;
1578         case DOUBLE_WORD_SIZE:
1579             GetMasm()->Mov(VixlReg(dst), VixlReg(srcR));
1580             break;
1581         default:
1582             SetFalseResult();
1583             break;
1584     }
1585 }
1586 
1587 void Aarch64Encoder::EncodeCastUnsigned(Reg dst, Reg src)
1588 {
1589     size_t srcSize = src.GetSize();
1590     size_t dstSize = dst.GetSize();
1591     auto srcR = Reg(src.GetId(), dst.GetType());
1592     if (srcSize > dstSize && dstSize < WORD_SIZE) {
1593         // We need to truncate the value if it is narrower than 32 bits; this is required by the ISA.
1594         int64_t cutValue = (1ULL << dstSize) - 1;
1595         GetMasm()->And(VixlReg(dst), VixlReg(src), VixlImm(cutValue));
1596         return;
1597     }
1598     // Else unsigned extend
1599     switch (srcSize) {
1600         case BYTE_SIZE:
1601             GetMasm()->Uxtb(VixlReg(dst), VixlReg(srcR));
1602             return;
1603         case HALF_SIZE:
1604             GetMasm()->Uxth(VixlReg(dst), VixlReg(srcR));
1605             return;
1606         case WORD_SIZE:
1607             GetMasm()->Uxtw(VixlReg(dst), VixlReg(srcR));
1608             return;
1609         case DOUBLE_WORD_SIZE:
1610             GetMasm()->Mov(VixlReg(dst), VixlReg(srcR));
1611             return;
1612         default:
1613             SetFalseResult();
1614             return;
1615     }
1616 }
1617 
1618 void Aarch64Encoder::EncodeCastScalar(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1619 {
1620     size_t srcSize = src.GetSize();
1621     size_t dstSize = dst.GetSize();
1622     // In our ISA the minimal type is 32-bit, so any type narrower than 32 bits
1623     // must be extended to 32 bits. Hence we may need two casts
1624     // (for example, i8->u16 works as i8->u16 followed by u16->u32).
1625     if (dstSize < WORD_SIZE) {
1626         if (srcSize > dstSize) {
1627             if (dstSigned) {
1628                 EncodeCastSigned(dst, src);
1629             } else {
1630                 EncodeCastUnsigned(dst, src);
1631             }
1632             return;
1633         }
1634         if (srcSize == dstSize) {
1635             GetMasm()->Mov(VixlReg(dst), VixlReg(src));
1636             if (!(srcSigned || dstSigned) || (srcSigned && dstSigned)) {
1637                 return;
1638             }
1639             if (dstSigned) {
1640                 EncodeCastSigned(Reg(dst.GetId(), INT32_TYPE), dst);
1641             } else {
1642                 EncodeCastUnsigned(Reg(dst.GetId(), INT32_TYPE), dst);
1643             }
1644             return;
1645         }
1646         if (srcSigned) {
1647             EncodeCastSigned(dst, src);
1648             if (!dstSigned) {
1649                 EncodeCastUnsigned(Reg(dst.GetId(), INT32_TYPE), dst);
1650             }
1651         } else {
1652             EncodeCastUnsigned(dst, src);
1653             if (dstSigned) {
1654                 EncodeCastSigned(Reg(dst.GetId(), INT32_TYPE), dst);
1655             }
1656         }
1657     } else {
1658         if (srcSize == dstSize) {
1659             GetMasm()->Mov(VixlReg(dst), VixlReg(src));
1660             return;
1661         }
1662         if (srcSigned) {
1663             EncodeCastSigned(dst, src);
1664         } else {
1665             EncodeCastUnsigned(dst, src);
1666         }
1667     }
1668 }
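/* Worked example (illustrative): casting i8 -> u16 (dst narrower than 32 bits) goes through
 * two casts as described above; for src = 0x80 (-128 as i8) roughly the following is emitted:
 *      sxtb w_dst, w_src     ; sign-extend the byte      -> 0xffffff80
 *      uxth w_dst, w_dst     ; zero-extend the halfword  -> 0x0000ff80
 * leaving 0xff80 (-128 reinterpreted as u16), already widened to 32 bits as the ISA requires.
 */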
1669 
1670 void Aarch64Encoder::EncodeFastPathDynamicCast(Reg dst, Reg src, LabelHolder::LabelId slow)
1671 {
1672     ASSERT(src.IsFloat() && dst.IsScalar());
1673 
1674     CHECK_EQ(src.GetSize(), BITS_PER_UINT64);
1675     CHECK_EQ(dst.GetSize(), BITS_PER_UINT32);
1676 
1677     // We use a slow path because the general JS double -> int32 cast is complex: we check only a few common cases
1678     // here and move the remaining checks to the slow path. If the CPU supports the dedicated JS double -> int32
1679     // instruction, no slow path is needed.
1680     if (!IsLabelValid(slow)) {
1681         // use special JS aarch64 instruction
1682 #ifndef NDEBUG
1683         vixl::CPUFeaturesScope scope(GetMasm(), vixl::CPUFeatures::kFP, vixl::CPUFeatures::kJSCVT);
1684 #endif
1685         GetMasm()->Fjcvtzs(VixlReg(dst), VixlVReg(src));
1686         return;
1687     }
1688 
1689     // infinities and too-large values saturate here to INT64_MIN or INT64_MAX, while NaN converts to 0
1690     GetMasm()->Fcvtzs(VixlReg(dst, DOUBLE_WORD_SIZE), VixlVReg(src));
1691     // check INT64_MIN
1692     GetMasm()->Cmp(VixlReg(dst, DOUBLE_WORD_SIZE), VixlImm(1));
1693     // check INT64_MAX
1694     GetMasm()->Ccmp(VixlReg(dst, DOUBLE_WORD_SIZE), VixlImm(-1), vixl::aarch64::StatusFlags::VFlag,
1695                     vixl::aarch64::Condition::vc);
1696     auto slowLabel {static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(slow)};
1697     // jump to slow path in case of overflow
1698     GetMasm()->B(slowLabel, vixl::aarch64::Condition::vs);
1699 }
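/* Note on the overflow check above (derived from the emitted flag logic):
 *      cmp  x_dst, #1               sets V only when x_dst == INT64_MIN
 *      ccmp x_dst, #-1, #VFlag, vc  if the cmp did not overflow, computes x_dst + 1, which
 *                                   sets V only when x_dst == INT64_MAX; otherwise forces V to 1
 * so the conditional branch takes the slow path exactly when Fcvtzs produced INT64_MIN or
 * INT64_MAX, i.e. when the plain truncation result cannot be trusted.
 */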
1700 
1701 void Aarch64Encoder::EncodeJsDoubleToCharCast(Reg dst, Reg src)
1702 {
1703     ASSERT(src.IsFloat() && dst.IsScalar());
1704 
1705     CHECK_EQ(src.GetSize(), BITS_PER_UINT64);
1706     CHECK_EQ(dst.GetSize(), BITS_PER_UINT32);
1707 
1708     // use special JS aarch64 instruction
1709 #ifndef NDEBUG
1710     vixl::CPUFeaturesScope scope(GetMasm(), vixl::CPUFeatures::kFP, vixl::CPUFeatures::kJSCVT);
1711 #endif
1712     GetMasm()->Fjcvtzs(VixlReg(dst), VixlVReg(src));
1713 }
1714 
1715 void Aarch64Encoder::EncodeJsDoubleToCharCast(Reg dst, Reg src, Reg tmp, uint32_t failureResult)
1716 {
1717     ASSERT(src.IsFloat() && dst.IsScalar());
1718 
1719     CHECK_EQ(src.GetSize(), BITS_PER_UINT64);
1720     CHECK_EQ(dst.GetSize(), BITS_PER_UINT32);
1721 
1722     // infinities and too-large values saturate here to INT64_MIN or INT64_MAX, while NaN converts to 0
1723     GetMasm()->Fcvtzs(VixlReg(dst, DOUBLE_WORD_SIZE), VixlVReg(src));
1724     // check INT64_MIN
1725     GetMasm()->Cmp(VixlReg(dst, DOUBLE_WORD_SIZE), VixlImm(1));
1726     // check INT64_MAX
1727     GetMasm()->Ccmp(VixlReg(dst, DOUBLE_WORD_SIZE), VixlImm(-1), vixl::aarch64::StatusFlags::VFlag,
1728                     vixl::aarch64::Condition::vc);
1729     // 'And' with 0xffff
1730     constexpr uint32_t UTF16_CHAR_MASK = 0xffff;
1731     GetMasm()->And(VixlReg(dst), VixlReg(dst), VixlImm(UTF16_CHAR_MASK));
1732     // 'And' and 'Mov' do not change the flags, so we can still conditionally select the failure result if the
1733     // earlier INT64_MIN/INT64_MAX check detected an overflow
1734     GetMasm()->mov(VixlReg(tmp), failureResult);
1735     GetMasm()->csel(VixlReg(dst), VixlReg(tmp), VixlReg(dst), vixl::aarch64::Condition::vs);
1736 }
1737 
1738 void Aarch64Encoder::EncodeCast(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1739 {
1740     if (src.IsFloat() || dst.IsFloat()) {
1741         EncodeCastFloat(dst, dstSigned, src, srcSigned);
1742         return;
1743     }
1744 
1745     ASSERT(src.IsScalar() && dst.IsScalar());
1746     auto rzero = GetRegfile()->GetZeroReg().GetId();
1747     if (src.GetId() == rzero) {
1748         ASSERT(dst.GetId() != rzero);
1749         EncodeMov(dst, Imm(0));
1750         return;
1751     }
1752     // Scalar part
1753     EncodeCastScalar(dst, dstSigned, src, srcSigned);
1754 }
1755 
1756 void Aarch64Encoder::EncodeCastToBool(Reg dst, Reg src)
1757 {
1758     // The ISA says that we only support the following casts:
1759     // i32tou1, i64tou1, u32tou1, u64tou1
1760     ASSERT(src.IsScalar());
1761     ASSERT(dst.IsScalar());
1762 
1763     GetMasm()->Cmp(VixlReg(src), VixlImm(0));
1764     // In our ISA the minimal type is 32-bit, so bool is kept in 32 bits
1765     GetMasm()->Cset(VixlReg(Reg(dst.GetId(), INT32_TYPE)), vixl::aarch64::Condition::ne);
1766 }
1767 
1768 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src0, Shift src1)
1769 {
1770     if (dst.IsFloat()) {
1771         UNREACHABLE();
1772     }
1773     ASSERT(src0.GetSize() <= dst.GetSize());
1774     if (src0.GetSize() < dst.GetSize()) {
1775         auto src0Reg = Reg(src0.GetId(), dst.GetType());
1776         auto src1Reg = Reg(src1.GetBase().GetId(), dst.GetType());
1777         GetMasm()->Add(VixlReg(dst), VixlReg(src0Reg), VixlShift(Shift(src1Reg, src1.GetType(), src1.GetScale())));
1778         return;
1779     }
1780     GetMasm()->Add(VixlReg(dst), VixlReg(src0), VixlShift(src1));
1781 }
1782 
1783 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src0, Reg src1)
1784 {
1785     if (dst.IsFloat()) {
1786         GetMasm()->Fadd(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1787         return;
1788     }
1789 
1790     /* if any of the operands has 64-bits size,
1791      * forcibly do the 64-bits wide operation */
1792     if ((src0.GetSize() | src1.GetSize() | dst.GetSize()) >= DOUBLE_WORD_SIZE) {
1793         GetMasm()->Add(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1794     } else {
1795         /* Otherwise do 32-bits operation as any lesser
1796          * sizes have to be upcasted to 32-bits anyway */
1797         GetMasm()->Add(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1798     }
1799 }
1800 
1801 void Aarch64Encoder::EncodeSub(Reg dst, Reg src0, Shift src1)
1802 {
1803     ASSERT(dst.IsScalar());
1804     GetMasm()->Sub(VixlReg(dst), VixlReg(src0), VixlShift(src1));
1805 }
1806 
1807 void Aarch64Encoder::EncodeSub(Reg dst, Reg src0, Reg src1)
1808 {
1809     if (dst.IsFloat()) {
1810         GetMasm()->Fsub(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1811         return;
1812     }
1813 
1814     /* if any of the operands has 64-bits size,
1815      * forcibly do the 64-bits wide operation */
1816     if ((src0.GetSize() | src1.GetSize() | dst.GetSize()) >= DOUBLE_WORD_SIZE) {
1817         GetMasm()->Sub(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1818     } else {
1819         /* Otherwise do 32-bits operation as any lesser
1820          * sizes have to be upcasted to 32-bits anyway */
1821         GetMasm()->Sub(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1822     }
1823 }
1824 
1825 void Aarch64Encoder::EncodeMul(Reg dst, Reg src0, Reg src1)
1826 {
1827     if (dst.IsFloat()) {
1828         GetMasm()->Fmul(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1829         return;
1830     }
1831     auto rzero = GetRegfile()->GetZeroReg().GetId();
1832     if (src0.GetId() == rzero || src1.GetId() == rzero) {
1833         EncodeMov(dst, Imm(0));
1834         return;
1835     }
1836     GetMasm()->Mul(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1837 }
1838 
1839 void Aarch64Encoder::EncodeAddOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1840 {
1841     ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1842     ASSERT(cc == Condition::VS || cc == Condition::VC);
1843     if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1844         GetMasm()->Adds(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1845     } else {
1846         /* Otherwise do 32-bits operation as any lesser
1847          * sizes have to be upcasted to 32-bits anyway */
1848         GetMasm()->Adds(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1849     }
1850     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
1851     GetMasm()->B(label, Convert(cc));
1852 }
1853 
1854 void Aarch64Encoder::EncodeSubOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1855 {
1856     ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1857     ASSERT(cc == Condition::VS || cc == Condition::VC);
1858     if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1859         GetMasm()->Subs(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1860     } else {
1861         /* Otherwise do 32-bits operation as any lesser
1862          * sizes have to be upcasted to 32-bits anyway */
1863         GetMasm()->Subs(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1864     }
1865     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
1866     GetMasm()->B(label, Convert(cc));
1867 }
1868 
1869 void Aarch64Encoder::EncodeNegOverflowAndZero(compiler::LabelHolder::LabelId id, Reg dst, Reg src)
1870 {
1871     ASSERT(!dst.IsFloat() && !src.IsFloat());
1872     // NOLINTNEXTLINE(readability-magic-numbers)
1873     EncodeJumpTest(id, src, Imm(0x7fffffff), Condition::TST_EQ);
1874     GetMasm()->Neg(VixlReg(dst).W(), VixlReg(src).W());
1875 }
1876 
1877 void Aarch64Encoder::EncodeDiv(Reg dst, bool dstSigned, Reg src0, Reg src1)
1878 {
1879     if (dst.IsFloat()) {
1880         GetMasm()->Fdiv(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1881         return;
1882     }
1883 
1884     auto rzero = GetRegfile()->GetZeroReg().GetId();
1885     if (src1.GetId() == rzero || src0.GetId() == rzero) {
1886         ScopedTmpReg tmpReg(this, src1.GetType());
1887         EncodeMov(tmpReg, Imm(0));
1888         // Denominator is zero-reg
1889         if (src1.GetId() == rzero) {
1890             // Encode Abort
1891             GetMasm()->Udiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(tmpReg));
1892             return;
1893         }
1894 
1895         // But src1 still may be zero
1896         if (src1.GetId() != src0.GetId()) {
1897             if (dstSigned) {
1898                 GetMasm()->Sdiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(src1));
1899             } else {
1900                 GetMasm()->Udiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(src1));
1901             }
1902             return;
1903         }
1904         UNREACHABLE();
1905     }
1906     if (dstSigned) {
1907         GetMasm()->Sdiv(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1908     } else {
1909         GetMasm()->Udiv(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1910     }
1911 }
1912 
1913 void Aarch64Encoder::EncodeMod(Reg dst, bool dstSigned, Reg src0, Reg src1)
1914 {
1915     if (dst.IsScalar()) {
1916         auto rzero = GetRegfile()->GetZeroReg().GetId();
1917         if (src1.GetId() == rzero || src0.GetId() == rzero) {
1918             ScopedTmpReg tmpReg(this, src1.GetType());
1919             EncodeMov(tmpReg, Imm(0));
1920             // Denominator is zero-reg
1921             if (src1.GetId() == rzero) {
1922                 // Encode Abort
1923                 GetMasm()->Udiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(tmpReg));
1924                 return;
1925             }
1926 
1927             if (src1.GetId() == src0.GetId()) {
1928                 SetFalseResult();
1929                 return;
1930             }
1931             // But src1 still may be zero
1932             ScopedTmpRegU64 tmpRegUd(this);
1933             if (dst.GetSize() < DOUBLE_WORD_SIZE) {
1934                 tmpRegUd.ChangeType(INT32_TYPE);
1935             }
1936             auto tmp = VixlReg(tmpRegUd);
1937             if (!dstSigned) {
1938                 GetMasm()->Udiv(tmp, VixlReg(tmpReg), VixlReg(src1));
1939                 GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(tmpReg));
1940                 return;
1941             }
1942             GetMasm()->Sdiv(tmp, VixlReg(tmpReg), VixlReg(src1));
1943             GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(tmpReg));
1944             return;
1945         }
1946 
1947         ScopedTmpRegU64 tmpReg(this);
1948         if (dst.GetSize() < DOUBLE_WORD_SIZE) {
1949             tmpReg.ChangeType(INT32_TYPE);
1950         }
1951         auto tmp = VixlReg(tmpReg);
1952 
1953         if (!dstSigned) {
1954             GetMasm()->Udiv(tmp, VixlReg(src0), VixlReg(src1));
1955             GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(src0));
1956             return;
1957         }
1958         GetMasm()->Sdiv(tmp, VixlReg(src0), VixlReg(src1));
1959         GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(src0));
1960         return;
1961     }
1962 
1963     EncodeFMod(dst, src0, src1);
1964 }
1965 
1966 void Aarch64Encoder::EncodeFMod(Reg dst, Reg src0, Reg src1)
1967 {
1968     ASSERT(dst.IsFloat());
1969 
1970     if (dst.GetType() == FLOAT32_TYPE) {
1971         using Fp = float (*)(float, float);
1972         MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<Fp>(fmodf)));
1973     } else {
1974         using Fp = double (*)(double, double);
1975         MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<Fp>(fmod)));
1976     }
1977 }
1978 
1979 void Aarch64Encoder::EncodeSignedDiv(Reg dst, Reg src0, Imm imm)
1980 {
1981     int64_t divisor = imm.GetAsInt();
1982 
1983     FastConstSignedDivisor fastDivisor(divisor, dst.GetSize());
1984     int64_t magic = fastDivisor.GetMagic();
1985 
1986     ScopedTmpReg tmp(this, dst.GetType());
1987     Reg tmp64 = tmp.GetReg().As(INT64_TYPE);
1988     EncodeMov(tmp, Imm(magic));
1989 
1990     int64_t extraShift = 0;
1991     if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1992         GetMasm()->Smulh(VixlReg(tmp), VixlReg(src0), VixlReg(tmp));
1993     } else {
1994         GetMasm()->Smull(VixlReg(tmp64), VixlReg(src0), VixlReg(tmp));
1995         extraShift = WORD_SIZE;
1996     }
1997 
1998     bool useSignFlag = false;
1999     if (divisor > 0 && magic < 0) {
2000         GetMasm()->Adds(VixlReg(tmp64), VixlReg(tmp64), VixlShift(Shift(src0.As(INT64_TYPE), extraShift)));
2001         useSignFlag = true;
2002     } else if (divisor < 0 && magic > 0) {
2003         GetMasm()->Subs(VixlReg(tmp64), VixlReg(tmp64), VixlShift(Shift(src0.As(INT64_TYPE), extraShift)));
2004         useSignFlag = true;
2005     }
2006 
2007     int64_t shift = fastDivisor.GetShift();
2008     EncodeAShr(dst.As(INT64_TYPE), tmp64, Imm(shift + extraShift));
2009 
2010     // result = (result < 0 ? result + 1 : result)
2011     if (useSignFlag) {
2012         GetMasm()->Cinc(VixlReg(dst), VixlReg(dst), vixl::aarch64::Condition::mi);
2013     } else {
2014         GetMasm()->Add(VixlReg(dst), VixlReg(dst), VixlShift(Shift(dst, ShiftType::LSR, dst.GetSize() - 1U)));
2015     }
2016 }
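/* Worked example (illustrative; assumes FastConstSignedDivisor yields the classic
 * "Hacker's Delight" constants): signed 32-bit division by 3 uses magic = 0x55555556 and
 * shift = 0, so the code above emits roughly
 *      mov   w_tmp, #0x55555556
 *      smull x_tmp, w_src, w_tmp           ; 64-bit product
 *      asr   x_dst, x_tmp, #32             ; shift + extraShift = 0 + 32
 *      add   w_dst, w_dst, w_dst, lsr #31  ; +1 for negative quotients -> truncate toward zero
 * e.g. src = 7 -> 2, src = -7 -> -2.
 */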
2017 
2018 void Aarch64Encoder::EncodeUnsignedDiv(Reg dst, Reg src0, Imm imm)
2019 {
2020     auto divisor = bit_cast<uint64_t>(imm.GetAsInt());
2021 
2022     FastConstUnsignedDivisor fastDivisor(divisor, dst.GetSize());
2023     uint64_t magic = fastDivisor.GetMagic();
2024 
2025     ScopedTmpReg tmp(this, dst.GetType());
2026     Reg tmp64 = tmp.GetReg().As(INT64_TYPE);
2027     EncodeMov(tmp, Imm(magic));
2028 
2029     uint64_t extraShift = 0;
2030     if (dst.GetSize() == DOUBLE_WORD_SIZE) {
2031         GetMasm()->Umulh(VixlReg(tmp), VixlReg(src0), VixlReg(tmp));
2032     } else {
2033         GetMasm()->Umull(VixlReg(tmp64), VixlReg(src0), VixlReg(tmp));
2034         extraShift = WORD_SIZE;
2035     }
2036 
2037     uint64_t shift = fastDivisor.GetShift();
2038     if (!fastDivisor.GetAdd()) {
2039         EncodeShr(dst.As(INT64_TYPE), tmp64, Imm(shift + extraShift));
2040     } else {
2041         ASSERT(shift >= 1U);
2042         if (extraShift > 0U) {
2043             EncodeShr(tmp64, tmp64, Imm(extraShift));
2044         }
2045         EncodeSub(dst, src0, tmp);
2046         GetMasm()->Add(VixlReg(dst), VixlReg(tmp), VixlShift(Shift(dst, ShiftType::LSR, 1U)));
2047         EncodeShr(dst, dst, Imm(shift - 1U));
2048     }
2049 }
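/* Worked example (illustrative; assumes FastConstUnsignedDivisor yields the classic
 * constants): unsigned 32-bit division by 7 uses magic = 0x24924925, add = true, shift = 3,
 * so the "add" branch above emits roughly
 *      mov   w_tmp, #0x24924925
 *      umull x_tmp, w_src, w_tmp
 *      lsr   x_tmp, x_tmp, #32            ; extraShift
 *      sub   w_dst, w_src, w_tmp
 *      add   w_dst, w_tmp, w_dst, lsr #1
 *      lsr   w_dst, w_dst, #2             ; shift - 1
 * e.g. src = 100 -> 14.
 */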
2050 
2051 void Aarch64Encoder::EncodeDiv(Reg dst, Reg src0, Imm imm, bool isSigned)
2052 {
2053     ASSERT(dst.IsScalar() && dst.GetSize() >= WORD_SIZE);
2054     ASSERT(CanOptimizeImmDivMod(bit_cast<uint64_t>(imm.GetAsInt()), isSigned));
2055     if (isSigned) {
2056         EncodeSignedDiv(dst, src0, imm);
2057     } else {
2058         EncodeUnsignedDiv(dst, src0, imm);
2059     }
2060 }
2061 
2062 void Aarch64Encoder::EncodeMod(Reg dst, Reg src0, Imm imm, bool isSigned)
2063 {
2064     ASSERT(dst.IsScalar() && dst.GetSize() >= WORD_SIZE);
2065     ASSERT(CanOptimizeImmDivMod(bit_cast<uint64_t>(imm.GetAsInt()), isSigned));
2066     // dst = src0 - imm * (src0 / imm)
2067     ScopedTmpReg tmp(this, dst.GetType());
2068     EncodeDiv(tmp, src0, imm, isSigned);
2069 
2070     ScopedTmpReg immReg(this, dst.GetType());
2071     EncodeMov(immReg, imm);
2072 
2073     GetMasm()->Msub(VixlReg(dst), VixlReg(immReg), VixlReg(tmp), VixlReg(src0));
2074 }
2075 
2076 void Aarch64Encoder::EncodeMin(Reg dst, bool dstSigned, Reg src0, Reg src1)
2077 {
2078     if (dst.IsFloat()) {
2079         GetMasm()->Fmin(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
2080         return;
2081     }
2082     if (dstSigned) {
2083         GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2084         GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::lt);
2085         return;
2086     }
2087     GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2088     GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::ls);
2089 }
2090 
2091 void Aarch64Encoder::EncodeMax(Reg dst, bool dstSigned, Reg src0, Reg src1)
2092 {
2093     if (dst.IsFloat()) {
2094         GetMasm()->Fmax(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
2095         return;
2096     }
2097     if (dstSigned) {
2098         GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2099         GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::gt);
2100         return;
2101     }
2102     GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2103     GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::hi);
2104 }
2105 
2106 void Aarch64Encoder::EncodeShl(Reg dst, Reg src0, Reg src1)
2107 {
2108     auto rzero = GetRegfile()->GetZeroReg().GetId();
2109     ASSERT(dst.GetId() != rzero);
2110     if (src0.GetId() == rzero) {
2111         EncodeMov(dst, Imm(0));
2112         return;
2113     }
2114     if (src1.GetId() == rzero) {
2115         EncodeMov(dst, src0);
2116     }
2117     if (dst.GetSize() < WORD_SIZE) {
2118         GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
2119     }
2120     GetMasm()->Lsl(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2121 }
2122 
2123 void Aarch64Encoder::EncodeShr(Reg dst, Reg src0, Reg src1)
2124 {
2125     auto rzero = GetRegfile()->GetZeroReg().GetId();
2126     ASSERT(dst.GetId() != rzero);
2127     if (src0.GetId() == rzero) {
2128         EncodeMov(dst, Imm(0));
2129         return;
2130     }
2131     if (src1.GetId() == rzero) {
2132         EncodeMov(dst, src0);
2133     }
2134 
2135     if (dst.GetSize() < WORD_SIZE) {
2136         GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
2137     }
2138 
2139     GetMasm()->Lsr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2140 }
2141 
2142 void Aarch64Encoder::EncodeAShr(Reg dst, Reg src0, Reg src1)
2143 {
2144     auto rzero = GetRegfile()->GetZeroReg().GetId();
2145     ASSERT(dst.GetId() != rzero);
2146     if (src0.GetId() == rzero) {
2147         EncodeMov(dst, Imm(0));
2148         return;
2149     }
2150     if (src1.GetId() == rzero) {
2151         EncodeMov(dst, src0);
2152     }
2153 
2154     if (dst.GetSize() < WORD_SIZE) {
2155         GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
2156     }
2157     GetMasm()->Asr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2158 }
2159 
2160 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src0, Reg src1)
2161 {
2162     GetMasm()->And(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2163 }
2164 
2165 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src0, Shift src1)
2166 {
2167     GetMasm()->And(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2168 }
2169 
2170 void Aarch64Encoder::EncodeOr(Reg dst, Reg src0, Reg src1)
2171 {
2172     GetMasm()->Orr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2173 }
2174 
2175 void Aarch64Encoder::EncodeOr(Reg dst, Reg src0, Shift src1)
2176 {
2177     GetMasm()->Orr(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2178 }
2179 
2180 void Aarch64Encoder::EncodeXor(Reg dst, Reg src0, Reg src1)
2181 {
2182     GetMasm()->Eor(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2183 }
2184 
2185 void Aarch64Encoder::EncodeXor(Reg dst, Reg src0, Shift src1)
2186 {
2187     GetMasm()->Eor(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2188 }
2189 
2190 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src, Imm imm)
2191 {
2192     ASSERT(dst.IsScalar() && "UNIMPLEMENTED");
2193     ASSERT(dst.GetSize() >= src.GetSize());
2194     if (dst.GetSize() != src.GetSize()) {
2195         auto srcReg = Reg(src.GetId(), dst.GetType());
2196         GetMasm()->Add(VixlReg(dst), VixlReg(srcReg), VixlImm(imm));
2197         return;
2198     }
2199     GetMasm()->Add(VixlReg(dst), VixlReg(src), VixlImm(imm));
2200 }
2201 
2202 void Aarch64Encoder::EncodeSub(Reg dst, Reg src, Imm imm)
2203 {
2204     ASSERT(dst.IsScalar() && "UNIMPLEMENTED");
2205     GetMasm()->Sub(VixlReg(dst), VixlReg(src), VixlImm(imm));
2206 }
2207 
2208 void Aarch64Encoder::EncodeShl(Reg dst, Reg src, Imm imm)
2209 {
2210     ASSERT(dst.IsScalar() && "Invalid operand type");
2211     auto rzero = GetRegfile()->GetZeroReg().GetId();
2212     ASSERT(dst.GetId() != rzero);
2213     if (src.GetId() == rzero) {
2214         EncodeMov(dst, Imm(0));
2215         return;
2216     }
2217 
2218     GetMasm()->Lsl(VixlReg(dst), VixlReg(src), imm.GetAsInt());
2219 }
2220 
2221 void Aarch64Encoder::EncodeShr(Reg dst, Reg src, Imm imm)
2222 {
2223     int64_t immValue = static_cast<uint64_t>(imm.GetAsInt()) & (dst.GetSize() - 1);
2224 
2225     ASSERT(dst.IsScalar() && "Invalid operand type");
2226     auto rzero = GetRegfile()->GetZeroReg().GetId();
2227     ASSERT(dst.GetId() != rzero);
2228     if (src.GetId() == rzero) {
2229         EncodeMov(dst, Imm(0));
2230         return;
2231     }
2232 
2233     GetMasm()->Lsr(VixlReg(dst), VixlReg(src), immValue);
2234 }
2235 
2236 void Aarch64Encoder::EncodeAShr(Reg dst, Reg src, Imm imm)
2237 {
2238     ASSERT(dst.IsScalar() && "Invalid operand type");
2239     GetMasm()->Asr(VixlReg(dst), VixlReg(src), imm.GetAsInt());
2240 }
2241 
2242 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src, Imm imm)
2243 {
2244     ASSERT(dst.IsScalar() && "Invalid operand type");
2245     GetMasm()->And(VixlReg(dst), VixlReg(src), VixlImm(imm));
2246 }
2247 
2248 void Aarch64Encoder::EncodeOr(Reg dst, Reg src, Imm imm)
2249 {
2250     ASSERT(dst.IsScalar() && "Invalid operand type");
2251     GetMasm()->Orr(VixlReg(dst), VixlReg(src), VixlImm(imm));
2252 }
2253 
2254 void Aarch64Encoder::EncodeXor(Reg dst, Reg src, Imm imm)
2255 {
2256     ASSERT(dst.IsScalar() && "Invalid operand type");
2257     GetMasm()->Eor(VixlReg(dst), VixlReg(src), VixlImm(imm));
2258 }
2259 
2260 void Aarch64Encoder::EncodeMov(Reg dst, Imm src)
2261 {
2262     if (dst.IsFloat()) {
2263         if (dst.GetSize() == WORD_SIZE) {
2264             GetMasm()->Fmov(VixlVReg(dst), src.GetAsFloat());
2265         } else {
2266             GetMasm()->Fmov(VixlVReg(dst), src.GetAsDouble());
2267         }
2268         return;
2269     }
2270     if (dst.GetSize() > WORD_SIZE) {
2271         GetMasm()->Mov(VixlReg(dst), VixlImm(src));
2272     } else {
2273         GetMasm()->Mov(VixlReg(dst), VixlImm(static_cast<int32_t>(src.GetAsInt())));
2274     }
2275 }
2276 
2277 void Aarch64Encoder::EncodeLdr(Reg dst, bool dstSigned, MemRef mem)
2278 {
2279     auto rzero = GetRegfile()->GetZeroReg().GetId();
2280     if (!ConvertMem(mem).IsValid() || (dst.GetId() == rzero && dst.IsScalar())) {
2281         // Try to materialize zero in dst and use it as the base (to avoid creating a temp reg)
2282         // Check: dst is not a vector, not the index, and not rzero
2283         [[maybe_unused]] auto baseReg = mem.GetBase();
2284         auto indexReg = mem.GetIndex();
2285 
2286         // Invalid == base is rzero or invalid
2287         ASSERT(baseReg.GetId() == rzero || !baseReg.IsValid());
2288         // checks for use dst-register
2289         if (dst.IsScalar() && dst.IsValid() &&    // not float
2290             (indexReg.GetId() != dst.GetId()) &&  // not index
2291             (dst.GetId() != rzero)) {             // not rzero
2292             // May use dst like rzero
2293             EncodeMov(dst, Imm(0));
2294 
2295             auto fixMem = MemRef(dst, indexReg, mem.GetScale(), mem.GetDisp());
2296             ASSERT(ConvertMem(fixMem).IsValid());
2297             EncodeLdr(dst, dstSigned, fixMem);
2298         } else {
2299             // Use tmp-reg
2300             ScopedTmpReg tmpReg(this);
2301             EncodeMov(tmpReg, Imm(0));
2302 
2303             auto fixMem = MemRef(tmpReg, indexReg, mem.GetScale(), mem.GetDisp());
2304             ASSERT(ConvertMem(fixMem).IsValid());
2305             // Used for zero-dst
2306             EncodeLdr(tmpReg, dstSigned, fixMem);
2307         }
2308         return;
2309     }
2310     ASSERT(ConvertMem(mem).IsValid());
2311     if (dst.IsFloat()) {
2312         GetMasm()->Ldr(VixlVReg(dst), ConvertMem(mem));
2313         return;
2314     }
2315     if (dstSigned) {
2316         if (dst.GetSize() == BYTE_SIZE) {
2317             GetMasm()->Ldrsb(VixlReg(dst, DOUBLE_WORD_SIZE), ConvertMem(mem));
2318             return;
2319         }
2320         if (dst.GetSize() == HALF_SIZE) {
2321             GetMasm()->Ldrsh(VixlReg(dst), ConvertMem(mem));
2322             return;
2323         }
2324     } else {
2325         if (dst.GetSize() == BYTE_SIZE) {
2326             GetMasm()->Ldrb(VixlReg(dst, WORD_SIZE), ConvertMem(mem));
2327             return;
2328         }
2329         if (dst.GetSize() == HALF_SIZE) {
2330             GetMasm()->Ldrh(VixlReg(dst), ConvertMem(mem));
2331             return;
2332         }
2333     }
2334     GetMasm()->Ldr(VixlReg(dst), ConvertMem(mem));
2335 }
2336 
2337 void Aarch64Encoder::EncodeLdrAcquireInvalid(Reg dst, bool dstSigned, MemRef mem)
2338 {
2339     // Try to materialize zero in dst and use it as the base (to avoid creating a temp reg)
2340     // Check: dst is not a vector, not the index, and not rzero
2341     [[maybe_unused]] auto baseReg = mem.GetBase();
2342     auto rzero = GetRegfile()->GetZeroReg().GetId();
2343 
2344     auto indexReg = mem.GetIndex();
2345 
2346     // Invalid == base is rzero or invalid
2347     ASSERT(baseReg.GetId() == rzero || !baseReg.IsValid());
2348     // checks for use dst-register
2349     if (dst.IsScalar() && dst.IsValid() &&    // not float
2350         (indexReg.GetId() != dst.GetId()) &&  // not index
2351         (dst.GetId() != rzero)) {             // not rzero
2352         // May use dst like rzero
2353         EncodeMov(dst, Imm(0));
2354 
2355         auto fixMem = MemRef(dst, indexReg, mem.GetScale(), mem.GetDisp());
2356         ASSERT(ConvertMem(fixMem).IsValid());
2357         EncodeLdrAcquire(dst, dstSigned, fixMem);
2358     } else {
2359         // Use tmp-reg
2360         ScopedTmpReg tmpReg(this);
2361         EncodeMov(tmpReg, Imm(0));
2362 
2363         auto fixMem = MemRef(tmpReg, indexReg, mem.GetScale(), mem.GetDisp());
2364         ASSERT(ConvertMem(fixMem).IsValid());
2365         // Used for zero-dst
2366         EncodeLdrAcquire(tmpReg, dstSigned, fixMem);
2367     }
2368 }
2369 
2370 void Aarch64Encoder::EncodeLdrAcquireScalar(Reg dst, bool dstSigned, MemRef mem)
2371 {
2372 #ifndef NDEBUG
2373     CheckAlignment(mem, dst.GetSize());
2374 #endif  // NDEBUG
2375     if (dstSigned) {
2376         if (dst.GetSize() == BYTE_SIZE) {
2377             GetMasm()->Ldarb(VixlReg(dst), ConvertMem(mem));
2378             GetMasm()->Sxtb(VixlReg(dst), VixlReg(dst));
2379             return;
2380         }
2381         if (dst.GetSize() == HALF_SIZE) {
2382             GetMasm()->Ldarh(VixlReg(dst), ConvertMem(mem));
2383             GetMasm()->Sxth(VixlReg(dst), VixlReg(dst));
2384             return;
2385         }
2386         if (dst.GetSize() == WORD_SIZE) {
2387             GetMasm()->Ldar(VixlReg(dst), ConvertMem(mem));
2388             GetMasm()->Sxtw(VixlReg(dst), VixlReg(dst));
2389             return;
2390         }
2391     } else {
2392         if (dst.GetSize() == BYTE_SIZE) {
2393             GetMasm()->Ldarb(VixlReg(dst, WORD_SIZE), ConvertMem(mem));
2394             return;
2395         }
2396         if (dst.GetSize() == HALF_SIZE) {
2397             GetMasm()->Ldarh(VixlReg(dst), ConvertMem(mem));
2398             return;
2399         }
2400     }
2401     GetMasm()->Ldar(VixlReg(dst), ConvertMem(mem));
2402 }
2403 
2404 void Aarch64Encoder::CheckAlignment(MemRef mem, size_t size)
2405 {
2406     ASSERT(size == WORD_SIZE || size == BYTE_SIZE || size == HALF_SIZE || size == DOUBLE_WORD_SIZE);
2407     if (size == BYTE_SIZE) {
2408         return;
2409     }
2410     size_t alignmentMask = (size >> 3U) - 1;
2411     ASSERT(!mem.HasIndex() && !mem.HasScale());
2412     if (mem.HasDisp()) {
2413         // We would need an additional tmp register to check base + offset together.
2414         // The case where base and offset are individually unaligned but their sum is aligned is very rare.
2415         // Therefore, base and offset are checked for alignment separately.
2416         [[maybe_unused]] auto offset = static_cast<size_t>(mem.GetDisp());
2417         ASSERT((offset & alignmentMask) == 0);
2418     }
2419     auto baseReg = mem.GetBase();
2420     auto end = CreateLabel();
2421     EncodeJumpTest(end, baseReg, Imm(alignmentMask), Condition::TST_EQ);
2422     EncodeAbort();
2423     BindLabel(end);
2424 }
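/* For reference, the debug-only check above expands roughly to (for a 64-bit access):
 *      tst  x_base, #0x7
 *      b.eq aligned
 *      <EncodeAbort(), i.e. a trap>
 *  aligned:
 */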
2425 
2426 void Aarch64Encoder::EncodeLdrAcquire(Reg dst, bool dstSigned, MemRef mem)
2427 {
2428     if (mem.HasIndex()) {
2429         ScopedTmpRegU64 tmpReg(this);
2430         if (mem.HasScale()) {
2431             EncodeAdd(tmpReg, mem.GetBase(), Shift(mem.GetIndex(), mem.GetScale()));
2432         } else {
2433             EncodeAdd(tmpReg, mem.GetBase(), mem.GetIndex());
2434         }
2435         mem = MemRef(tmpReg, mem.GetDisp());
2436     }
2437 
2438     auto rzero = GetRegfile()->GetZeroReg().GetId();
2439     if (!ConvertMem(mem).IsValid() || (dst.GetId() == rzero && dst.IsScalar())) {
2440         EncodeLdrAcquireInvalid(dst, dstSigned, mem);
2441         return;
2442     }
2443 
2444     ASSERT(!mem.HasIndex() && !mem.HasScale());
2445     if (dst.IsFloat()) {
2446         ScopedTmpRegU64 tmpReg(this);
2447         auto memLdar = mem;
2448         if (mem.HasDisp()) {
2449             if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2450                 EncodeAdd(tmpReg, mem.GetBase(), Imm(mem.GetDisp()));
2451             } else {
2452                 EncodeMov(tmpReg, Imm(mem.GetDisp()));
2453                 EncodeAdd(tmpReg, mem.GetBase(), tmpReg);
2454             }
2455             memLdar = MemRef(tmpReg);
2456         }
2457 #ifndef NDEBUG
2458         CheckAlignment(memLdar, dst.GetSize());
2459 #endif  // NDEBUG
2460         auto tmp = VixlReg(tmpReg, dst.GetSize());
2461         GetMasm()->Ldar(tmp, ConvertMem(memLdar));
2462         GetMasm()->Fmov(VixlVReg(dst), tmp);
2463         return;
2464     }
2465 
2466     if (!mem.HasDisp()) {
2467         EncodeLdrAcquireScalar(dst, dstSigned, mem);
2468         return;
2469     }
2470 
2471     Reg dst64(dst.GetId(), INT64_TYPE);
2472     if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2473         EncodeAdd(dst64, mem.GetBase(), Imm(mem.GetDisp()));
2474     } else {
2475         EncodeMov(dst64, Imm(mem.GetDisp()));
2476         EncodeAdd(dst64, mem.GetBase(), dst64);
2477     }
2478     EncodeLdrAcquireScalar(dst, dstSigned, MemRef(dst64));
2479 }
2480 
2481 void Aarch64Encoder::EncodeStr(Reg src, MemRef mem)
2482 {
2483     if (!ConvertMem(mem).IsValid()) {
2484         auto indexReg = mem.GetIndex();
2485         auto rzero = GetRegfile()->GetZeroReg().GetId();
2486         // Invalid == base is rzero or invalid
2487         ASSERT(mem.GetBase().GetId() == rzero || !mem.GetBase().IsValid());
2488         // Use tmp-reg
2489         ScopedTmpReg tmpReg(this);
2490         EncodeMov(tmpReg, Imm(0));
2491 
2492         auto fixMem = MemRef(tmpReg, indexReg, mem.GetScale(), mem.GetDisp());
2493         ASSERT(ConvertMem(fixMem).IsValid());
2494         if (src.GetId() != rzero) {
2495             EncodeStr(src, fixMem);
2496         } else {
2497             EncodeStr(tmpReg, fixMem);
2498         }
2499         return;
2500     }
2501     ASSERT(ConvertMem(mem).IsValid());
2502     if (src.IsFloat()) {
2503         GetMasm()->Str(VixlVReg(src), ConvertMem(mem));
2504         return;
2505     }
2506     if (src.GetSize() == BYTE_SIZE) {
2507         GetMasm()->Strb(VixlReg(src), ConvertMem(mem));
2508         return;
2509     }
2510     if (src.GetSize() == HALF_SIZE) {
2511         GetMasm()->Strh(VixlReg(src), ConvertMem(mem));
2512         return;
2513     }
2514     GetMasm()->Str(VixlReg(src), ConvertMem(mem));
2515 }
2516 
2517 void Aarch64Encoder::EncodeStrRelease(Reg src, MemRef mem)
2518 {
2519     ScopedTmpRegLazy base(this);
2520     MemRef fixedMem;
2521     bool memWasFixed = false;
2522     if (mem.HasDisp()) {
2523         if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2524             base.AcquireIfInvalid();
2525             EncodeAdd(base, mem.GetBase(), Imm(mem.GetDisp()));
2526         } else {
2527             base.AcquireIfInvalid();
2528             EncodeMov(base, Imm(mem.GetDisp()));
2529             EncodeAdd(base, mem.GetBase(), base);
2530         }
2531         memWasFixed = true;
2532     }
2533     if (mem.HasIndex()) {
2534         base.AcquireIfInvalid();
2535         if (mem.HasScale()) {
2536             EncodeAdd(base, memWasFixed ? base : mem.GetBase(), Shift(mem.GetIndex(), mem.GetScale()));
2537         } else {
2538             EncodeAdd(base, memWasFixed ? base : mem.GetBase(), mem.GetIndex());
2539         }
2540         memWasFixed = true;
2541     }
2542 
2543     if (memWasFixed) {
2544         fixedMem = MemRef(base);
2545     } else {
2546         fixedMem = mem;
2547     }
2548 
2549 #ifndef NDEBUG
2550     CheckAlignment(fixedMem, src.GetSize());
2551 #endif  // NDEBUG
2552     if (src.IsFloat()) {
2553         ScopedTmpRegU64 tmpReg(this);
2554         auto tmp = VixlReg(tmpReg, src.GetSize());
2555         GetMasm()->Fmov(tmp, VixlVReg(src));
2556         GetMasm()->Stlr(tmp, ConvertMem(fixedMem));
2557         return;
2558     }
2559     if (src.GetSize() == BYTE_SIZE) {
2560         GetMasm()->Stlrb(VixlReg(src), ConvertMem(fixedMem));
2561         return;
2562     }
2563     if (src.GetSize() == HALF_SIZE) {
2564         GetMasm()->Stlrh(VixlReg(src), ConvertMem(fixedMem));
2565         return;
2566     }
2567     GetMasm()->Stlr(VixlReg(src), ConvertMem(fixedMem));
2568 }
2569 
2570 void Aarch64Encoder::EncodeLdrExclusive(Reg dst, Reg addr, bool acquire)
2571 {
2572     ASSERT(dst.IsScalar());
2573     auto dstReg = VixlReg(dst);
2574     auto memCvt = ConvertMem(MemRef(addr));
2575 #ifndef NDEBUG
2576     CheckAlignment(MemRef(addr), dst.GetSize());
2577 #endif  // NDEBUG
2578     if (dst.GetSize() == BYTE_SIZE) {
2579         if (acquire) {
2580             GetMasm()->Ldaxrb(dstReg, memCvt);
2581             return;
2582         }
2583         GetMasm()->Ldxrb(dstReg, memCvt);
2584         return;
2585     }
2586     if (dst.GetSize() == HALF_SIZE) {
2587         if (acquire) {
2588             GetMasm()->Ldaxrh(dstReg, memCvt);
2589             return;
2590         }
2591         GetMasm()->Ldxrh(dstReg, memCvt);
2592         return;
2593     }
2594     if (acquire) {
2595         GetMasm()->Ldaxr(dstReg, memCvt);
2596         return;
2597     }
2598     GetMasm()->Ldxr(dstReg, memCvt);
2599 }
2600 
2601 void Aarch64Encoder::EncodeStrExclusive(Reg dst, Reg src, Reg addr, bool release)
2602 {
2603     ASSERT(dst.IsScalar() && src.IsScalar());
2604 
2605     bool copyDst = dst.GetId() == src.GetId() || dst.GetId() == addr.GetId();
2606     ScopedTmpReg tmp(this);
2607     auto srcReg = VixlReg(src);
2608     auto memCvt = ConvertMem(MemRef(addr));
2609     auto dstReg = copyDst ? VixlReg(tmp) : VixlReg(dst);
2610 #ifndef NDEBUG
2611     CheckAlignment(MemRef(addr), src.GetSize());
2612 #endif  // NDEBUG
2613 
2614     if (src.GetSize() == BYTE_SIZE) {
2615         if (release) {
2616             GetMasm()->Stlxrb(dstReg, srcReg, memCvt);
2617         } else {
2618             GetMasm()->Stxrb(dstReg, srcReg, memCvt);
2619         }
2620     } else if (src.GetSize() == HALF_SIZE) {
2621         if (release) {
2622             GetMasm()->Stlxrh(dstReg, srcReg, memCvt);
2623         } else {
2624             GetMasm()->Stxrh(dstReg, srcReg, memCvt);
2625         }
2626     } else {
2627         if (release) {
2628             GetMasm()->Stlxr(dstReg, srcReg, memCvt);
2629         } else {
2630             GetMasm()->Stxr(dstReg, srcReg, memCvt);
2631         }
2632     }
2633     if (copyDst) {
2634         EncodeMov(dst, tmp);
2635     }
2636 }
2637 
2638 void Aarch64Encoder::EncodeStrz(Reg src, MemRef mem)
2639 {
2640     if (!ConvertMem(mem).IsValid()) {
2641         EncodeStr(src, mem);
2642         return;
2643     }
2644     ASSERT(ConvertMem(mem).IsValid());
2645     // Upper half of registers must be zeroed by-default
2646     if (src.IsFloat()) {
2647         EncodeStr(src.As(FLOAT64_TYPE), mem);
2648         return;
2649     }
2650     if (src.GetSize() < WORD_SIZE) {
2651         EncodeCast(src, false, src.As(INT64_TYPE), false);
2652     }
2653     GetMasm()->Str(VixlReg(src.As(INT64_TYPE)), ConvertMem(mem));
2654 }
2655 
2656 void Aarch64Encoder::EncodeSti(int64_t src, uint8_t srcSizeBytes, MemRef mem)
2657 {
2658     if (mem.IsValid() && mem.IsOffsetMem() && src == 0 && srcSizeBytes == 1) {
2659         auto rzero = GetRegfile()->GetZeroReg();
2660         GetMasm()->Strb(VixlReg(rzero), ConvertMem(mem));
2661         return;
2662     }
2663     if (!ConvertMem(mem).IsValid()) {
2664         auto rzero = GetRegfile()->GetZeroReg();
2665         EncodeStr(rzero, mem);
2666         return;
2667     }
2668 
2669     ScopedTmpRegU64 tmpReg(this);
2670     auto tmp = VixlReg(tmpReg);
2671     GetMasm()->Mov(tmp, VixlImm(src));
2672     if (srcSizeBytes == 1U) {
2673         GetMasm()->Strb(tmp, ConvertMem(mem));
2674         return;
2675     }
2676     if (srcSizeBytes == HALF_WORD_SIZE_BYTES) {
2677         GetMasm()->Strh(tmp, ConvertMem(mem));
2678         return;
2679     }
2680     ASSERT((srcSizeBytes == WORD_SIZE_BYTES) || (srcSizeBytes == DOUBLE_WORD_SIZE_BYTES));
2681     GetMasm()->Str(tmp, ConvertMem(mem));
2682 }
2683 
2684 void Aarch64Encoder::EncodeSti(float src, MemRef mem)
2685 {
2686     if (!ConvertMem(mem).IsValid()) {
2687         auto rzero = GetRegfile()->GetZeroReg();
2688         EncodeStr(rzero, mem);
2689         return;
2690     }
2691     ScopedTmpRegF32 tmpReg(this);
2692     GetMasm()->Fmov(VixlVReg(tmpReg).S(), src);
2693     EncodeStr(tmpReg, mem);
2694 }
2695 
2696 void Aarch64Encoder::EncodeSti(double src, MemRef mem)
2697 {
2698     if (!ConvertMem(mem).IsValid()) {
2699         auto rzero = GetRegfile()->GetZeroReg();
2700         EncodeStr(rzero, mem);
2701         return;
2702     }
2703     ScopedTmpRegF64 tmpReg(this);
2704     GetMasm()->Fmov(VixlVReg(tmpReg).D(), src);
2705     EncodeStr(tmpReg, mem);
2706 }
2707 
2708 void Aarch64Encoder::EncodeMemCopy(MemRef memFrom, MemRef memTo, size_t size)
2709 {
2710     if (!ConvertMem(memFrom).IsValid() || !ConvertMem(memTo).IsValid()) {
2711         auto rzero = GetRegfile()->GetZeroReg();
2712         if (!ConvertMem(memFrom).IsValid()) {
2713             // Encode one load - will fix inside
2714             EncodeLdr(rzero, false, memFrom);
2715         } else {
2716             ASSERT(!ConvertMem(memTo).IsValid());
2717             // Encode one store - will fix inside
2718             EncodeStr(rzero, memTo);
2719         }
2720         return;
2721     }
2722     ASSERT(ConvertMem(memFrom).IsValid());
2723     ASSERT(ConvertMem(memTo).IsValid());
2724     ScopedTmpRegU64 tmpReg(this);
2725     auto tmp = VixlReg(tmpReg, std::min(size, static_cast<size_t>(DOUBLE_WORD_SIZE)));
2726     if (size == BYTE_SIZE) {
2727         GetMasm()->Ldrb(tmp, ConvertMem(memFrom));
2728         GetMasm()->Strb(tmp, ConvertMem(memTo));
2729     } else if (size == HALF_SIZE) {
2730         GetMasm()->Ldrh(tmp, ConvertMem(memFrom));
2731         GetMasm()->Strh(tmp, ConvertMem(memTo));
2732     } else {
2733         ASSERT(size == WORD_SIZE || size == DOUBLE_WORD_SIZE);
2734         GetMasm()->Ldr(tmp, ConvertMem(memFrom));
2735         GetMasm()->Str(tmp, ConvertMem(memTo));
2736     }
2737 }
2738 
2739 void Aarch64Encoder::EncodeMemCopyz(MemRef memFrom, MemRef memTo, size_t size)
2740 {
2741     if (!ConvertMem(memFrom).IsValid() || !ConvertMem(memTo).IsValid()) {
2742         auto rzero = GetRegfile()->GetZeroReg();
2743         if (!ConvertMem(memFrom).IsValid()) {
2744             // Encode one load - will fix inside
2745             EncodeLdr(rzero, false, memFrom);
2746         } else {
2747             ASSERT(!ConvertMem(memTo).IsValid());
2748             // Encode one store - will fix inside
2749             EncodeStr(rzero, memTo);
2750         }
2751         return;
2752     }
2753     ASSERT(ConvertMem(memFrom).IsValid());
2754     ASSERT(ConvertMem(memTo).IsValid());
2755     ScopedTmpRegU64 tmpReg(this);
2756     auto tmp = VixlReg(tmpReg, std::min(size, static_cast<size_t>(DOUBLE_WORD_SIZE)));
2757     auto zero = VixlReg(GetRegfile()->GetZeroReg(), WORD_SIZE);
2758     if (size == BYTE_SIZE) {
2759         GetMasm()->Ldrb(tmp, ConvertMem(memFrom));
2760         GetMasm()->Stp(tmp, zero, ConvertMem(memTo));
2761     } else if (size == HALF_SIZE) {
2762         GetMasm()->Ldrh(tmp, ConvertMem(memFrom));
2763         GetMasm()->Stp(tmp, zero, ConvertMem(memTo));
2764     } else {
2765         ASSERT(size == WORD_SIZE || size == DOUBLE_WORD_SIZE);
2766         GetMasm()->Ldr(tmp, ConvertMem(memFrom));
2767         if (size == WORD_SIZE) {
2768             GetMasm()->Stp(tmp, zero, ConvertMem(memTo));
2769         } else {
2770             GetMasm()->Str(tmp, ConvertMem(memTo));
2771         }
2772     }
2773 }
2774 
2775 void Aarch64Encoder::EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc)
2776 {
2777     ASSERT(src0.IsFloat() == src1.IsFloat());
2778     if (src0.IsFloat()) {
2779         GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
2780     } else {
2781         GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2782     }
2783     GetMasm()->Cset(VixlReg(dst), Convert(cc));
2784 }
2785 
2786 void Aarch64Encoder::EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc)
2787 {
2788     ASSERT(src0.IsScalar() && src1.IsScalar());
2789 
2790     GetMasm()->Tst(VixlReg(src0), VixlReg(src1));
2791     GetMasm()->Cset(VixlReg(dst), ConvertTest(cc));
2792 }
2793 
2794 void Aarch64Encoder::EncodeAtomicByteOr(Reg addr, Reg value, bool fastEncoding)
2795 {
2796     if (fastEncoding) {
2797 #ifndef NDEBUG
2798         vixl::CPUFeaturesScope scope(GetMasm(), vixl::CPUFeatures::kAtomics);
2799 #endif
2800         GetMasm()->Stsetb(VixlReg(value, BYTE_SIZE), MemOperand(VixlReg(addr)));
2801         return;
2802     }
2803 
2804     // Slow encoding, should not be used in production code!!!
2805     auto linkReg = GetTarget().GetLinkReg();
2806     auto frameReg = GetTarget().GetFrameReg();
2807     static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTES;
2808 
2809     ScopedTmpRegLazy tmp1(this);
2810     ScopedTmpRegLazy tmp2(this);
2811     Reg orValue;
2812     Reg storeResult;
2813     bool hasTemps = GetScratchRegistersWithLrCount() >= 2U;
2814     if (hasTemps) {
2815         tmp1.AcquireWithLr();
2816         tmp2.AcquireWithLr();
2817         orValue = tmp1.GetReg().As(INT32_TYPE);
2818         storeResult = tmp2.GetReg().As(INT32_TYPE);
2819     } else {
2820         GetMasm()->stp(VixlReg(frameReg), VixlReg(linkReg),
2821                        MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
2822         orValue = frameReg.As(INT32_TYPE);
2823         storeResult = linkReg.As(INT32_TYPE);
2824     }
2825 
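    // Exclusive load/store retry loop: load the byte, OR in the value and retry until the
    // store-exclusive succeeds (status register == 0).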
2826     auto *loop = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
2827     GetMasm()->Bind(loop);
2828     GetMasm()->Ldxrb(VixlReg(orValue), MemOperand(VixlReg(addr)));
2829     GetMasm()->Orr(VixlReg(orValue), VixlReg(orValue), VixlReg(value, WORD_SIZE));
2830     GetMasm()->Stxrb(VixlReg(storeResult), VixlReg(orValue), MemOperand(VixlReg(addr)));
2831     GetMasm()->Cbnz(VixlReg(storeResult), loop);
2832     if (!hasTemps) {
2833         GetMasm()->ldp(VixlReg(frameReg), VixlReg(linkReg),
2834                        MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
2835     }
2836 }
2837 
2838 void Aarch64Encoder::EncodeCmp(Reg dst, Reg src0, Reg src1, Condition cc)
2839 {
2840     if (src0.IsFloat()) {
2841         ASSERT(src1.IsFloat());
2842         ASSERT(cc == Condition::MI || cc == Condition::LT);
2843         GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
2844     } else {
2845         ASSERT(src0.IsScalar() && src1.IsScalar());
2846         ASSERT(cc == Condition::LO || cc == Condition::LT);
2847         GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2848     }
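    // Three-way compare: dst = 0 when the operands are equal, +1 when src0 > src1 and -1 when
    // src0 < src1 (Cset yields 0/1 for "not equal", Cneg negates it when cc holds).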
2849     GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::ne);
2850     GetMasm()->Cneg(VixlReg(Promote(dst)), VixlReg(Promote(dst)), Convert(cc));
2851 }
2852 
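/*
 * Illustrative sketch (not the exact listing) of what EncodeSelect is expected to emit for scalar
 * operands with, e.g., cc == Condition::LT:
 *     cmp  x<src2>, x<src3>
 *     csel x<dst>, x<src0>, x<src1>, lt
 */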
2853 void Aarch64Encoder::EncodeSelect(ArgsSelect &&args)
2854 {
2855     auto [dst, src0, src1, src2, src3, cc] = args;
2856     if (src2.IsScalar()) {
2857         GetMasm()->Cmp(VixlReg(src2), VixlReg(src3));
2858     } else {
2859         GetMasm()->Fcmp(VixlVReg(src2), VixlVReg(src3));
2860     }
2861     if (dst.IsFloat()) {
2862         GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), Convert(cc));
2863     } else {
2864         GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), Convert(cc));
2865     }
2866 }
2867 
2868 void Aarch64Encoder::EncodeSelect(ArgsSelectImm &&args)
2869 {
2870     auto [dst, src0, src1, src2, imm, cc] = args;
2871     if (src2.IsScalar()) {
2872         GetMasm()->Cmp(VixlReg(src2), VixlImm(imm));
2873     } else {
2874         GetMasm()->Fcmp(VixlVReg(src2), imm.GetAsDouble());
2875     }
2876     if (dst.IsFloat()) {
2877         GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), Convert(cc));
2878     } else {
2879         GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), Convert(cc));
2880     }
2881 }
2882 
2883 void Aarch64Encoder::EncodeSelectTest(ArgsSelect &&args)
2884 {
2885     auto [dst, src0, src1, src2, src3, cc] = args;
2886     ASSERT(!src2.IsFloat() && !src3.IsFloat());
2887     GetMasm()->Tst(VixlReg(src2), VixlReg(src3));
2888     if (dst.IsFloat()) {
2889         GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), ConvertTest(cc));
2890     } else {
2891         GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), ConvertTest(cc));
2892     }
2893 }
2894 
2895 void Aarch64Encoder::EncodeSelectTest(ArgsSelectImm &&args)
2896 {
2897     auto [dst, src0, src1, src2, imm, cc] = args;
2898     ASSERT(!src2.IsFloat());
2899     ASSERT(CanEncodeImmLogical(imm.GetAsInt(), src2.GetSize() > WORD_SIZE ? DOUBLE_WORD_SIZE : WORD_SIZE));
2900     GetMasm()->Tst(VixlReg(src2), VixlImm(imm));
2901     if (dst.IsFloat()) {
2902         GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), ConvertTest(cc));
2903     } else {
2904         GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), ConvertTest(cc));
2905     }
2906 }
2907 
2908 void Aarch64Encoder::EncodeLdp(Reg dst0, Reg dst1, bool dstSigned, MemRef mem)
2909 {
2910     ASSERT(dst0.IsFloat() == dst1.IsFloat());
2911     ASSERT(dst0.GetSize() == dst1.GetSize());
2912     if (!ConvertMem(mem).IsValid()) {
2913         // Encode a single Ldr; EncodeLdr fixes up the invalid memory operand internally
2914         EncodeLdr(dst0, dstSigned, mem);
2915         return;
2916     }
2917 
2918     if (dst0.IsFloat()) {
2919         GetMasm()->Ldp(VixlVReg(dst0), VixlVReg(dst1), ConvertMem(mem));
2920         return;
2921     }
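    // LDPSW loads two 32-bit words and sign-extends each into a 64-bit destination register.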
2922     if (dstSigned && dst0.GetSize() == WORD_SIZE) {
2923         GetMasm()->Ldpsw(VixlReg(dst0, DOUBLE_WORD_SIZE), VixlReg(dst1, DOUBLE_WORD_SIZE), ConvertMem(mem));
2924         return;
2925     }
2926     GetMasm()->Ldp(VixlReg(dst0), VixlReg(dst1), ConvertMem(mem));
2927 }
2928 
2929 void Aarch64Encoder::EncodeStp(Reg src0, Reg src1, MemRef mem)
2930 {
2931     ASSERT(src0.IsFloat() == src1.IsFloat());
2932     ASSERT(src0.GetSize() == src1.GetSize());
2933     if (!ConvertMem(mem).IsValid()) {
2934         // Encode a single Str; EncodeStr fixes up the invalid memory operand internally
2935         EncodeStr(src0, mem);
2936         return;
2937     }
2938 
2939     if (src0.IsFloat()) {
2940         GetMasm()->Stp(VixlVReg(src0), VixlVReg(src1), ConvertMem(mem));
2941         return;
2942     }
2943     GetMasm()->Stp(VixlReg(src0), VixlReg(src1), ConvertMem(mem));
2944 }
2945 
2946 void Aarch64Encoder::EncodeMAdd(Reg dst, Reg src0, Reg src1, Reg src2)
2947 {
2948     ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize() && dst.GetSize() == src2.GetSize());
2949     ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar() && dst.IsScalar() == src2.IsScalar());
2950 
2951     ASSERT(!GetRegfile()->IsZeroReg(dst));
2952 
2953     if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
2954         EncodeMov(dst, src2);
2955         return;
2956     }
2957 
2958     if (GetRegfile()->IsZeroReg(src2)) {
2959         EncodeMul(dst, src0, src1);
2960         return;
2961     }
2962 
2963     if (dst.IsScalar()) {
2964         GetMasm()->Madd(VixlReg(dst), VixlReg(src0), VixlReg(src1), VixlReg(src2));
2965     } else {
2966         GetMasm()->Fmadd(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), VixlVReg(src2));
2967     }
2968 }
2969 
2970 void Aarch64Encoder::EncodeMSub(Reg dst, Reg src0, Reg src1, Reg src2)
2971 {
2972     ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize() && dst.GetSize() == src2.GetSize());
2973     ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar() && dst.IsScalar() == src2.IsScalar());
2974 
2975     ASSERT(!GetRegfile()->IsZeroReg(dst));
2976 
2977     if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
2978         EncodeMov(dst, src2);
2979         return;
2980     }
2981 
2982     if (GetRegfile()->IsZeroReg(src2)) {
2983         EncodeMNeg(dst, src0, src1);
2984         return;
2985     }
2986 
2987     if (dst.IsScalar()) {
2988         GetMasm()->Msub(VixlReg(dst), VixlReg(src0), VixlReg(src1), VixlReg(src2));
2989     } else {
2990         GetMasm()->Fmsub(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), VixlVReg(src2));
2991     }
2992 }
2993 
2994 void Aarch64Encoder::EncodeMNeg(Reg dst, Reg src0, Reg src1)
2995 {
2996     ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
2997     ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar());
2998 
2999     ASSERT(!GetRegfile()->IsZeroReg(dst));
3000 
3001     if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
3002         EncodeMov(dst, Imm(0U));
3003         return;
3004     }
3005 
3006     if (dst.IsScalar()) {
3007         GetMasm()->Mneg(VixlReg(dst), VixlReg(src0), VixlReg(src1));
3008     } else {
3009         GetMasm()->Fnmul(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
3010     }
3011 }
3012 
3013 void Aarch64Encoder::EncodeOrNot(Reg dst, Reg src0, Reg src1)
3014 {
3015     ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
3016     ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
3017     GetMasm()->Orn(VixlReg(dst), VixlReg(src0), VixlReg(src1));
3018 }
3019 
3020 void Aarch64Encoder::EncodeOrNot(Reg dst, Reg src0, Shift src1)
3021 {
3022     ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
3023     ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
3024     GetMasm()->Orn(VixlReg(dst), VixlReg(src0), VixlShift(src1));
3025 }
3026 
3027 void Aarch64Encoder::EncodeExtractBits(Reg dst, Reg src0, Imm imm1, Imm imm2)
3028 {
3029     GetMasm()->Ubfx(VixlReg(dst), VixlReg(src0), imm1.GetAsInt(), imm2.GetAsInt());
3030 }
3031 
3032 void Aarch64Encoder::EncodeAndNot(Reg dst, Reg src0, Reg src1)
3033 {
3034     ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
3035     ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
3036     GetMasm()->Bic(VixlReg(dst), VixlReg(src0), VixlReg(src1));
3037 }
3038 
3039 void Aarch64Encoder::EncodeAndNot(Reg dst, Reg src0, Shift src1)
3040 {
3041     ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
3042     ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
3043     GetMasm()->Bic(VixlReg(dst), VixlReg(src0), VixlShift(src1));
3044 }
3045 
3046 void Aarch64Encoder::EncodeXorNot(Reg dst, Reg src0, Reg src1)
3047 {
3048     ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
3049     ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
3050     GetMasm()->Eon(VixlReg(dst), VixlReg(src0), VixlReg(src1));
3051 }
3052 
3053 void Aarch64Encoder::EncodeXorNot(Reg dst, Reg src0, Shift src1)
3054 {
3055     ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
3056     ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
3057     GetMasm()->Eon(VixlReg(dst), VixlReg(src0), VixlShift(src1));
3058 }
3059 
3060 void Aarch64Encoder::EncodeNeg(Reg dst, Shift src)
3061 {
3062     ASSERT(dst.GetSize() == src.GetBase().GetSize());
3063     ASSERT(dst.IsScalar() && src.GetBase().IsScalar());
3064     GetMasm()->Neg(VixlReg(dst), VixlShift(src));
3065 }
3066 
3067 void Aarch64Encoder::EncodeStackOverflowCheck(ssize_t offset)
3068 {
3069     ScopedTmpReg tmp(this);
3070     EncodeAdd(tmp, GetTarget().GetStackReg(), Imm(offset));
3071     EncodeLdr(tmp, false, MemRef(tmp));
3072 }
3073 
3074 void Aarch64Encoder::EncodeGetCurrentPc(Reg dst)
3075 {
3076     ASSERT(dst.GetType() == INT64_TYPE);
3077 
3078     auto currentPc = CreateLabel();
3079     BindLabel(currentPc);
3080 
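    // The label was bound at the current position, so ADR materializes the address of the ADR
    // instruction itself, i.e. the current PC.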
3081     auto *labelHolder = static_cast<Aarch64LabelHolder *>(GetLabels());
3082     GetMasm()->Adr(VixlReg(dst), labelHolder->GetLabel(currentPc));
3083 }
3084 
3085 bool Aarch64Encoder::CanEncodeImmAddSubCmp(int64_t imm, [[maybe_unused]] uint32_t size,
3086                                            [[maybe_unused]] bool signedCompare)
3087 {
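    // Negating INT64_MIN would overflow, so reject it up front instead of normalizing it below.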
3088     if (imm == INT64_MIN) {
3089         return false;
3090     }
3091     if (imm < 0) {
3092         imm = -imm;
3093     }
3094     return vixl::aarch64::Assembler::IsImmAddSub(imm);
3095 }
3096 
3097 bool Aarch64Encoder::CanEncodeImmLogical(uint64_t imm, uint32_t size)
3098 {
3099 #ifndef NDEBUG
3100     if (size < DOUBLE_WORD_SIZE) {
3101         // Check that the bits above 'size' are a consistent zero- or sign-extension:
3102         ASSERT(((imm >> size) == 0) || (((~imm) >> size) == 0));
3103     }
3104 #endif  // NDEBUG
3105     return vixl::aarch64::Assembler::IsImmLogical(imm, size);
3106 }
3107 
3108 bool Aarch64Encoder::CanOptimizeImmDivMod(uint64_t imm, bool isSigned) const
3109 {
3110     return CanOptimizeImmDivModCommon(imm, isSigned);
3111 }
3112 
3113 /*
3114  * From the AArch64 instruction set reference:
3115  *
3116  * ========================================================
3117  * Syntax
3118  *
3119  * LDR  Wt, [Xn|SP, Rm{, extend {amount}}]    ; 32-bit general registers
3120  *
3121  * LDR  Xt, [Xn|SP, Rm{, extend {amount}}]    ; 64-bit general registers
3122  *
3123  * amount
3124  * Is the index shift amount, optional and defaulting to #0 when extend is not LSL:
3125  *
3126  * 32-bit general registers
3127  * Can be one of #0 or #2.
3128  *
3129  * 64-bit general registers
3130  * Can be one of #0 or #3.
3131  * ========================================================
3132  * Syntax
3133  *
3134  * LDRH  Wt, [Xn|SP, Rm{, extend {amount}}]
3135  *
3136  * amount
3137  * Is the index shift amount, optional and defaulting to #0 when extend is not LSL, and can be either #0 or #1.
3138  * ========================================================
3139  *
3140  * Scale can therefore be 0 for any access, 1 for a half-word load, 2 for a word load, or 3 for a double-word load
3141  */
3142 bool Aarch64Encoder::CanEncodeScale(uint64_t imm, uint32_t size)
3143 {
3144     return (imm == 0) || ((1U << imm) == (size >> 3U));
3145 }
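
/*
 * Worked example (illustrative): for a 32-bit access, size >> 3U == 4, so a scaled index is accepted
 * for imm == 0 or imm == 2 (1U << 2 == 4), while imm == 1 and imm == 3 are rejected.
 */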
3146 
3147 bool Aarch64Encoder::CanEncodeShiftedOperand(ShiftOpcode opcode, ShiftType shiftType)
3148 {
3149     switch (opcode) {
3150         case ShiftOpcode::NEG_SR:
3151         case ShiftOpcode::ADD_SR:
3152         case ShiftOpcode::SUB_SR:
3153             return shiftType == ShiftType::LSL || shiftType == ShiftType::LSR || shiftType == ShiftType::ASR;
3154         case ShiftOpcode::AND_SR:
3155         case ShiftOpcode::OR_SR:
3156         case ShiftOpcode::XOR_SR:
3157         case ShiftOpcode::AND_NOT_SR:
3158         case ShiftOpcode::OR_NOT_SR:
3159         case ShiftOpcode::XOR_NOT_SR:
3160             return shiftType != ShiftType::INVALID_SHIFT;
3161         default:
3162             return false;
3163     }
3164 }
3165 
3166 bool Aarch64Encoder::CanEncodeFloatSelect()
3167 {
3168     return true;
3169 }
3170 
3171 Reg Aarch64Encoder::AcquireScratchRegister(TypeInfo type)
3172 {
3173     ASSERT(GetMasm()->GetCurrentScratchRegisterScope() == nullptr);
3174     auto reg = type.IsFloat() ? GetMasm()->GetScratchVRegisterList()->PopLowestIndex()
3175                               : GetMasm()->GetScratchRegisterList()->PopLowestIndex();
3176     ASSERT(reg.IsValid());
3177     return Reg(reg.GetCode(), type);
3178 }
3179 
3180 void Aarch64Encoder::AcquireScratchRegister(Reg reg)
3181 {
3182     ASSERT(GetMasm()->GetCurrentScratchRegisterScope() == nullptr);
3183     if (reg == GetTarget().GetLinkReg()) {
3184         ASSERT_PRINT(!lrAcquired_, "Trying to acquire LR, which hasn't been released before");
3185         lrAcquired_ = true;
3186         return;
3187     }
3188     auto type = reg.GetType();
3189     auto regId = reg.GetId();
3190 
3191     if (type.IsFloat()) {
3192         ASSERT(GetMasm()->GetScratchVRegisterList()->IncludesAliasOf(VixlVReg(reg)));
3193         GetMasm()->GetScratchVRegisterList()->Remove(regId);
3194     } else {
3195         ASSERT(GetMasm()->GetScratchRegisterList()->IncludesAliasOf(VixlReg(reg)));
3196         GetMasm()->GetScratchRegisterList()->Remove(regId);
3197     }
3198 }
3199 
3200 void Aarch64Encoder::ReleaseScratchRegister(Reg reg)
3201 {
3202     if (reg == GetTarget().GetLinkReg()) {
3203         ASSERT_PRINT(lrAcquired_, "Trying to release LR, which hasn't been acquired before");
3204         lrAcquired_ = false;
3205     } else if (reg.IsFloat()) {
3206         GetMasm()->GetScratchVRegisterList()->Combine(reg.GetId());
3207     } else if (reg.GetId() != GetTarget().GetLinkReg().GetId()) {
3208         GetMasm()->GetScratchRegisterList()->Combine(reg.GetId());
3209     }
3210 }
3211 
3212 bool Aarch64Encoder::IsScratchRegisterReleased(Reg reg) const
3213 {
3214     if (reg == GetTarget().GetLinkReg()) {
3215         return !lrAcquired_;
3216     }
3217     if (reg.IsFloat()) {
3218         return GetMasm()->GetScratchVRegisterList()->IncludesAliasOf(VixlVReg(reg));
3219     }
3220     return GetMasm()->GetScratchRegisterList()->IncludesAliasOf(VixlReg(reg));
3221 }
3222 
3223 RegMask Aarch64Encoder::GetScratchRegistersMask() const
3224 {
3225     return RegMask(GetMasm()->GetScratchRegisterList()->GetList());
3226 }
3227 
3228 RegMask Aarch64Encoder::GetScratchFpRegistersMask() const
3229 {
3230     return RegMask(GetMasm()->GetScratchVRegisterList()->GetList());
3231 }
3232 
3233 RegMask Aarch64Encoder::GetAvailableScratchRegisters() const
3234 {
3235     return RegMask(GetMasm()->GetScratchRegisterList()->GetList());
3236 }
3237 
3238 VRegMask Aarch64Encoder::GetAvailableScratchFpRegisters() const
3239 {
3240     return VRegMask(GetMasm()->GetScratchVRegisterList()->GetList());
3241 }
3242 
3243 TypeInfo Aarch64Encoder::GetRefType()
3244 {
3245     return INT64_TYPE;
3246 }
3247 
3248 void *Aarch64Encoder::BufferData() const
3249 {
3250     return GetMasm()->GetBuffer()->GetStartAddress<void *>();
3251 }
3252 
3253 size_t Aarch64Encoder::BufferSize() const
3254 {
3255     return GetMasm()->GetBuffer()->GetSizeInBytes();
3256 }
3257 
3258 void Aarch64Encoder::MakeLibCall(Reg dst, Reg src0, Reg src1, const void *entryPoint)
3259 {
3260     if (!dst.IsFloat()) {
3261         SetFalseResult();
3262         return;
3263     }
3264     if (dst.GetType() == FLOAT32_TYPE) {
3265         if (!src0.IsFloat() || !src1.IsFloat()) {
3266             SetFalseResult();
3267             return;
3268         }
3269 
3270         if (src0.GetId() != vixl::aarch64::s0.GetCode() || src1.GetId() != vixl::aarch64::s1.GetCode()) {
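            // Move src1 aside first: if src1 already lives in s0, writing src0 into s0 would
            // otherwise clobber it.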
3271             ScopedTmpRegF32 tmp(this);
3272             GetMasm()->Fmov(VixlVReg(tmp), VixlVReg(src1));
3273             GetMasm()->Fmov(vixl::aarch64::s0, VixlVReg(src0));
3274             GetMasm()->Fmov(vixl::aarch64::s1, VixlVReg(tmp));
3275         }
3276 
3277         MakeCall(entryPoint);
3278 
3279         if (dst.GetId() != vixl::aarch64::s0.GetCode()) {
3280             GetMasm()->Fmov(VixlVReg(dst), vixl::aarch64::s0);
3281         }
3282     } else if (dst.GetType() == FLOAT64_TYPE) {
3283         if (!src0.IsFloat() || !src1.IsFloat()) {
3284             SetFalseResult();
3285             return;
3286         }
3287 
3288         if (src0.GetId() != vixl::aarch64::d0.GetCode() || src1.GetId() != vixl::aarch64::d1.GetCode()) {
3289             ScopedTmpRegF64 tmp(this);
3290             GetMasm()->Fmov(VixlVReg(tmp), VixlVReg(src1));
3291 
3292             GetMasm()->Fmov(vixl::aarch64::d0, VixlVReg(src0));
3293             GetMasm()->Fmov(vixl::aarch64::d1, VixlVReg(tmp));
3294         }
3295 
3296         MakeCall(entryPoint);
3297 
3298         if (dst.GetId() != vixl::aarch64::d0.GetCode()) {
3299             GetMasm()->Fmov(VixlVReg(dst), vixl::aarch64::d0);
3300         }
3301     } else {
3302         UNREACHABLE();
3303     }
3304 }
3305 
3306 template <bool IS_STORE>
3307 void Aarch64Encoder::LoadStoreRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
3308 {
3309     if (registers.none()) {
3310         return;
3311     }
3312     auto lastReg = static_cast<int32_t>(registers.size() - 1);
3313     for (; lastReg >= 0; --lastReg) {
3314         if (registers.test(lastReg)) {
3315             break;
3316         }
3317     }
3318     // Materialize the offset with a single ADD when it is too large for a load/store-pair immediate
3319     size_t spOffset;
3320     auto lastOffset = (slot + lastReg - static_cast<ssize_t>(startReg)) * static_cast<ssize_t>(DOUBLE_WORD_SIZE_BYTES);
3321 
3322     if (!vixl::aarch64::Assembler::IsImmLSPair(lastOffset, vixl::aarch64::kXRegSizeInBytesLog2)) {
3323         ScopedTmpReg lrReg(this, true);
3324         auto tmp = VixlReg(lrReg);
3325         spOffset = static_cast<size_t>(slot * DOUBLE_WORD_SIZE_BYTES);
3326         slot = 0;
3327         if (vixl::aarch64::Assembler::IsImmAddSub(spOffset)) {
3328             GetMasm()->Add(tmp, vixl::aarch64::sp, VixlImm(spOffset));
3329         } else {
3330             GetMasm()->Mov(tmp, VixlImm(spOffset));
3331             GetMasm()->Add(tmp, vixl::aarch64::sp, tmp);
3332         }
3333         LoadStoreRegistersLoop<IS_STORE>(registers, slot, startReg, isFp, tmp);
3334     } else {
3335         LoadStoreRegistersLoop<IS_STORE>(registers, slot, startReg, isFp, vixl::aarch64::sp);
3336     }
3337 }
3338 
3339 template <bool IS_STORE>
3340 static void LoadStorePair(vixl::aarch64::MacroAssembler *masm, CPURegister lastReg, CPURegister reg, Reg base,
3341                           int32_t idx)
3342 {
3343     auto baseReg = VixlReg(base);
3344     static constexpr int32_t OFFSET = 2;
3345     if constexpr (IS_STORE) {  // NOLINT
3346         masm->Stp(lastReg, reg, MemOperand(baseReg, (idx - OFFSET) * DOUBLE_WORD_SIZE_BYTES));
3347     } else {  // NOLINT
3348         masm->Ldp(lastReg, reg, MemOperand(baseReg, (idx - OFFSET) * DOUBLE_WORD_SIZE_BYTES));
3349     }
3350 }
3351 
3352 template <bool IS_STORE>
3353 static void LoadStoreReg(vixl::aarch64::MacroAssembler *masm, CPURegister lastReg, Reg base, int32_t idx)
3354 {
3355     auto baseReg = VixlReg(base);
3356     if constexpr (IS_STORE) {  // NOLINT
3357         masm->Str(lastReg, MemOperand(baseReg, (idx - 1) * DOUBLE_WORD_SIZE_BYTES));
3358     } else {  // NOLINT
3359         masm->Ldr(lastReg, MemOperand(baseReg, (idx - 1) * DOUBLE_WORD_SIZE_BYTES));
3360     }
3361 }
3362 
3363 template <bool IS_STORE>
3364 void Aarch64Encoder::LoadStoreRegistersMainLoop(RegMask registers, bool isFp, int32_t slot, Reg base, RegMask mask)
3365 {
3366     bool hasMask = mask.any();
3367     int32_t index = hasMask ? static_cast<int32_t>(mask.GetMinRegister()) : 0;
3368     int32_t lastIndex = -1;
3369     ssize_t lastId = -1;
3370 
3371     slot -= index;
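    // Emit STP/LDP for two saved registers at a time (when a mask is given, only registers with
    // consecutive ids share a pair) and fall back to a single STR/LDR otherwise.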
3372     for (ssize_t id = index; id < helpers::ToSigned(registers.size()); id++) {
3373         if (hasMask) {
3374             if (!mask.test(id)) {
3375                 continue;
3376             }
3377             index++;
3378         }
3379         if (!registers.test(id)) {
3380             continue;
3381         }
3382         if (!hasMask) {
3383             index++;
3384         }
3385         if (lastId == -1) {
3386             lastId = id;
3387             lastIndex = index;
3388             continue;
3389         }
3390 
3391         auto lastReg =
3392             CPURegister(lastId, vixl::aarch64::kXRegSize, isFp ? CPURegister::kVRegister : CPURegister::kRegister);
3393         if (!hasMask || lastId + 1 == id) {
3394             auto reg =
3395                 CPURegister(id, vixl::aarch64::kXRegSize, isFp ? CPURegister::kVRegister : CPURegister::kRegister);
3396             LoadStorePair<IS_STORE>(GetMasm(), lastReg, reg, base, slot + index);
3397             lastId = -1;
3398         } else {
3399             LoadStoreReg<IS_STORE>(GetMasm(), lastReg, base, slot + lastIndex);
3400             lastId = id;
3401             lastIndex = index;
3402         }
3403     }
3404     if (lastId != -1) {
3405         auto lastReg =
3406             CPURegister(lastId, vixl::aarch64::kXRegSize, isFp ? CPURegister::kVRegister : CPURegister::kRegister);
3407         LoadStoreReg<IS_STORE>(GetMasm(), lastReg, base, slot + lastIndex);
3408     }
3409 }
3410 
3411 template <bool IS_STORE>
3412 void Aarch64Encoder::LoadStoreRegisters(RegMask registers, bool isFp, int32_t slot, Reg base, RegMask mask)
3413 {
3414     if (registers.none()) {
3415         return;
3416     }
3417 
3418     int32_t maxOffset = (slot + helpers::ToSigned(registers.GetMaxRegister())) * DOUBLE_WORD_SIZE_BYTES;
3419     int32_t minOffset = (slot + helpers::ToSigned(registers.GetMinRegister())) * DOUBLE_WORD_SIZE_BYTES;
3420 
3421     ScopedTmpRegLazy tmpReg(this, true);
3422     // Materialize the offset with a single ADD when it is too large for a load/store-pair immediate
3423     if (!vixl::aarch64::Assembler::IsImmLSPair(minOffset, vixl::aarch64::kXRegSizeInBytesLog2) ||
3424         !vixl::aarch64::Assembler::IsImmLSPair(maxOffset, vixl::aarch64::kXRegSizeInBytesLog2)) {
3425         tmpReg.AcquireWithLr();
3426         auto lrReg = VixlReg(tmpReg);
3427         ssize_t spOffset = slot * DOUBLE_WORD_SIZE_BYTES;
3428         if (vixl::aarch64::Assembler::IsImmAddSub(spOffset)) {
3429             GetMasm()->Add(lrReg, VixlReg(base), VixlImm(spOffset));
3430         } else {
3431             GetMasm()->Mov(lrReg, VixlImm(spOffset));
3432             GetMasm()->Add(lrReg, VixlReg(base), lrReg);
3433         }
3434         // Adjust new values for slot and base register
3435         slot = 0;
3436         base = tmpReg;
3437     }
3438 
3439     LoadStoreRegistersMainLoop<IS_STORE>(registers, isFp, slot, base, mask);
3440 }
3441 
3442 template <bool IS_STORE>
3443 void Aarch64Encoder::LoadStoreRegistersLoop(RegMask registers, ssize_t slot, size_t startReg, bool isFp,
3444                                             const vixl::aarch64::Register &baseReg)
3445 {
3446     size_t i = 0;
3447     const auto getNextReg = [&registers, &i, isFp]() {
3448         for (; i < registers.size(); i++) {
3449             if (registers.test(i)) {
3450                 return CPURegister(i++, vixl::aarch64::kXRegSize,
3451                                    isFp ? CPURegister::kVRegister : CPURegister::kRegister);
3452             }
3453         }
3454         return CPURegister();
3455     };
3456 
3457     for (CPURegister nextReg = getNextReg(); nextReg.IsValid();) {
3458         const CPURegister currReg = nextReg;
3459         nextReg = getNextReg();
3460         if (nextReg.IsValid() && (nextReg.GetCode() - 1 == currReg.GetCode())) {
3461             if constexpr (IS_STORE) {  // NOLINT
3462                 GetMasm()->Stp(currReg, nextReg,
3463                                MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
3464             } else {  // NOLINT
3465                 GetMasm()->Ldp(currReg, nextReg,
3466                                MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
3467             }
3468             nextReg = getNextReg();
3469         } else {
3470             if constexpr (IS_STORE) {  // NOLINT
3471                 GetMasm()->Str(currReg,
3472                                MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
3473             } else {  // NOLINT
3474                 GetMasm()->Ldr(currReg,
3475                                MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
3476             }
3477         }
3478     }
3479 }
3480 
3481 void Aarch64Encoder::SaveRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
3482 {
3483     LoadStoreRegisters<true>(registers, slot, startReg, isFp);
3484 }
3485 
3486 void Aarch64Encoder::LoadRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
3487 {
3488     LoadStoreRegisters<false>(registers, slot, startReg, isFp);
3489 }
3490 
3491 void Aarch64Encoder::SaveRegisters(RegMask registers, bool isFp, ssize_t slot, Reg base, RegMask mask)
3492 {
3493     LoadStoreRegisters<true>(registers, isFp, slot, base, mask);
3494 }
3495 
3496 void Aarch64Encoder::LoadRegisters(RegMask registers, bool isFp, ssize_t slot, Reg base, RegMask mask)
3497 {
3498     LoadStoreRegisters<false>(registers, isFp, slot, base, mask);
3499 }
3500 
3501 void Aarch64Encoder::PushRegisters(RegMask registers, bool isFp)
3502 {
3503     static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTES;
3504     Register lastReg = INVALID_REG;
3505     for (size_t i = 0; i < registers.size(); i++) {
3506         if (registers[i]) {
3507             if (lastReg == INVALID_REG) {
3508                 lastReg = i;
3509                 continue;
3510             }
3511             if (isFp) {
3512                 GetMasm()->stp(vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
3513                                vixl::aarch64::VRegister(i, DOUBLE_WORD_SIZE),
3514                                MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
3515             } else {
3516                 GetMasm()->stp(vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
3517                                vixl::aarch64::Register(i, DOUBLE_WORD_SIZE),
3518                                MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
3519             }
3520             lastReg = INVALID_REG;
3521         }
3522     }
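    // A leftover single register is still pushed with a full 16-byte pre-indexed slot, keeping sp
    // 16-byte aligned.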
3523     if (lastReg != INVALID_REG) {
3524         if (isFp) {
3525             GetMasm()->str(vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
3526                            MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
3527         } else {
3528             GetMasm()->str(vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
3529                            MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
3530         }
3531     }
3532 }
3533 
3534 void Aarch64Encoder::PopRegisters(RegMask registers, bool isFp)
3535 {
3536     static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTES;
3537     Register lastReg;
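    // With an odd register count, PushRegisters stored the highest register last in its own
    // 16-byte slot, so pop it first.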
3538     if ((registers.count() & 1U) != 0) {
3539         lastReg = registers.GetMaxRegister();
3540         if (isFp) {
3541             GetMasm()->ldr(vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
3542                            MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
3543         } else {
3544             GetMasm()->ldr(vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
3545                            MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
3546         }
3547         registers.reset(lastReg);
3548     }
3549     lastReg = INVALID_REG;
3550     for (auto i = static_cast<ssize_t>(registers.size() - 1); i >= 0; i--) {
3551         if (registers[i]) {
3552             if (lastReg == INVALID_REG) {
3553                 lastReg = i;
3554                 continue;
3555             }
3556             if (isFp) {
3557                 GetMasm()->ldp(vixl::aarch64::VRegister(i, DOUBLE_WORD_SIZE),
3558                                vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
3559                                MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
3560             } else {
3561                 GetMasm()->ldp(vixl::aarch64::Register(i, DOUBLE_WORD_SIZE),
3562                                vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
3563                                MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
3564             }
3565             lastReg = INVALID_REG;
3566         }
3567     }
3568 }
3569 
3570 vixl::aarch64::MacroAssembler *Aarch64Encoder::GetMasm() const
3571 {
3572     ASSERT(masm_ != nullptr);
3573     return masm_;
3574 }
3575 
3576 size_t Aarch64Encoder::GetLabelAddress(LabelHolder::LabelId label)
3577 {
3578     auto plabel = labels_->GetLabel(label);
3579     ASSERT(plabel->IsBound());
3580     return GetMasm()->GetLabelAddress<size_t>(plabel);
3581 }
3582 
3583 bool Aarch64Encoder::LabelHasLinks(LabelHolder::LabelId label)
3584 {
3585     auto plabel = labels_->GetLabel(label);
3586     return plabel->IsLinked();
3587 }
3588 
3589 #ifndef PANDA_MINIMAL_VIXL
3590 vixl::aarch64::Decoder &Aarch64Encoder::GetDecoder() const
3591 {
3592     if (!decoder_) {
3593         decoder_.emplace(GetAllocator());
3594         decoder_->visitors()->push_back(&GetDisasm());
3595     }
3596     return *decoder_;
3597 }
3598 
3599 vixl::aarch64::Disassembler &Aarch64Encoder::GetDisasm() const
3600 {
3601     if (!disasm_) {
3602         disasm_.emplace(GetAllocator());
3603     }
3604     return *disasm_;
3605 }
3606 #endif
3607 
3608 size_t Aarch64Encoder::DisasmInstr([[maybe_unused]] std::ostream &stream, size_t pc,
3609                                    [[maybe_unused]] ssize_t codeOffset) const
3610 {
3611 #ifndef PANDA_MINIMAL_VIXL
3612     auto bufferStart = GetMasm()->GetBuffer()->GetOffsetAddress<uintptr_t>(0);
3613     auto instr = GetMasm()->GetBuffer()->GetOffsetAddress<vixl::aarch64::Instruction *>(pc);
3614     GetDecoder().Decode(instr);
3615     if (codeOffset < 0) {
3616         stream << GetDisasm().GetOutput();
3617     } else {
3618         stream << std::setw(0x4) << std::right << std::setfill('0') << std::hex
3619                << reinterpret_cast<uintptr_t>(instr) - bufferStart + static_cast<size_t>(codeOffset) << ": "
3620                << GetDisasm().GetOutput() << std::setfill(' ') << std::dec;
3621     }
3622 
3623 #endif
3624     return pc + vixl::aarch64::kInstructionSize;
3625 }
3626 }  // namespace ark::compiler::aarch64
3627