1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16 Encoder (implementation of math and memory low-level emitters)
17 */
18 
19 #include "encode.h"
20 #include "target/aarch64/target.h"
21 #include "compiler/optimizer/code_generator/relocations.h"
22 
23 #if defined(USE_VIXL_ARM64) && !defined(PANDA_MINIMAL_VIXL)
24 #include "aarch64/disasm-aarch64.h"
25 #endif
26 
27 #include <iomanip>
28 
29 #include "lib_helpers.inl"
30 
31 #ifndef PANDA_TARGET_MACOS
32 #include "elf.h"
33 #endif  // PANDA_TARGET_MACOS
34 
35 namespace panda::compiler::aarch64 {
36 using vixl::aarch64::CPURegister;
37 using vixl::aarch64::MemOperand;
38 
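// Helper used by the emitters below (e.g. EncodeAbs): 8-bit operands are widened to 16 bits
// before encoding (the wider type only changes how the register is interpreted, not its id).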
39 static inline Reg Promote(Reg reg)
40 {
41     if (reg.GetType() == INT8_TYPE) {
42         return Reg(reg.GetId(), INT16_TYPE);
43     }
44     return reg;
45 }
46 
47 void Aarch64LabelHolder::BindLabel(LabelId id)
48 {
49     static_cast<Aarch64Encoder *>(GetEncoder())->GetMasm()->Bind(labels_[id]);
50 }
51 
52 Aarch64Encoder::Aarch64Encoder(ArenaAllocator *allocator) : Encoder(allocator, Arch::AARCH64)
53 {
54     labels_ = allocator->New<Aarch64LabelHolder>(this);
55     if (labels_ == nullptr) {
56         SetFalseResult();
57     }
58     // Enable LR as a temp register by default on AArch64
59     EnableLrAsTempReg(true);
60 }
61 
62 Aarch64Encoder::~Aarch64Encoder()
63 {
64     auto labels = static_cast<Aarch64LabelHolder *>(GetLabels())->labels_;
65     for (auto label : labels) {
66         label->~Label();
67     }
68     if (masm_ != nullptr) {
69         masm_->~MacroAssembler();
70         masm_ = nullptr;
71     }
72 #ifndef PANDA_MINIMAL_VIXL
73     if (decoder_ != nullptr) {
74         decoder_->~Decoder();
75         decoder_ = nullptr;
76     }
77 #endif
78 }
79 
80 bool Aarch64Encoder::InitMasm()
81 {
82     if (masm_ == nullptr) {
83         // Initialize Masm
84         masm_ = GetAllocator()->New<vixl::aarch64::MacroAssembler>(GetAllocator());
85         if (masm_ == nullptr || !masm_->IsValid()) {
86             SetFalseResult();
87             return false;
88         }
89         ASSERT(GetMasm());
90 
91         // Make sure that the compiler uses the same scratch registers as the assembler
92         CHECK_EQ(RegMask(GetMasm()->GetScratchRegisterList()->GetList()), GetTarget().GetTempRegsMask());
93         CHECK_EQ(RegMask(GetMasm()->GetScratchVRegisterList()->GetList()), GetTarget().GetTempVRegsMask());
94     }
95     return true;
96 }
97 
98 void Aarch64Encoder::Finalize()
99 {
100     GetMasm()->FinalizeCode();
101 }
102 
103 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id)
104 {
105     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
106     GetMasm()->B(label);
107 }
108 
109 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
110 {
111     if (src1.GetId() == GetRegfile()->GetZeroReg().GetId()) {
112         EncodeJump(id, src0, cc);
113         return;
114     }
115 
116     if (src0.IsScalar()) {
117         GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
118     } else {
119         GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
120     }
121 
122     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
123     GetMasm()->B(label, Convert(cc));
124 }
125 
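// Compare-immediate jump. Negative immediates are compared via CMN (compare with the negated
// value), so the encoded immediate is always non-negative.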
126 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
127 {
128     auto value = GetIntValue(imm);
129     if (value == 0) {
130         EncodeJump(id, src, cc);
131         return;
132     }
133 
134     ASSERT(CanEncodeImmAddSubCmp(value, src.GetSize(), false));
135     if (value < 0) {
136         GetMasm()->Cmn(VixlReg(src), VixlImm(-value));
137     } else {  // if (value > 0)
138         GetMasm()->Cmp(VixlReg(src), VixlImm(value));
139     }
140 
141     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
142     GetMasm()->B(label, Convert(cc));
143 }
144 
145 void Aarch64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
146 {
147     ASSERT(src0.IsScalar() && src1.IsScalar());
148 
149     GetMasm()->Tst(VixlReg(src0), VixlReg(src1));
150     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
151     GetMasm()->B(label, ConvertTest(cc));
152 }
153 
154 void Aarch64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
155 {
156     ASSERT(src.IsScalar());
157 
158     auto value = GetIntValue(imm);
159     ASSERT(CanEncodeImmLogical(value, imm.GetSize() > WORD_SIZE ? DOUBLE_WORD_SIZE : WORD_SIZE));
160 
161     GetMasm()->Tst(VixlReg(src), VixlImm(value));
162     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
163     GetMasm()->B(label, ConvertTest(cc));
164 }
165 
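// Jump on a comparison of src against zero. Where possible this avoids a CMP entirely:
// EQ/LS fold to CBZ, NE/HI fold to CBNZ, LO ("unsigned less than zero") is never taken,
// and HS ("unsigned greater or equal to zero") is always taken.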
166 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Condition cc)
167 {
168     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
169     ASSERT(src.IsScalar());
170     auto rzero = Reg(GetRegfile()->GetZeroReg().GetId(), src.GetType());
171 
172     switch (cc) {
173         case Condition::LO:
174             // Always false
175             return;
176         case Condition::HS:
177             // Always true
178             GetMasm()->B(label);
179             return;
180         case Condition::EQ:
181         case Condition::LS:
182             if (src.GetId() == rzero.GetId()) {
183                 GetMasm()->B(label);
184                 return;
185             }
186             // True only when zero
187             GetMasm()->Cbz(VixlReg(src), label);
188             return;
189         case Condition::NE:
190         case Condition::HI:
191             if (src.GetId() == rzero.GetId()) {
192                 // Do nothing
193                 return;
194             }
195             // True only when non-zero
196             GetMasm()->Cbnz(VixlReg(src), label);
197             return;
198         default:
199             break;
200     }
201 
202     ASSERT(rzero.IsValid());
203     GetMasm()->Cmp(VixlReg(src), VixlReg(rzero));
204     GetMasm()->B(label, Convert(cc));
205 }
206 
207 void Aarch64Encoder::EncodeJump(Reg dst)
208 {
209     GetMasm()->Br(VixlReg(dst));
210 }
211 
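// Emits an unconditional branch with a zero offset as a placeholder and records an
// R_AARCH64_CALL26 relocation so that the real target can be patched in later.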
212 void Aarch64Encoder::EncodeJump([[maybe_unused]] RelocationInfo *relocation)
213 {
214 #ifdef PANDA_TARGET_MACOS
215     LOG(FATAL, COMPILER) << "Not supported in Macos build";
216 #else
217     auto buffer = GetMasm()->GetBuffer();
218     relocation->offset = GetCursorOffset();
219     relocation->addend = 0;
220     relocation->type = R_AARCH64_CALL26;
221     static constexpr uint32_t CALL_WITH_ZERO_OFFSET = 0x14000000;
222     buffer->Emit32(CALL_WITH_ZERO_OFFSET);
223 #endif
224 }
225 
226 void Aarch64Encoder::EncodeBitTestAndBranch(LabelHolder::LabelId id, compiler::Reg reg, uint32_t bit_pos,
227                                             bool bit_value)
228 {
229     ASSERT(reg.IsScalar() && reg.GetSize() > bit_pos);
230     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
231     if (bit_value) {
232         GetMasm()->Tbnz(VixlReg(reg), bit_pos, label);
233     } else {
234         GetMasm()->Tbz(VixlReg(reg), bit_pos, label);
235     }
236 }
237 
238 void Aarch64Encoder::EncodeNop()
239 {
240     GetMasm()->Nop();
241 }
242 
243 void Aarch64Encoder::MakeCall([[maybe_unused]] compiler::RelocationInfo *relocation)
244 {
245 #ifdef PANDA_TARGET_MACOS
246     LOG(FATAL, COMPILER) << "Not supported in Macos build";
247 #else
248     auto buffer = GetMasm()->GetBuffer();
249     relocation->offset = GetCursorOffset();
250     relocation->addend = 0;
251     relocation->type = R_AARCH64_CALL26;
252     static constexpr uint32_t CALL_WITH_ZERO_OFFSET = 0x94000000;
253     buffer->Emit32(CALL_WITH_ZERO_OFFSET);
254 #endif
255 }
256 
257 void Aarch64Encoder::MakeCall(const void *entry_point)
258 {
259     auto lr_reg = GetTarget().GetLinkReg();
260     EncodeMov(lr_reg, Imm(reinterpret_cast<uintptr_t>(entry_point)));
261     GetMasm()->Blr(VixlReg(lr_reg));
262 }
263 
264 void Aarch64Encoder::MakeCall(MemRef entry_point)
265 {
266     auto lr_reg = GetTarget().GetLinkReg();
267     EncodeLdr(lr_reg, false, entry_point);
268     GetMasm()->Blr(VixlReg(lr_reg));
269 }
270 
271 void Aarch64Encoder::MakeCall(Reg reg)
272 {
273     GetMasm()->Blr(VixlReg(reg));
274 }
275 
276 void Aarch64Encoder::MakeCall(LabelHolder::LabelId id)
277 {
278     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
279     GetMasm()->Bl(label);
280 }
281 
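// Loads a value located at the given pc-relative offset (or only materializes its address when
// reg is invalid). Offsets that fit into 21 bits use a single ADR; larger offsets fall back to
// ADRP plus a page-offset ADD/LDR.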
282 void Aarch64Encoder::LoadPcRelative(Reg reg, intptr_t offset, Reg reg_addr)
283 {
284     ASSERT(GetCodeOffset() != Encoder::INVALID_OFFSET);
285     ASSERT(reg.IsValid() || reg_addr.IsValid());
286 
287     if (!reg_addr.IsValid()) {
288         reg_addr = reg.As(INT64_TYPE);
289     }
290 
291     if (vixl::IsInt21(offset)) {
292         GetMasm()->adr(VixlReg(reg_addr), offset);
293         if (reg != INVALID_REGISTER) {
294             EncodeLdr(reg, false, MemRef(reg_addr));
295         }
296     } else {
297         size_t pc = GetCodeOffset() + GetCursorOffset();
298         size_t addr;
299         if (intptr_t res = helpers::ToSigned(pc) + offset; res < 0) {
300             // Make both pc and addr positive
301             ssize_t extend = RoundUp(std::abs(res), vixl::aarch64::kPageSize);
302             addr = res + extend;
303             pc += extend;
304         } else {
305             addr = res;
306         }
307 
308         ssize_t adrp_imm = (addr >> vixl::aarch64::kPageSizeLog2) - (pc >> vixl::aarch64::kPageSizeLog2);
309 
310         GetMasm()->adrp(VixlReg(reg_addr), adrp_imm);
311 
312         offset = panda::helpers::ToUnsigned(addr) & (vixl::aarch64::kPageSize - 1);
313         if (reg.GetId() != reg_addr.GetId()) {
314             EncodeAdd(reg_addr, reg_addr, Imm(offset));
315             if (reg != INVALID_REGISTER) {
316                 EncodeLdr(reg, true, MemRef(reg_addr));
317             }
318         } else {
319             EncodeLdr(reg, true, MemRef(reg_addr, offset));
320         }
321     }
322 }
323 
324 void Aarch64Encoder::MakeCallAot(intptr_t offset)
325 {
326     auto lr_reg = GetTarget().GetLinkReg();
327     LoadPcRelative(lr_reg, offset);
328     GetMasm()->Blr(VixlReg(lr_reg));
329 }
330 
331 bool Aarch64Encoder::CanMakeCallByOffset(intptr_t offset)
332 {
333     // NOLINTNEXTLINE(hicpp-signed-bitwise)
334     auto off = (offset >> vixl::aarch64::kInstructionSizeLog2);
335     return vixl::aarch64::Instruction::IsValidImmPCOffset(vixl::aarch64::ImmBranchType::UncondBranchType, off);
336 }
337 
338 void Aarch64Encoder::MakeCallByOffset(intptr_t offset)
339 {
340     GetMasm()->Bl(offset);
341 }
342 
343 void Aarch64Encoder::MakeLoadAotTable(intptr_t offset, Reg reg)
344 {
345     LoadPcRelative(reg, offset);
346 }
347 
348 void Aarch64Encoder::MakeLoadAotTableAddr(intptr_t offset, Reg addr, Reg val)
349 {
350     LoadPcRelative(val, offset, addr);
351 }
352 
353 void Aarch64Encoder::EncodeAbort()
354 {
355     GetMasm()->Brk();
356 }
357 
358 void Aarch64Encoder::EncodeReturn()
359 {
360     GetMasm()->Ret();
361 }
362 
363 void Aarch64Encoder::EncodeMul([[maybe_unused]] Reg unused1, [[maybe_unused]] Reg unused2, [[maybe_unused]] Imm unused3)
364 {
365     SetFalseResult();
366 }
367 
368 void Aarch64Encoder::EncodeMov(Reg dst, Reg src)
369 {
370     if (dst == src) {
371         return;
372     }
373     if (src.IsFloat() && dst.IsFloat()) {
374         if (src.GetSize() != dst.GetSize()) {
375             GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
376             return;
377         }
378         GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
379         return;
380     }
381     if (src.IsFloat() && !dst.IsFloat()) {
382         GetMasm()->Fmov(VixlReg(dst, src.GetSize()), VixlVReg(src));
383         return;
384     }
385     if (dst.IsFloat()) {
386         ASSERT(src.IsScalar());
387         GetMasm()->Fmov(VixlVReg(dst), VixlReg(src));
388         return;
389     }
390     // DiscardForSameWReg below means we would drop "mov w0, w0", but that case is already guarded by "dst == src" above.
391     // NOTE: "mov w0, w0" is not equivalent to "nop", as it clears the upper bits of x0.
392     // Keeping the option here helps to generate nothing when, e.g., src is x0 and dst is w0.
393     // Probably, a better solution would be a system-wide register size check at the Encoder level.
394     if (src.GetSize() != dst.GetSize()) {
395         auto src_reg = Reg(src.GetId(), dst.GetType());
396         GetMasm()->Mov(VixlReg(dst), VixlReg(src_reg), vixl::aarch64::DiscardMoveMode::kDiscardForSameWReg);
397         return;
398     }
399     GetMasm()->Mov(VixlReg(dst), VixlReg(src), vixl::aarch64::DiscardMoveMode::kDiscardForSameWReg);
400 }
401 
402 void Aarch64Encoder::EncodeNeg(Reg dst, Reg src)
403 {
404     if (dst.IsFloat()) {
405         GetMasm()->Fneg(VixlVReg(dst), VixlVReg(src));
406         return;
407     }
408     GetMasm()->Neg(VixlReg(dst), VixlReg(src));
409 }
410 
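// Absolute value. For scalars the emitted sequence is roughly (32-bit sketch):
//   cmp  wSrc, wzr
//   cneg wDst, wSrc, lt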
411 void Aarch64Encoder::EncodeAbs(Reg dst, Reg src)
412 {
413     if (dst.IsFloat()) {
414         GetMasm()->Fabs(VixlVReg(dst), VixlVReg(src));
415         return;
416     }
417 
418     ASSERT(!GetRegfile()->IsZeroReg(dst));
419     if (GetRegfile()->IsZeroReg(src)) {
420         EncodeMov(dst, src);
421         return;
422     }
423 
424     if (src.GetSize() == DOUBLE_WORD_SIZE) {
425         GetMasm()->Cmp(VixlReg(src), vixl::aarch64::xzr);
426     } else {
427         GetMasm()->Cmp(VixlReg(src), vixl::aarch64::wzr);
428     }
429     GetMasm()->Cneg(VixlReg(Promote(dst)), VixlReg(Promote(src)), vixl::aarch64::Condition::lt);
430 }
431 
432 void Aarch64Encoder::EncodeSqrt(Reg dst, Reg src)
433 {
434     ASSERT(dst.IsFloat());
435     GetMasm()->Fsqrt(VixlVReg(dst), VixlVReg(src));
436 }
437 
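// dst = 1 if src is +/-infinity, else 0. The FP bits are moved to a scalar register and shifted
// left by one to drop the sign, so |src| is infinity iff the result equals 0xff000000 (float)
// or 0xffe0000000000000 (double).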
438 void Aarch64Encoder::EncodeIsInf(Reg dst, Reg src)
439 {
440     ASSERT(dst.IsScalar() && src.IsFloat());
441 
442     if (src.GetSize() == WORD_SIZE) {
443         constexpr uint32_t INF_MASK = 0xff000000;
444 
445         ScopedTmpRegU32 tmp_reg(this);
446         auto tmp = VixlReg(tmp_reg);
447         GetMasm()->Fmov(tmp, VixlVReg(src));
448         GetMasm()->Mov(VixlReg(dst).W(), INF_MASK);
449         GetMasm()->Lsl(tmp, tmp, 1);
450         GetMasm()->Cmp(tmp, VixlReg(dst, WORD_SIZE));
451     } else {
452         constexpr uint64_t INF_MASK = 0xffe0000000000000;
453 
454         ScopedTmpRegU64 tmp_reg(this);
455         auto tmp = VixlReg(tmp_reg);
456         GetMasm()->Fmov(tmp, VixlVReg(src));
457         GetMasm()->Mov(VixlReg(dst).X(), INF_MASK);
458         GetMasm()->Lsl(tmp, tmp, 1);
459         GetMasm()->Cmp(tmp, VixlReg(dst, DOUBLE_WORD_SIZE));
460     }
461 
462     GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::eq);
463 }
464 
465 /* NaN values need to be canonicalized */
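/* The value is compared with itself: the comparison is unordered only for NaN, in which case
 * CSEL picks the canonical quiet-NaN bit pattern instead of the raw bits. */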
466 void Aarch64Encoder::EncodeFpToBits(Reg dst, Reg src)
467 {
468     ASSERT(dst.IsScalar() && src.IsFloat());
469     ASSERT(dst.GetSize() == WORD_SIZE || dst.GetSize() == DOUBLE_WORD_SIZE);
470 
471     if (dst.GetSize() == WORD_SIZE) {
472         ASSERT(src.GetSize() == WORD_SIZE);
473 
474         constexpr auto FNAN = 0x7fc00000;
475 
476         ScopedTmpRegU32 tmp(this);
477 
478         GetMasm()->Fcmp(VixlVReg(src), VixlVReg(src));
479         GetMasm()->Mov(VixlReg(tmp), FNAN);
480         GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
481         GetMasm()->Csel(VixlReg(dst), VixlReg(tmp), VixlReg(dst), vixl::aarch64::Condition::ne);
482     } else {
483         ASSERT(src.GetSize() == DOUBLE_WORD_SIZE);
484 
485         constexpr auto DNAN = 0x7ff8000000000000;
486 
487         ScopedTmpRegU64 tmp_reg(this);
488         auto tmp = VixlReg(tmp_reg);
489 
490         GetMasm()->Fcmp(VixlVReg(src), VixlVReg(src));
491         GetMasm()->Mov(tmp, DNAN);
492         GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
493         GetMasm()->Csel(VixlReg(dst), tmp, VixlReg(dst), vixl::aarch64::Condition::ne);
494     }
495 }
496 
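// Moves a raw bit pattern between a scalar and an FP/SIMD register without numeric conversion
// (UMOV for vector->scalar, FMOV for scalar->vector). A zero source register is first
// materialized in a temporary before the FMOV.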
497 void Aarch64Encoder::EncodeMoveBitsRaw(Reg dst, Reg src)
498 {
499     ASSERT((dst.IsFloat() && src.IsScalar()) || (src.IsFloat() && dst.IsScalar()));
500     if (dst.IsScalar()) {
501         ASSERT(src.GetSize() == dst.GetSize());
502         if (dst.GetSize() == WORD_SIZE) {
503             GetMasm()->Umov(VixlReg(dst).W(), VixlVReg(src).S(), 0);
504         } else {
505             GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
506         }
507     } else {
508         ASSERT(dst.GetSize() == src.GetSize());
509         ScopedTmpReg tmp_reg(this, src.GetType());
510         auto src_reg = src;
511         auto rzero = GetRegfile()->GetZeroReg();
512         if (src.GetId() == rzero.GetId()) {
513             EncodeMov(tmp_reg, Imm(0));
514             src_reg = tmp_reg;
515         }
516 
517         if (src_reg.GetSize() == WORD_SIZE) {
518             GetMasm()->Fmov(VixlVReg(dst).S(), VixlReg(src_reg).W());
519         } else {
520             GetMasm()->Fmov(VixlVReg(dst), VixlReg(src_reg));
521         }
522     }
523 }
524 
525 void Aarch64Encoder::EncodeReverseBytes(Reg dst, Reg src)
526 {
527     auto rzero = GetRegfile()->GetZeroReg();
528     if (src.GetId() == rzero.GetId()) {
529         EncodeMov(dst, Imm(0));
530         return;
531     }
532 
533     ASSERT(src.GetSize() > BYTE_SIZE);
534     ASSERT(src.GetSize() == dst.GetSize());
535 
536     if (src.GetSize() == HALF_SIZE) {
537         GetMasm()->Rev16(VixlReg(dst), VixlReg(src));
538         GetMasm()->Sxth(VixlReg(dst), VixlReg(dst));
539     } else {
540         GetMasm()->Rev(VixlReg(dst), VixlReg(src));
541     }
542 }
543 
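// Population count via NEON: the value is moved to a SIMD register, CNT counts bits per byte,
// and ADDV sums the per-byte counts into the destination. Sub-word sources are masked first.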
544 void Aarch64Encoder::EncodeBitCount(Reg dst, Reg src)
545 {
546     auto rzero = GetRegfile()->GetZeroReg();
547     if (src.GetId() == rzero.GetId()) {
548         EncodeMov(dst, Imm(0));
549         return;
550     }
551 
552     ASSERT(dst.GetSize() == WORD_SIZE);
553 
554     ScopedTmpRegF64 tmp_reg0(this);
555     vixl::aarch64::VRegister tmp_reg;
556     if (src.GetSize() == DOUBLE_WORD_SIZE) {
557         tmp_reg = VixlVReg(tmp_reg0).D();
558     } else {
559         tmp_reg = VixlVReg(tmp_reg0).S();
560     }
561 
562     if (src.GetSize() < WORD_SIZE) {
563         int64_t cut_value = (1ULL << src.GetSize()) - 1;
564         EncodeAnd(src, src, Imm(cut_value));
565     }
566 
567     GetMasm()->Fmov(tmp_reg, VixlReg(src));
568     GetMasm()->Cnt(tmp_reg.V8B(), tmp_reg.V8B());
569     GetMasm()->Addv(tmp_reg.B(), tmp_reg.V8B());
570     EncodeMov(dst, tmp_reg0);
571 }
572 
573 /* Since only ROR is supported on AArch64, we do
574  * left rotation as ROR(v, -count) */
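// A left rotate is therefore emitted as roughly: neg wCnt, wSrc2; ror wDst, wSrc1, wCnt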
575 void Aarch64Encoder::EncodeRotate(Reg dst, Reg src1, Reg src2, bool is_ror)
576 {
577     ASSERT(src1.GetSize() == WORD_SIZE || src1.GetSize() == DOUBLE_WORD_SIZE);
578     ASSERT(src1.GetSize() == dst.GetSize());
579     auto rzero = GetRegfile()->GetZeroReg();
580     if (rzero.GetId() == src2.GetId() || rzero.GetId() == src1.GetId()) {
581         EncodeMov(dst, src1);
582         return;
583     }
584     /* as the second parameter is always 32 bits long, we have to
585      * adjust the counter register for the 64-bit first operand case */
586     if (is_ror) {
587         auto count = (dst.GetSize() == WORD_SIZE ? VixlReg(src2) : VixlReg(src2).X());
588         GetMasm()->Ror(VixlReg(dst), VixlReg(src1), count);
589     } else {
590         ScopedTmpReg tmp(this);
591         auto cnt = (dst.GetId() == src1.GetId() ? tmp : dst);
592         auto count = (dst.GetSize() == WORD_SIZE ? VixlReg(cnt).W() : VixlReg(cnt).X());
593         auto source2 = (dst.GetSize() == WORD_SIZE ? VixlReg(src2).W() : VixlReg(src2).X());
594         GetMasm()->Neg(count, source2);
595         GetMasm()->Ror(VixlReg(dst), VixlReg(src1), count);
596     }
597 }
598 
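// signum(x): dst = (x > 0 ? 1 : 0) - (x >>> (size - 1)), i.e. 1, 0 or -1. A 32-bit sketch:
//   cmp  wSrc, #0
//   cset wSign, gt
//   sub  wDst, wSign, wSrc, lsr #31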
599 void Aarch64Encoder::EncodeSignum(Reg dst, Reg src)
600 {
601     ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
602 
603     ScopedTmpRegU32 tmp(this);
604     auto sign = (dst.GetId() == src.GetId() ? tmp : dst);
605 
606     GetMasm()->Cmp(VixlReg(src), VixlImm(0));
607     GetMasm()->Cset(VixlReg(sign), vixl::aarch64::Condition::gt);
608 
609     constexpr auto SHIFT_WORD_BITS = 31;
610     constexpr auto SHIFT_DWORD_BITS = 63;
611 
612     /* The operation below is "sub dst, sign, src, lsr #reg_size-1";
613      * however, we can only encode up to 32 bits in the lsr field, so
614      * for the 64-bit case we cannot avoid a separate lsr instruction */
615     if (src.GetSize() == WORD_SIZE) {
616         auto shift = Shift(src, LSR, SHIFT_WORD_BITS);
617         EncodeSub(dst, sign, shift);
618     } else {
619         ScopedTmpRegU64 shift(this);
620         sign = Reg(sign.GetId(), INT64_TYPE);
621         EncodeShr(shift, src, Imm(SHIFT_DWORD_BITS));
622         EncodeSub(dst, sign, shift);
623     }
624 }
625 
626 void Aarch64Encoder::EncodeCountLeadingZeroBits(Reg dst, Reg src)
627 {
628     auto rzero = GetRegfile()->GetZeroReg();
629     if (rzero.GetId() == src.GetId()) {
630         EncodeMov(dst, Imm(src.GetSize()));
631         return;
632     }
633     GetMasm()->Clz(VixlReg(dst), VixlReg(src));
634 }
635 
636 void Aarch64Encoder::EncodeCountTrailingZeroBits(Reg dst, Reg src)
637 {
638     auto rzero = GetRegfile()->GetZeroReg();
639     if (rzero.GetId() == src.GetId()) {
640         EncodeMov(dst, Imm(src.GetSize()));
641         return;
642     }
643     GetMasm()->Rbit(VixlReg(dst), VixlReg(src));
644     GetMasm()->Clz(VixlReg(dst), VixlReg(dst));
645 }
646 
647 void Aarch64Encoder::EncodeCeil(Reg dst, Reg src)
648 {
649     GetMasm()->Frintp(VixlVReg(dst), VixlVReg(src));
650 }
651 
652 void Aarch64Encoder::EncodeFloor(Reg dst, Reg src)
653 {
654     GetMasm()->Frintm(VixlVReg(dst), VixlVReg(src));
655 }
656 
657 void Aarch64Encoder::EncodeRint(Reg dst, Reg src)
658 {
659     GetMasm()->Frintn(VixlVReg(dst), VixlVReg(src));
660 }
661 
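// Rounds to the nearest integer with half-up semantics: FCVTAS rounds ties away from zero,
// which only differs from half-up for negative ties, so those are detected with FRINTA/FSUB
// and corrected by CINC.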
662 void Aarch64Encoder::EncodeRound(Reg dst, Reg src)
663 {
664     auto done = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
665     ScopedTmpReg tmp(this, src.GetType());
666     // round to nearest integer, ties away from zero
667     GetMasm()->Fcvtas(VixlReg(dst), VixlVReg(src));
668     // for positive values, zero, and NaN inputs, rounding is already done
669     GetMasm()->Tbz(VixlReg(dst), dst.GetSize() - 1, done);
670     // if input is negative but not a tie, round to nearest is valid
671     // if input is a negative tie, dst += 1
672     GetMasm()->Frinta(VixlVReg(tmp), VixlVReg(src));
673     GetMasm()->Fsub(VixlVReg(tmp), VixlVReg(src), VixlVReg(tmp));
674     // NOLINTNEXTLINE(readability-magic-numbers)
675     const auto HALF = 0.5;
676     GetMasm()->Fcmp(VixlVReg(tmp), HALF);
677     GetMasm()->Cinc(VixlReg(dst), VixlReg(dst), vixl::aarch64::Condition::eq);
678     GetMasm()->Bind(done);
679 }
680 
681 void Aarch64Encoder::EncodeStringEquals(Reg dst, Reg str1, Reg str2, bool COMPRESSION, uint32_t LENGTH_OFFSET,
682                                         uint32_t DATA_OFFSET)
683 {
684     /* Pseudo code:
685       if (str1 == str2) return true;                                // pointers compare. Fast path for same object
686       if (str1.length_field() != str2.length_field()) return false; // case length or compression is different
687 
688       // The code below uses tmp3 both as a counter and as an offset, to keep str1 and str2 untouched and to
689       // use a minimal number of scratch registers. Only 3 scratch registers are used: tmp1 and tmp2 for the
690       // loaded string data of str1 and str2 respectively, and tmp3 as counter and offset at the same time.
691       // tmp3 will then be "DATA_OFFSET + <offset inside string data>" almost everywhere. The string is checked from
692       // the end to make tmp3 manipulation easier. This is probably a bit less efficient on large and
693       // almost identical strings due to mostly unaligned accesses, but we can ignore that because most strings
694       // are shorter than 32 chars and in most cases a difference is found on the first comparison, so the simpler
695       // code without additional operations wins.
696 
697       int tmp3 = str1.length() * <size of str1 characters>;         // data size in bytes
698       tmp3 = tmp3 + DATA_OFFSET - DOUBLE_WORD_SIZE_BYTE;            // offset of last 8 data bytes (last octet)
699       while (tmp3 >= DATA_OFFSET) {
700           if (<load-8-bytes-at>(str1 + tmp3) != <load-8-bytes-at>(str2 + tmp3)) return false;
701           tmp3 -= 8;
702       }
703       // less than 8 bytes left to load and check. possibly 0.
704       if (tmp3 == DATA_OFFSET - DOUBLE_WORD_SIZE_BYTE) return true; // 0 bytes left
705       // 1..7 bytes left. Read a whole octet (8 bytes), including a few bytes from the object header, then shift off the header bytes
706       tmp1 = <load-8-bytes-at>(str1 + tmp3);
707       tmp2 = <load-8-bytes-at>(str2 + tmp3);
708       tmp3 = tmp3 - DATA_OFFSET;                                    // <useful bytes> - 8 (== -<bytes to shift off>)
709       // calculate the number of bits to shift off. Note that for negative numbers the shift result is undefined behavior
710       // in some languages like C/C++, but it is still fine for the h/w logical shift at the assembly level. We can use it.
711       tmp3 = - (tmp3 << 3);
712       if ((tmp1 >> tmp3) != (tmp2 >> tmp3)) return false;
713       return true;
714     */
715 
716     ASSERT(dst.IsScalar());
717 
718     ScopedTmpRegU64 tmp1_scoped(this);
719     ScopedTmpRegU64 tmp2_scoped(this);
720     ScopedTmpRegU64 tmp3_scoped(this);
721 
722     auto tmp1_u32 = VixlReg(Reg(tmp1_scoped.GetReg().GetId(), INT32_TYPE));  // 32-bit alias for tmp1
723     auto tmp2_u32 = VixlReg(Reg(tmp2_scoped.GetReg().GetId(), INT32_TYPE));  // 32-bit alias for tmp2
724 
725     auto tmp1 = VixlReg(tmp1_scoped.GetReg());
726     auto tmp2 = VixlReg(tmp2_scoped.GetReg());
727     auto tmp3 = VixlReg(tmp3_scoped.GetReg());
728 
729     auto label_false = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
730     auto label_cset = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
731 
732     // compare to itself case
733     GetMasm()->Cmp(VixlReg(str1), VixlReg(str2));
734     GetMasm()->B(label_cset, vixl::aarch64::Condition::eq);
735 
736     EncodeLdr(tmp1_scoped.GetReg().As(INT32_TYPE), false, MemRef(str1, LENGTH_OFFSET));
737     EncodeLdr(tmp2_scoped.GetReg().As(INT32_TYPE), false, MemRef(str2, LENGTH_OFFSET));
738 
739     // compare length and, potentially, the compressed-string status
740     GetMasm()->Cmp(tmp1_u32, tmp2_u32);
741     GetMasm()->B(label_cset, vixl::aarch64::Condition::ne);
742 
743     // compare data. Assume result is "true" unless different bytes found
744     if (COMPRESSION) {
745         // branchless byte length calculation
746         GetMasm()->Lsr(tmp1_u32, tmp1_u32, 1);  // string length
747         GetMasm()->And(tmp2_u32, tmp2_u32, 1);  // compressed-string bit. If 1 then not compressed.
748         GetMasm()->Lsl(tmp3, tmp1, tmp2);       // if not compressed, then shift left by 1 bit
749     }
750     EncodeStringEqualsMainLoop(dst, str1, str2, tmp1_scoped, tmp2_scoped, tmp3_scoped, label_false, label_cset,
751                                DATA_OFFSET);
752 }
753 
754 void Aarch64Encoder::EncodeStringEqualsMainLoop(Reg dst, Reg str1, Reg str2, Reg tmp1_scoped, Reg tmp2_scoped,
755                                                 Reg tmp3_scoped, vixl::aarch64::Label *label_false,
756                                                 vixl::aarch64::Label *label_cset, const uint32_t DATA_OFFSET)
757 {
758     auto tmp1 = VixlReg(tmp1_scoped);
759     auto tmp2 = VixlReg(tmp2_scoped);
760     auto tmp3 = VixlReg(tmp3_scoped);
761 
762     auto label_loop_begin = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
763     auto label_end = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
764     auto label_loop = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
765     // Now tmp3 is byte-counter. Use it as offset register as well.
766     GetMasm()->Add(tmp3, tmp3, DATA_OFFSET - DOUBLE_WORD_SIZE_BYTE);
767     GetMasm()->B(label_loop_begin);
768     GetMasm()->Bind(label_false);
769     EncodeMov(dst, Imm(0));
770     GetMasm()->B(label_end);
771     // case: >=8 bytes
772     GetMasm()->Bind(label_loop);
773 
774     auto str1_last_word_mem = MemRef(str1, tmp3_scoped, 0);
775     auto str2_last_word_mem = MemRef(str2, tmp3_scoped, 0);
776 
777     {
778         EncodeLdr(tmp1_scoped, false, str1_last_word_mem);
779         EncodeLdr(tmp2_scoped, false, str2_last_word_mem);
780         GetMasm()->Cmp(tmp1, tmp2);
781         GetMasm()->B(label_cset, vixl::aarch64::Condition::ne);
782         GetMasm()->Sub(tmp3, tmp3, DOUBLE_WORD_SIZE_BYTE);
783         GetMasm()->Bind(label_loop_begin);
784         GetMasm()->Cmp(tmp3, DATA_OFFSET);
785         GetMasm()->B(label_loop, vixl::aarch64::Condition::ge);
786     }
787 
788     // case: 0..7 bytes left (tmp3 is DATA_OFFSET + -8..0)
789     GetMasm()->Cmp(tmp3, DATA_OFFSET - DOUBLE_WORD_SIZE_BYTE);
790     GetMasm()->B(label_cset, vixl::aarch64::Condition::eq);
791     EncodeLdr(tmp1_scoped, false, str1_last_word_mem);
792     EncodeLdr(tmp2_scoped, false, str2_last_word_mem);
793     // 1..7 bytes left to check. tmp3 is DATA_OFFSET + -7..-1
794     GetMasm()->Sub(tmp3, tmp3, DATA_OFFSET);
795 
796     auto zero = VixlReg(GetRegfile()->GetZeroReg(), DOUBLE_WORD_SIZE);
797     // tmp3 is now -(amount_of_bytes_to_shift_off). Convert it to bits via single instruction
798     GetMasm()->Sub(tmp3, zero, vixl::aarch64::Operand(tmp3, vixl::aarch64::Shift::LSL, 3));
799     GetMasm()->Lsr(tmp1, tmp1, tmp3);
800     GetMasm()->Lsr(tmp2, tmp2, tmp3);
801     GetMasm()->Cmp(tmp1, tmp2);
802     GetMasm()->Bind(label_cset);
803     GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::eq);
804     GetMasm()->Bind(label_end);
805 }
806 
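// Updates a CRC-32 accumulator with one byte. The accumulator is bit-inverted before and after
// the CRC32B instruction (the intrinsic appears to keep the CRC in non-inverted form).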
807 void Aarch64Encoder::EncodeCrc32Update(Reg dst, Reg crc_reg, Reg val_reg)
808 {
809     auto tmp =
810         dst.GetId() != crc_reg.GetId() && dst.GetId() != val_reg.GetId() ? dst : ScopedTmpReg(this, dst.GetType());
811     GetMasm()->Mvn(VixlReg(tmp), VixlReg(crc_reg));
812     GetMasm()->Crc32b(VixlReg(tmp), VixlReg(tmp), VixlReg(val_reg));
813     GetMasm()->Mvn(VixlReg(dst), VixlReg(tmp));
814 }
815 
816 /**
817  * Helper function for generating String::indexOf intrinsic: case of Latin1 (8-bit) character search
818  *
819  * Inputs: str - pointer to the string object
820  *         character - character to search
821  *         idx: original start index
822  *         tmp: address of 1st string character
823  *         tmp1: length field value (potentially with compression bit).
824  *         tmp2: MAX(idx, 0) if idx is not zero register. Anything otherwise.
825  *         tmp3: temporary register to use
826  *         label_found: label to jump when match found.
827  *               Label contract requirement 1: leave calculated result in tmp1.
828  *         label_not_found: label to jump when no match found.
829  * Assumptions: starting search index is less than string length (tmp1)
830  */
831 void Aarch64Encoder::IndexOfHandleLatin1Case(Reg str, Reg character, Reg idx, Reg tmp, const bool COMPRESSION,
832                                              const uint32_t DATA_OFFSET, const vixl::aarch64::Register &tmp1,
833                                              const vixl::aarch64::Register &tmp2, const vixl::aarch64::Register &tmp3,
834                                              vixl::aarch64::Label *label_found, vixl::aarch64::Label *label_not_found)
835 {
836     // vixl register aliases
837     auto character_w = VixlReg(character).W();
838     auto character_x = VixlReg(character).X();
839     auto tmp0_x = VixlReg(tmp).X();
840 
841     // more vixl aliases
842     auto lsl = vixl::aarch64::Shift::LSL;
843     auto lsr = vixl::aarch64::Shift::LSR;
844 
845     bool idx_is_zero = (idx.GetId() == GetRegfile()->GetZeroReg().GetId());
846     bool character_is_zero = (character.GetId() == GetRegfile()->GetZeroReg().GetId());
847 
848     // calculate address of first byte after string
849     if (COMPRESSION) {
850         GetMasm()->Add(tmp0_x, tmp0_x, vixl::aarch64::Operand(tmp1.X(), lsr, 1));
851         if (idx_is_zero) {
852             GetMasm()->Neg(tmp2.X(), vixl::aarch64::Operand(tmp1.X(), lsr, 1));
853         } else {
854             GetMasm()->Sub(tmp2.X(), tmp2.X(), vixl::aarch64::Operand(tmp1.X(), lsr, 1));
855         }
856     } else {
857         if (idx_is_zero) {
858             GetMasm()->Neg(tmp2.X(), tmp1.X());
859         } else {
860             GetMasm()->Sub(tmp2.X(), tmp2.X(), tmp1.X());
861         }
862     }
863     GetMasm()->Cmp(tmp2.X(), -DOUBLE_WORD_SIZE_BYTE);
864 
865     auto label_small_loop = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
866     GetMasm()->B(label_small_loop, vixl::aarch64::Condition::gt);
867     // clone character to the size of register (i.e. 8 x 8-bit characters)
868     if (!character_is_zero) {
869         GetMasm()->Orr(character_w, character_w, vixl::aarch64::Operand(character_w, lsl, BYTE_SIZE));
870         GetMasm()->Orr(character_w, character_w, vixl::aarch64::Operand(character_w, lsl, HALF_SIZE));
871         GetMasm()->Orr(character_x, character_x, vixl::aarch64::Operand(character_x, lsl, WORD_SIZE));
872     }
873     IndexOfHandleLatin1CaseMainLoop(str, character, tmp, DATA_OFFSET, tmp1, tmp2, tmp3, label_found, label_not_found,
874                                     label_small_loop);
875 }
876 
877 // constants for the indexOf implementation
878 constexpr int32_t MAX_8BIT_CHAR = 0xFF;
879 constexpr int32_t LOG2_BITS_PER_BYTE = 3;
880 constexpr uint32_t CLEAR_BIT_MASK = -2;
881 constexpr int32_t MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
882 constexpr int32_t MAX_SUPPLEMENTARY_CODE_POINT = 0x10FFFF;
883 constexpr uint64_t LATIN1_MASK = 0x7f7f7f7f7f7f7f7f;
884 constexpr uint64_t LATIN1_MASK2 = 0x0101010101010101;
885 
886 void Aarch64Encoder::IndexOfHandleLatin1CaseMainLoop(
887     Reg str, Reg character, Reg tmp, const uint32_t DATA_OFFSET, const vixl::aarch64::Register &tmp1,
888     const vixl::aarch64::Register &tmp2, const vixl::aarch64::Register &tmp3, vixl::aarch64::Label *label_found,
889     vixl::aarch64::Label *label_not_found, vixl::aarch64::Label *label_small_loop)
890 {
891     auto character_w = VixlReg(character).W();
892     bool character_is_zero = (character.GetId() == GetRegfile()->GetZeroReg().GetId());
893 
894     auto label_loop = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
895     auto label_has_zero = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
896     auto label_small_match = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
897     auto label_not_found_restore_char = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
898 
899     auto mem_tmp_tmp2_x = vixl::aarch64::MemOperand(VixlReg(tmp).X(), tmp2.X());
900 
901     GetMasm()->Bind(label_loop);
902     {
903         GetMasm()->Ldr(tmp1.X(), mem_tmp_tmp2_x);
904         GetMasm()->Mov(tmp3.X(), LATIN1_MASK2);  // can (re)init during ldr to save 1 reg
905         GetMasm()->Eor(tmp1.X(), tmp1.X(), VixlReg(character).X());
906         GetMasm()->Sub(tmp3.X(), tmp1.X(), tmp3.X());
907         GetMasm()->Orr(tmp1.X(), tmp1.X(), LATIN1_MASK);
908         GetMasm()->Bics(tmp1.X(), tmp3.X(), tmp1.X());
909         GetMasm()->B(label_has_zero, vixl::aarch64::Condition::ne);
910         GetMasm()->Add(tmp2.X(), tmp2.X(), DOUBLE_WORD_SIZE_BYTE);
911         GetMasm()->Cmp(tmp2.X(), -DOUBLE_WORD_SIZE_BYTE);        // has enough bytes left to read whole register?
912         GetMasm()->B(label_loop, vixl::aarch64::Condition::lt);  // yes. time to loop
913     }
914     GetMasm()->Cbz(tmp2.X(), character_is_zero ? label_not_found : label_not_found_restore_char);  // done
915     GetMasm()->Mov(tmp2.X(), -DOUBLE_WORD_SIZE_BYTE);  // setup data to read last 8 bytes. One more loop
916     GetMasm()->B(label_loop);
917     GetMasm()->Bind(label_small_loop);
918     {
919         GetMasm()->Ldrb(tmp1.W(), mem_tmp_tmp2_x);
920         GetMasm()->Cmp(tmp1.W(), character_w);
921         GetMasm()->B(label_small_match, vixl::aarch64::Condition::eq);
922         GetMasm()->Adds(tmp2.X(), tmp2.X(), BYTE_SIZE / BITS_PER_BYTE);
923         GetMasm()->Cbnz(tmp2.X(), label_small_loop);
924         GetMasm()->B(label_not_found);
925     }
926     GetMasm()->Bind(label_has_zero);
927     GetMasm()->Rev(tmp1.X(), tmp1.X());
928     if (!character_is_zero) {
929         GetMasm()->And(character_w, character_w, MAX_8BIT_CHAR);
930     }
931     GetMasm()->Clz(tmp1.X(), tmp1.X());  // difference bit index in current octet
932     GetMasm()->Add(tmp2.X(), tmp2.X(), vixl::aarch64::Operand(tmp1.X(), vixl::aarch64::Shift::ASR, LOG2_BITS_PER_BYTE));
933     GetMasm()->Bind(label_small_match);
934     // string length in bytes is: tmp - str - DATA_OFFSET
935     GetMasm()->Add(tmp2.X(), tmp2.X(), VixlReg(tmp).X());
936     GetMasm()->Sub(tmp2.X(), tmp2.X(), VixlReg(str).X());
937     GetMasm()->Sub(tmp2.X(), tmp2.X(), DATA_OFFSET);
938     GetMasm()->B(label_found);
939     GetMasm()->Bind(label_not_found_restore_char);
940     if (!character_is_zero) {
941         GetMasm()->And(character_w, character_w, MAX_8BIT_CHAR);
942     }
943     GetMasm()->B(label_not_found);
944 }
945 
946 constexpr uint32_t UTF16_IDX2OFFSET_SHIFT = 1;
947 
948 /**
949  * Helper function for generating String::indexOf intrinsic: case of normal utf-16 character search
950  *
951  * Inputs: str - pointer to the string object
952  *         character - character to search
953  *         idx: original start index
954  *         tmp: address of 1st string character
955  *         tmp1: length field value (potentially with compression bit).
956  *         tmp2: MAX(idx, 0) if idx is not zero register. Anything otherwise.
957  *         tmp3: temporary register to use
958  *         label_found: label to jump when match found.
959  *               Label contract requirement 1: leave calculated result in tmp1.
960  *         label_not_found: label to jump when no match found.
961  * Assumptions: starting search index is less than string length (tmp1)
962  */
963 void Aarch64Encoder::IndexOfHandleUtf16NormalCase(Reg str, Reg character, Reg idx, Reg tmp, const bool COMPRESSION,
964                                                   const uint32_t DATA_OFFSET, const vixl::aarch64::Register &tmp1,
965                                                   const vixl::aarch64::Register &tmp2,
966                                                   const vixl::aarch64::Register &tmp3,
967                                                   vixl::aarch64::Label *label_found,
968                                                   vixl::aarch64::Label *label_not_found)
969 {
970     // vixl register aliases
971     auto character_w = VixlReg(character).W();
972     auto character_x = VixlReg(character).X();
973     auto tmp0_x = VixlReg(tmp).X();
974 
975     // more vixl aliases
976     auto lsl = vixl::aarch64::Shift::LSL;
977 
978     // local labels
979     auto label_small_loop = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
980 
981     bool idx_is_zero = (idx.GetId() == GetRegfile()->GetZeroReg().GetId());
982     bool character_is_zero = (character.GetId() == GetRegfile()->GetZeroReg().GetId());
983 
984     if (COMPRESSION) {
985         GetMasm()->And(tmp1.W(), tmp1.W(), CLEAR_BIT_MASK);  // clear lowest bit to get string length in bytes
986     } else {
987         GetMasm()->Lsl(tmp1.W(), tmp1.W(), 1);  // string length in bytes for non-compressed case
988     }
989     // amount of bytes to scan in worst case
990     GetMasm()->Add(tmp0_x, tmp0_x, tmp1.X());  // calculate address of first byte after string
991     if (idx_is_zero) {
992         GetMasm()->Neg(tmp2.X(), tmp1.X());
993     } else {
994         GetMasm()->Sub(tmp2.X(), tmp1.X(), vixl::aarch64::Operand(tmp2.X(), lsl, UTF16_IDX2OFFSET_SHIFT));
995         GetMasm()->Neg(tmp2.X(), tmp2.X());
996     }
997     GetMasm()->Cmp(tmp2.X(), -DOUBLE_WORD_SIZE_BYTE);
998     GetMasm()->B(label_small_loop, vixl::aarch64::Condition::gt);
999     // clone character to the size of register (i.e. 4 x 16-bit characters)
1000     if (!character_is_zero) {
1001         GetMasm()->Orr(character_w, character_w, vixl::aarch64::Operand(character_w, lsl, HALF_SIZE));
1002         GetMasm()->Orr(character_x, character_x, vixl::aarch64::Operand(character_x, lsl, WORD_SIZE));
1003     }
1004     IndexOfHandleUtf16NormalCaseMainLoop(str, character, tmp, DATA_OFFSET, tmp1, tmp2, tmp3, label_found,
1005                                          label_not_found, label_small_loop);
1006 }
1007 
1008 constexpr uint64_t UTF16_MASK = 0x7fff7fff7fff7fff;
1009 constexpr uint64_t UTF16_MASK2 = 0x0001000100010001;
1010 constexpr int32_t MAX_UTF16_CHAR = 0xFFFF;
1011 
1012 void Aarch64Encoder::IndexOfHandleUtf16NormalCaseMainLoop(
1013     Reg str, Reg character, Reg tmp, const uint32_t DATA_OFFSET, const vixl::aarch64::Register &tmp1,
1014     const vixl::aarch64::Register &tmp2, const vixl::aarch64::Register &tmp3, vixl::aarch64::Label *label_found,
1015     vixl::aarch64::Label *label_not_found, vixl::aarch64::Label *label_small_loop)
1016 {
1017     auto tmp0_x = VixlReg(tmp).X();
1018     auto character_w = VixlReg(character).W();
1019     bool character_is_zero = (character.GetId() == GetRegfile()->GetZeroReg().GetId());
1020 
1021     auto label_loop = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1022     auto label_has_zero = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1023     auto label_small_match = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1024     auto label_not_found_restore_char = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1025 
1026     GetMasm()->Bind(label_loop);
1027     {
1028         GetMasm()->Ldr(tmp1.X(), vixl::aarch64::MemOperand(tmp0_x, tmp2.X()));
1029         GetMasm()->Mov(tmp3.X(), UTF16_MASK2);  // can (re)init during ldr to save 1 reg
1030         GetMasm()->Eor(tmp1.X(), tmp1.X(), VixlReg(character).X());
1031         GetMasm()->Sub(tmp3.X(), tmp1.X(), tmp3.X());
1032         GetMasm()->Orr(tmp1.X(), tmp1.X(), UTF16_MASK);
1033         GetMasm()->Bics(tmp1.X(), tmp3.X(), tmp1.X());
1034         GetMasm()->B(label_has_zero, vixl::aarch64::Condition::ne);
1035         GetMasm()->Add(tmp2.X(), tmp2.X(), DOUBLE_WORD_SIZE_BYTE);
1036         GetMasm()->Cmp(tmp2.X(), -DOUBLE_WORD_SIZE_BYTE);        // has enough bytes left to read whole register?
1037         GetMasm()->B(label_loop, vixl::aarch64::Condition::lt);  // yes. time to loop
1038     }
1039     GetMasm()->Cbz(tmp2.X(), character_is_zero ? label_not_found : label_not_found_restore_char);  // done
1040     GetMasm()->Mov(tmp2.X(), -DOUBLE_WORD_SIZE_BYTE);  // setup data to read last 8 bytes. One more loop
1041     GetMasm()->B(label_loop);
1042     GetMasm()->Bind(label_small_loop);
1043     {
1044         GetMasm()->Ldrh(tmp1.W(), vixl::aarch64::MemOperand(tmp0_x, tmp2.X()));
1045         GetMasm()->Cmp(tmp1.W(), character_w);
1046         GetMasm()->B(label_small_match, vixl::aarch64::Condition::eq);
1047         GetMasm()->Adds(tmp2.X(), tmp2.X(), HALF_SIZE / BITS_PER_BYTE);
1048         GetMasm()->Cbnz(tmp2.X(), label_small_loop);
1049         GetMasm()->B(label_not_found);
1050     }
1051     GetMasm()->Bind(label_has_zero);
1052     GetMasm()->Rev(tmp1.X(), tmp1.X());
1053     if (!character_is_zero) {
1054         GetMasm()->And(character_w, character_w, MAX_UTF16_CHAR);
1055     }
1056     GetMasm()->Clz(tmp1.X(), tmp1.X());  // difference bit index in current octet
1057     GetMasm()->Add(tmp2.X(), tmp2.X(), vixl::aarch64::Operand(tmp1.X(), vixl::aarch64::Shift::ASR, LOG2_BITS_PER_BYTE));
1058     GetMasm()->Bind(label_small_match);
1059     // string length in bytes is: tmp - str - DATA_OFFSET
1060     GetMasm()->Add(tmp2.X(), tmp2.X(), tmp0_x);
1061     GetMasm()->Sub(tmp2.X(), tmp2.X(), VixlReg(str).X());
1062     GetMasm()->Sub(tmp2.X(), tmp2.X(), DATA_OFFSET);
1063     GetMasm()->Lsr(tmp2.X(), tmp2.X(), UTF16_IDX2OFFSET_SHIFT);
1064     GetMasm()->B(label_found);
1065     GetMasm()->Bind(label_not_found_restore_char);
1066     if (!character_is_zero) {
1067         GetMasm()->And(character_w, character_w, MAX_UTF16_CHAR);
1068     }
1069     GetMasm()->B(label_not_found);
1070 }
1071 
1072 /**
1073  * Helper function for generating String::indexOf intrinsic: case of surrogate character search
1074  *
1075  * Inputs: str - pointer to the string object
1076  *         character - character to search
1077  *         idx: original start index
1078  *         tmp: address of 1st string character
1079  *         tmp1: length field value (potentially with compression bit).
1080  *         tmp2: MAX(idx, 0) if idx is not zero register. Anything otherwise.
1081  *         tmp3: temporary register to use
1082  *         label_found: label to jump when match found.
1083  *               Label contract requirement 1: leave calculated result in tmp1.
1084  *         label_not_found: label to jump when no match found.
1085  * Assumptions: starting search index is less than string length (tmp1)
1086  */
1087 void Aarch64Encoder::IndexOfHandleSurrogateCase(Reg str, Reg character, Reg idx, Reg tmp, const bool COMPRESSION,
1088                                                 const uint32_t DATA_OFFSET, const vixl::aarch64::Register &tmp1,
1089                                                 const vixl::aarch64::Register &tmp2,
1090                                                 const vixl::aarch64::Register &tmp3, vixl::aarch64::Label *label_found,
1091                                                 vixl::aarch64::Label *label_not_found)
1092 {
1093     // local constants
1094     constexpr uint32_t MIN_HIGH_SURROGATE = 0xD800;
1095     constexpr uint32_t MIN_LOW_SURROGATE = 0xDC00;
1096     constexpr uint32_t SURROGATE_LOW_BITS = 10;
1097 
1098     // vixl register aliases
1099     auto character_w = VixlReg(character).W();
1100     auto str_x = VixlReg(str).X();
1101     auto tmp0_x = VixlReg(tmp).X();
1102 
1103     // more vixl aliases
1104     auto lsl = vixl::aarch64::Shift::LSL;
1105     auto lsr = vixl::aarch64::Shift::LSR;
1106 
1107     // local labels
1108     auto label_sur_loop = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1109     auto label_match = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1110 
1111     bool idx_is_zero = (idx.GetId() == GetRegfile()->GetZeroReg().GetId());
1112 
1113     if (COMPRESSION) {
1114         GetMasm()->And(tmp1.W(), tmp1.W(), CLEAR_BIT_MASK);  // clear lowest bit to get string length in bytes
1115     } else {
1116         GetMasm()->Lsl(tmp1.W(), tmp1.W(), 1);  // string length in bytes for non-compressed case
1117     }
1118     GetMasm()->Add(tmp0_x, tmp0_x, tmp1.X());                   // calculate address of first byte after string
1119     GetMasm()->Sub(tmp0_x, tmp0_x, HALF_SIZE / BITS_PER_BYTE);  // don't scan last UTF-16 entry
1120     // amount of bytes to scan in worst case
1121     if (idx_is_zero) {
1122         GetMasm()->Neg(tmp2.X(), tmp1.X());
1123     } else {
1124         GetMasm()->Sub(tmp2.X(), tmp1.X(), vixl::aarch64::Operand(tmp2.X(), lsl, UTF16_IDX2OFFSET_SHIFT));
1125         GetMasm()->Neg(tmp2.X(), tmp2.X());
1126     }
1127     GetMasm()->Add(tmp2.X(), tmp2.X(), HALF_SIZE / BITS_PER_BYTE);
1128     GetMasm()->Cbz(tmp2.X(), label_not_found);
1129     GetMasm()->Sub(tmp1.W(), character_w, MIN_SUPPLEMENTARY_CODE_POINT);  // shifted immediate version
1130     GetMasm()->Mov(tmp3.W(), MIN_HIGH_SURROGATE);
1131     GetMasm()->Add(tmp1.W(), tmp3.W(), vixl::aarch64::Operand(tmp1.W(), lsr, SURROGATE_LOW_BITS));  // high surrogate
1132     // low surrogate calculation below
1133     GetMasm()->Movk(tmp1.X(), MIN_LOW_SURROGATE, HALF_SIZE);
1134     // copy lowest 10 bits into (low surrogate)'s lowest 10 bits
1135     GetMasm()->Bfm(tmp1.W(), character_w, HALF_SIZE, SURROGATE_LOW_BITS - 1);
1136     GetMasm()->Bind(label_sur_loop);
1137     GetMasm()->Ldr(tmp3.W(), vixl::aarch64::MemOperand(tmp0_x, tmp2.X()));
1138     GetMasm()->Cmp(tmp3.W(), tmp1.W());
1139     GetMasm()->B(label_match, vixl::aarch64::Condition::eq);
1140     GetMasm()->Adds(tmp2.X(), tmp2.X(), HALF_SIZE / BITS_PER_BYTE);
1141     GetMasm()->Cbnz(tmp2.X(), label_sur_loop);
1142     GetMasm()->B(label_not_found);
1143     GetMasm()->Bind(label_match);
1144     // string length in bytes is: tmp - str - DATA_OFFSET
1145     GetMasm()->Add(tmp2.X(), tmp2.X(), tmp0_x);
1146     GetMasm()->Sub(tmp2.X(), tmp2.X(), str_x);
1147     GetMasm()->Sub(tmp2.X(), tmp2.X(), DATA_OFFSET);
1148     GetMasm()->Lsr(tmp2.X(), tmp2.X(), UTF16_IDX2OFFSET_SHIFT);
1149     GetMasm()->B(label_found);
1150 }
1151 
1152 void Aarch64Encoder::EncodeStringIndexOfAfter(Reg dst, Reg str, Reg character, Reg idx, Reg tmp, bool COMPRESSION,
1153                                               uint32_t LENGTH_OFFSET, uint32_t DATA_OFFSET, int32_t CHAR_CONST_VALUE)
1154 {
1155     // NullCheck must check the str register before StringIndexOfAfter.
1156     // If str is the zero register, execution must not reach this instruction.
1157     auto zero_reg_id = GetRegfile()->GetZeroReg().GetId();
1158     if (str.GetId() == zero_reg_id) {
1159         EncodeAbort();
1160         return;
1161     }
1162 
1163     auto zero = VixlReg(GetRegfile()->GetZeroReg(), DOUBLE_WORD_SIZE);
1164     ScopedTmpRegU64 tmp1_scoped(this);
1165     ScopedTmpRegU64 tmp2_scoped(this);
1166     ScopedTmpRegU64 tmp3_scoped(this);
1167     auto tmp1 = VixlReg(tmp1_scoped.GetReg());
1168     auto tmp2 = VixlReg(tmp2_scoped.GetReg());
1169     auto tmp3 = VixlReg(tmp3_scoped.GetReg());
1170 
1171     // vixl register aliases
1172     bool idx_is_zero = (idx.GetId() == zero_reg_id);
1173 
1174     /*  Pseudo code:
1175         if (idx < 0) idx = 0;
1176 
1177         if (idx >= length) {
1178             return -1;
1179         }
1180 
1181         if (!<character_is_utf16_surrogate_pair>) { // main case
1182             if (<string_is_utf16>) {
1183                 <search char in utf-16 string>; // IndexOfHandleUtf16NormalCase
1184             } else { // 8-bit string case
1185                 if (<character_is_utf16>) {
1186                     return -1;
1187                 }
1188                 <search 8-bit char in 8-bit string>; // IndexOfHandleLatin1
1189             }
1190         } else { // surrogate pair case
1191             if (!<string_is_utf16>) {
1192                 return -1;
1193             }
1194             <per-character surrogate pair search>;  // IndexOfHandleSurrogateCase
1195         }
1196     */
1197 
1198     if (!idx_is_zero) {
1199         auto idx_w = VixlReg(idx).W();
1200         GetMasm()->Cmp(idx_w, zero.W());
1201         GetMasm()->Csel(tmp2.W(), idx_w, zero.W(), vixl::aarch64::Condition::gt);  // max(idx, 0)
1202     }
1203 
1204     GetMasm()->Ldr(tmp1.W(), vixl::aarch64::MemOperand(VixlReg(str).X(),
1205                                                        LENGTH_OFFSET));  // string length with potential compression bit
1206     GetMasm()->Cmp(idx_is_zero ? zero.W() : tmp2.W(),
1207                    COMPRESSION ? vixl::aarch64::Operand(tmp1.W(), vixl::aarch64::Shift::LSR, 1) : tmp1.W());
1208 
1209     auto label_not_found = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1210     GetMasm()->B(label_not_found, vixl::aarch64::Condition::ge);
1211 
1212     // check if character is larger than upper bound of UTF-16
1213     GetMasm()->Mov(tmp3.X(), MAX_SUPPLEMENTARY_CODE_POINT);
1214     GetMasm()->Cmp(VixlReg(character).X(), tmp3);
1215     GetMasm()->B(label_not_found, vixl::aarch64::Condition::gt);
1216 
1217     // memo: compression bit: 0 = compressed (i.e. 8-bit), 1 = uncompressed (i.e. utf16)
1218     EncodeStringIndexOfAfterMainCase(dst, str, character, idx, tmp, tmp1, tmp2, tmp3, COMPRESSION, DATA_OFFSET,
1219                                      CHAR_CONST_VALUE, label_not_found);
1220 
1221     // local constants
1222     constexpr int32_t RESULT_NOT_FOUND = -1;
1223     auto label_done = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1224 
1225     GetMasm()->B(label_done);
1226     GetMasm()->Bind(label_not_found);
1227     GetMasm()->Mov(VixlReg(dst).W(), RESULT_NOT_FOUND);
1228     GetMasm()->Bind(label_done);
1229 }
1230 
1231 void Aarch64Encoder::HandleChar(int32_t ch, const vixl::aarch64::Register &tmp, vixl::aarch64::Label *label_not_found,
1232                                 vixl::aarch64::Label *label_uncompressed_string)
1233 {
1234     if (ch > MAX_8BIT_CHAR) {
1235         GetMasm()->Tbz(tmp.W(), 0,
1236                        label_not_found);  // no need to search 16-bit character in compressed string
1237     } else {
1238         GetMasm()->Tbnz(tmp.W(), 0,
1239                         label_uncompressed_string);  // go to utf16 case if string is uncompressed
1240     }
1241 }
1242 
1243 void Aarch64Encoder::EncodeStringIndexOfAfterMainCase(Reg dst, Reg str, Reg character, Reg idx, Reg tmp,
1244                                                       const vixl::aarch64::Register &tmp1,
1245                                                       const vixl::aarch64::Register &tmp2,
1246                                                       const vixl::aarch64::Register &tmp3, const bool COMPRESSION,
1247                                                       const uint32_t DATA_OFFSET, const int32_t CHAR_CONST_VALUE,
1248                                                       vixl::aarch64::Label *label_not_found)
1249 {
1250     constexpr int32_t CHAR_CONST_UNKNOWN = -1;
1251     auto label_surrogate = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1252     auto label_found = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1253     auto label_uncompressed_string = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1254 
1255     GetMasm()->Add(VixlReg(tmp).X(), VixlReg(str).X(), DATA_OFFSET);
1256 
1257     if (CHAR_CONST_VALUE < MIN_SUPPLEMENTARY_CODE_POINT) {
1258         // case of non-surrogate constant char or non-constant char
1259         if (CHAR_CONST_VALUE == CHAR_CONST_UNKNOWN) {  // run time check for surrogate pair
1260             GetMasm()->Cmp(VixlReg(character).W(),
1261                            MIN_SUPPLEMENTARY_CODE_POINT);  // shifted immediate form of Cmp (i.e. 0x10 << 12)
1262             GetMasm()->B(label_surrogate, vixl::aarch64::Condition::ge);
1263         }
1264         if (COMPRESSION) {
1265             if (CHAR_CONST_VALUE != CHAR_CONST_UNKNOWN) {
1266                 HandleChar(CHAR_CONST_VALUE, tmp1, label_not_found, label_uncompressed_string);
1267             } else {
1268                 GetMasm()->Tbnz(tmp1.W(), 0, label_uncompressed_string);
1269                 GetMasm()->Cmp(VixlReg(character).W(), MAX_8BIT_CHAR);
1270                 GetMasm()->B(label_not_found,
1271                              vixl::aarch64::Condition::gt);  // do not search for a 16-bit char in a compressed string
1272             }
1273             if (CHAR_CONST_VALUE <= MAX_8BIT_CHAR) {  // i.e. character is 8-bit constant or unknown
1274                 IndexOfHandleLatin1Case(str, character, idx, tmp, COMPRESSION, DATA_OFFSET, tmp1, tmp2, tmp3,
1275                                         label_found, label_not_found);
1276             }
1277             GetMasm()->Bind(label_uncompressed_string);
1278         }
1279         IndexOfHandleUtf16NormalCase(str, character, idx, tmp, COMPRESSION, DATA_OFFSET, tmp1, tmp2, tmp3, label_found,
1280                                      label_not_found);
1281     }
1282 
1283     if (CHAR_CONST_VALUE >= MIN_SUPPLEMENTARY_CODE_POINT || CHAR_CONST_VALUE == CHAR_CONST_UNKNOWN) {
1284         GetMasm()->Bind(label_surrogate);
1285         if (COMPRESSION) {
1286             GetMasm()->Tbz(tmp1.W(), 0, label_not_found);  // no need to search 16-bit character in compressed string
1287         }
1288         IndexOfHandleSurrogateCase(str, character, idx, tmp, COMPRESSION, DATA_OFFSET, tmp1, tmp2, tmp3, label_found,
1289                                    label_not_found);
1290     }
1291     // various exit handling below
1292     GetMasm()->Bind(label_found);
1293     GetMasm()->Mov(VixlReg(dst).W(), tmp2.W());
1294 }
1295 
1296 /* return the power of 2 for the size of the type */
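/* A sketch of the resulting mapping, assuming the type ids below follow the runtime's type
 * enumeration order (this is an illustration derived from the code, not a table from the spec):
 *   type < I16          -> 0  (1-byte types)
 *   I16 <= type < I32   -> 1  (2-byte types)
 *   I32 <= type < F64   -> 2  (4-byte types)
 *   type >= F64         -> 3  (8-byte types)
 *   type == REF         -> 3 - SMALLREF (i.e. 2 when object pointers are compressed to 32 bits)
 */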
1297 void Aarch64Encoder::EncodeGetTypeSize(Reg size, Reg type)
1298 {
1299     auto sreg = VixlReg(type);
1300     auto dreg = VixlReg(size);
1301     constexpr uint8_t I16 = 0x5;
1302     constexpr uint8_t I32 = 0x7;
1303     constexpr uint8_t F64 = 0xa;
1304     constexpr uint8_t REF = 0xd;
1305     constexpr uint8_t SMALLREF = panda::OBJECT_POINTER_SIZE < sizeof(uint64_t) ? 1 : 0;
1306     auto end = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1307 
1308     GetMasm()->Mov(dreg, VixlImm(0));
1309     GetMasm()->Cmp(sreg, VixlImm(I16));
1310     GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1311     GetMasm()->Cmp(sreg, VixlImm(I32));
1312     GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1313     GetMasm()->Cmp(sreg, VixlImm(F64));
1314     GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1315     GetMasm()->Cmp(sreg, VixlImm(REF));
1316     GetMasm()->B(end, vixl::aarch64::Condition::ne);
1317     GetMasm()->Sub(dreg, dreg, VixlImm(SMALLREF));
1318     GetMasm()->Bind(end);
1319 }
1320 
1321 void Aarch64Encoder::EncodeReverseBits(Reg dst, Reg src)
1322 {
1323     auto rzero = GetRegfile()->GetZeroReg();
1324     if (rzero.GetId() == src.GetId()) {
1325         EncodeMov(dst, Imm(0));
1326         return;
1327     }
1328     ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
1329     ASSERT(src.GetSize() == dst.GetSize());
1330 
1331     GetMasm()->Rbit(VixlReg(dst), VixlReg(src));
1332 }
1333 
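/* An approximate picture of what EncodeCompressedStringCharAt emits; register names and labels
 * are placeholders only, the real code uses VIXL registers and generated labels:
 *   tbnz  w_length, #0, .Luncompressed   ; low bit of length set means UTF-16 data
 *   add   x_tmp, x_str, x_idx
 *   ldrb  w_dst, [x_tmp, #data_offset]   ; 8-bit char
 *   b     .Ldone
 * .Luncompressed:
 *   add   x_tmp, x_str, x_idx, lsl #shift
 *   ldrh  w_dst, [x_tmp, #data_offset]   ; 16-bit char
 * .Ldone:
 */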
1334 void Aarch64Encoder::EncodeCompressedStringCharAt(Reg dst, Reg str, Reg idx, Reg length, Reg tmp, size_t data_offset,
1335                                                   uint32_t shift)
1336 {
1337     ASSERT(dst.GetSize() == HALF_SIZE);
1338 
1339     auto label_not_compressed = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1340     auto label_char_loaded = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1341     auto vixl_tmp = VixlReg(tmp, DOUBLE_WORD_SIZE);
1342     auto vixl_dst = VixlReg(dst);
1343 
1344     GetMasm()->Tbnz(VixlReg(length), 0, label_not_compressed);
1345     EncodeAdd(tmp, str, idx);
1346     GetMasm()->ldrb(vixl_dst, MemOperand(vixl_tmp, data_offset));
1347     GetMasm()->B(label_char_loaded);
1348     GetMasm()->Bind(label_not_compressed);
1349     EncodeAdd(tmp, str, Shift(idx, shift));
1350     GetMasm()->ldrh(vixl_dst, MemOperand(vixl_tmp, data_offset));
1351     GetMasm()->Bind(label_char_loaded);
1352 }
1353 
1354 void Aarch64Encoder::EncodeCompressedStringCharAtI(Reg dst, Reg str, Reg length, size_t data_offset, uint32_t index,
1355                                                    uint32_t shift)
1356 {
1357     ASSERT(dst.GetSize() == HALF_SIZE);
1358 
1359     auto label_not_compressed = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1360     auto label_char_loaded = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1361     auto vixl_str = VixlReg(str);
1362     auto vixl_dst = VixlReg(dst);
1363 
1364     auto rzero = GetRegfile()->GetZeroReg().GetId();
1365     if (str.GetId() == rzero) {
1366         return;
1367     }
1368     GetMasm()->Tbnz(VixlReg(length), 0, label_not_compressed);
1369     GetMasm()->Ldrb(vixl_dst, MemOperand(vixl_str, data_offset + index));
1370     GetMasm()->B(label_char_loaded);
1371     GetMasm()->Bind(label_not_compressed);
1372     GetMasm()->Ldrh(vixl_dst, MemOperand(vixl_str, data_offset + (index << shift)));
1373     GetMasm()->Bind(label_char_loaded);
1374 }
1375 
1376 /* Unsafe builtins implementation */
1377 void Aarch64Encoder::EncodeCompareAndSwap(Reg dst, Reg obj, Reg offset, Reg val, Reg newval)
1378 {
1379     /* Modeled according to the following logic:
1380       .L2:
1381       ldaxr   cur, [addr]
1382       cmp     cur, old
1383       bne     .L3
1384       stlxr   res, new, [addr]
1385       cbnz    res, .L2
1386       .L3:
1387       cset    w0, eq
1388     */
1389     ScopedTmpReg addr(this, true); /* LR is used */
1390     ScopedTmpReg cur(this, val.GetType());
1391     ScopedTmpReg res(this, val.GetType());
1392     auto loop = CreateLabel();
1393     auto exit = CreateLabel();
1394 
1395     /* ldaxr wants [reg]-form of memref (no offset or disp) */
1396     EncodeAdd(addr, obj, offset);
1397 
1398     BindLabel(loop);
1399     EncodeLdrExclusive(cur, addr, true);
1400     EncodeJump(exit, cur, val, Condition::NE);
1401     EncodeStrExclusive(res, newval, addr, true);
1402     EncodeJump(loop, res, Imm(0), Condition::NE);
1403     BindLabel(exit);
1404 
1405     GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::eq);
1406 }
1407 
1408 void Aarch64Encoder::EncodeUnsafeGetAndSet(Reg dst, Reg obj, Reg offset, Reg val)
1409 {
1410     auto cur = ScopedTmpReg(this, val.GetType());
1411     auto last = ScopedTmpReg(this, val.GetType());
1412     auto addr = ScopedTmpReg(this, true); /* LR is used */
1413     auto mem = MemRef(addr);
1414     auto restart = CreateLabel();
1415     auto retry_ldaxr = CreateLabel();
1416 
1417     /* ldaxr wants [reg]-form of memref (no offset or disp) */
1418     EncodeAdd(addr, obj, offset);
1419 
1420     /* Since GetAndSet is defined as a non-faulting operation we
1421      * have to cover two possible faulty cases:
1422      *      1. stlxr failed, so we have to retry the ldaxr
1423      *      2. the value we got via ldaxr was not the value we initially
1424      *         loaded, so we have to start from the very beginning */
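    /* Roughly the following loop is emitted (register names are illustrative only):
     * .Lrestart:
     *   ldar   last, [addr]
     * .Lretry:
     *   ldaxr  cur, [addr]
     *   cmp    cur, last
     *   b.ne   .Lrestart          ; the value changed, reload it
     *   stlxr  w_dst, val, [addr]
     *   cbnz   w_dst, .Lretry     ; the store failed, retry
     *   mov    dst, cur
     */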
1425     BindLabel(restart);
1426     EncodeLdrAcquire(last, false, mem);
1427 
1428     BindLabel(retry_ldaxr);
1429     EncodeLdrExclusive(cur, addr, true);
1430     EncodeJump(restart, cur, last, Condition::NE);
1431     EncodeStrExclusive(dst, val, addr, true);
1432     EncodeJump(retry_ldaxr, dst, Imm(0), Condition::NE);
1433 
1434     EncodeMov(dst, cur);
1435 }
1436 
1437 void Aarch64Encoder::EncodeUnsafeGetAndAdd(Reg dst, Reg obj, Reg offset, Reg val, Reg tmp)
1438 {
1439     ScopedTmpReg cur(this, val.GetType());
1440     ScopedTmpReg last(this, val.GetType());
1441     auto newval = Reg(tmp.GetId(), val.GetType());
1442 
1443     auto restart = CreateLabel();
1444     auto retry_ldaxr = CreateLabel();
1445 
1446     /* addr_reg aliases obj; the obj register will be restored before exit */
1447     auto addr = Reg(obj.GetId(), INT64_TYPE);
1448 
1449     /* ldaxr wants [reg]-form of memref (no offset or disp) */
1450     auto mem = MemRef(addr);
1451     EncodeAdd(addr, obj, offset);
1452 
1453     /* Since GetAndAdd is defined as a non-faulting operation we
1454      * have to cover two possible faulty cases:
1455      *      1. stlxr failed, so we have to retry the ldaxr
1456      *      2. the value we got via ldaxr was not the value we initially
1457      *         loaded, so we have to start from the very beginning */
1458     BindLabel(restart);
1459     EncodeLdrAcquire(last, false, mem);
1460     EncodeAdd(newval, last, val);
1461 
1462     BindLabel(retry_ldaxr);
1463     EncodeLdrExclusive(cur, addr, true);
1464     EncodeJump(restart, cur, last, Condition::NE);
1465     EncodeStrExclusive(dst, newval, addr, true);
1466     EncodeJump(retry_ldaxr, dst, Imm(0), Condition::NE);
1467 
1468     EncodeSub(obj, addr, offset); /* restore the original value */
1469     EncodeMov(dst, cur);
1470 }
1471 
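/* For reference, these barriers presumably lower to (an assumption about VIXL's encoding, not
 * verified against every VIXL version): Acquire -> dmb ishld, Release -> dmb ishst, Full -> dmb ish */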
1472 void Aarch64Encoder::EncodeMemoryBarrier(MemoryOrder::Order order)
1473 {
1474     switch (order) {
1475         case MemoryOrder::Acquire: {
1476             GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierReads);
1477             break;
1478         }
1479         case MemoryOrder::Release: {
1480             GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierWrites);
1481             break;
1482         }
1483         case MemoryOrder::Full: {
1484             GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierAll);
1485             break;
1486         }
1487         default:
1488             break;
1489     }
1490 }
1491 
1492 void Aarch64Encoder::EncodeNot(Reg dst, Reg src)
1493 {
1494     GetMasm()->Mvn(VixlReg(dst), VixlReg(src));
1495 }
1496 
1497 void Aarch64Encoder::EncodeCastFloat(Reg dst, bool dst_signed, Reg src, bool src_signed)
1498 {
1499     // We DO NOT support casts from float32/64 to int8/16 or bool, because such a cast is not defined in other
1500     // languages or architectures, so we do not know what the behavior should be.
1501     // There is, however, an implementation in another function: "EncodeCastFloatWithSmallDst". Call it in the
1502     // "EncodeCast" function instead of "EncodeCastFloat". It works as follows: cast from float32/64 to int32, move
1503     // the sign bit from int32 to the dst type, then extend the number from the dst type back to int32 (a necessary
1504     // condition for the ISA). All of the work happens in the dst register.
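    // A few illustrative lowerings (an informal sketch; register names are placeholders):
    //   f64 -> i64 (signed)  : fcvtzs x_dst, d_src
    //   f64 -> u32           : fcvtzu w_dst, d_src
    //   f64 -> i32 (JS mode) : fjcvtzs w_dst, d_src
    //   i64 -> f64 (signed)  : scvtf  d_dst, x_src
    //   f32 -> f64           : fcvt   d_dst, s_src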
1505     ASSERT(dst.GetSize() >= WORD_SIZE);
1506 
1507     if (src.IsFloat() && dst.IsScalar()) {
1508         if (dst_signed) {
1509             if (!IsJsNumberCast()) {
1510                 GetMasm()->Fcvtzs(VixlReg(dst), VixlVReg(src));
1511             } else {
1512                 CHECK_EQ(src.GetSize(), BITS_PER_UINT64);
1513                 vixl::CPUFeaturesScope scope(GetMasm(), vixl::CPUFeatures::kFP, vixl::CPUFeatures::kJSCVT);
1514                 GetMasm()->Fjcvtzs(VixlReg(dst.As(INT32_TYPE)), VixlVReg(src));
1515             }
1516             return;
1517         }
1518         GetMasm()->Fcvtzu(VixlReg(dst), VixlVReg(src));
1519         return;
1520     }
1521     if (src.IsScalar() && dst.IsFloat()) {
1522         if (src_signed) {
1523             GetMasm()->Scvtf(VixlVReg(dst), VixlReg(src));
1524         } else {
1525             GetMasm()->Ucvtf(VixlVReg(dst), VixlReg(src));
1526         }
1527         return;
1528     }
1529     if (src.IsFloat() && dst.IsFloat()) {
1530         if (src.GetSize() != dst.GetSize()) {
1531             GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
1532             return;
1533         }
1534         GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
1535         return;
1536     }
1537     UNREACHABLE();
1538 }
1539 
1540 void Aarch64Encoder::EncodeCastFloatWithSmallDst(Reg dst, bool dst_signed, Reg src, bool src_signed)
1541 {
1542     // The bool dst type is not supported!
1543 
1544     if (src.IsFloat() && dst.IsScalar()) {
1545         if (dst_signed) {
1546             GetMasm()->Fcvtzs(VixlReg(dst), VixlVReg(src));
1547             if (dst.GetSize() < WORD_SIZE) {
1548                 constexpr uint32_t TEST_BIT = (1U << (static_cast<uint32_t>(WORD_SIZE) - 1));
1549                 ScopedTmpReg tmp_reg1(this, dst.GetType());
1550                 auto tmp1 = VixlReg(tmp_reg1);
1551                 ScopedTmpReg tmp_reg2(this, dst.GetType());
1552                 auto tmp2 = VixlReg(tmp_reg2);
1553 
1554                 // NOLINTNEXTLINE(hicpp-signed-bitwise)
1555                 int32_t set_bit = (dst.GetSize() == BYTE_SIZE) ? (1UL << (BYTE_SIZE - 1)) : (1UL << (HALF_SIZE - 1));
1556                 int32_t rem_bit = set_bit - 1;
1557                 GetMasm()->Ands(tmp1, VixlReg(dst), TEST_BIT);
1558 
1559                 GetMasm()->Orr(tmp1, VixlReg(dst), set_bit);
1560                 GetMasm()->And(tmp2, VixlReg(dst), rem_bit);
1561                 // Select the result: tmp2 if the zero flag is set, otherwise tmp1
1562                 GetMasm()->Csel(VixlReg(dst), tmp2, tmp1, vixl::aarch64::eq);
1563                 EncodeCastScalar(Reg(dst.GetId(), INT32_TYPE), dst_signed, dst, dst_signed);
1564             }
1565             return;
1566         }
1567         GetMasm()->Fcvtzu(VixlReg(dst), VixlVReg(src));
1568         if (dst.GetSize() < WORD_SIZE) {
1569             EncodeCastScalar(Reg(dst.GetId(), INT32_TYPE), dst_signed, dst, dst_signed);
1570         }
1571         return;
1572     }
1573     if (src.IsScalar() && dst.IsFloat()) {
1574         if (src_signed) {
1575             GetMasm()->Scvtf(VixlVReg(dst), VixlReg(src));
1576         } else {
1577             GetMasm()->Ucvtf(VixlVReg(dst), VixlReg(src));
1578         }
1579         return;
1580     }
1581     if (src.IsFloat() && dst.IsFloat()) {
1582         if (src.GetSize() != dst.GetSize()) {
1583             GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
1584             return;
1585         }
1586         GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
1587         return;
1588     }
1589     UNREACHABLE();
1590 }
1591 
1592 void Aarch64Encoder::EncodeCastSigned(Reg dst, Reg src)
1593 {
1594     size_t src_size = src.GetSize();
1595     size_t dst_size = dst.GetSize();
1596     auto src_r = Reg(src.GetId(), dst.GetType());
1597     // Clamp the source size to the destination size, then sign-extend
1598     if (src_size > dst_size) {
1599         src_size = dst_size;
1600     }
1601     switch (src_size) {
1602         case BYTE_SIZE:
1603             GetMasm()->Sxtb(VixlReg(dst), VixlReg(src_r));
1604             break;
1605         case HALF_SIZE:
1606             GetMasm()->Sxth(VixlReg(dst), VixlReg(src_r));
1607             break;
1608         case WORD_SIZE:
1609             GetMasm()->Sxtw(VixlReg(dst), VixlReg(src_r));
1610             break;
1611         case DOUBLE_WORD_SIZE:
1612             GetMasm()->Mov(VixlReg(dst), VixlReg(src_r));
1613             break;
1614         default:
1615             SetFalseResult();
1616             break;
1617     }
1618 }
1619 
1620 void Aarch64Encoder::EncodeCastUnsigned(Reg dst, Reg src)
1621 {
1622     size_t src_size = src.GetSize();
1623     size_t dst_size = dst.GetSize();
1624     auto src_r = Reg(src.GetId(), dst.GetType());
1625     if (src_size > dst_size && dst_size < WORD_SIZE) {
1626             // We need to truncate the value if the destination is narrower than 32 bits; the ISA requires this.
1627         int64_t cut_value = (1ULL << dst_size) - 1;
1628         GetMasm()->And(VixlReg(dst), VixlReg(src), VixlImm(cut_value));
1629         return;
1630     }
1631     // Otherwise, zero-extend from the source size
1632     switch (src_size) {
1633         case BYTE_SIZE:
1634             GetMasm()->Uxtb(VixlReg(dst), VixlReg(src_r));
1635             return;
1636         case HALF_SIZE:
1637             GetMasm()->Uxth(VixlReg(dst), VixlReg(src_r));
1638             return;
1639         case WORD_SIZE:
1640             GetMasm()->Uxtw(VixlReg(dst), VixlReg(src_r));
1641             return;
1642         case DOUBLE_WORD_SIZE:
1643             GetMasm()->Mov(VixlReg(dst), VixlReg(src_r));
1644             return;
1645         default:
1646             SetFalseResult();
1647             return;
1648     }
1649 }
1650 
1651 void Aarch64Encoder::EncodeCastScalar(Reg dst, bool dst_signed, Reg src, bool src_signed)
1652 {
1653     size_t src_size = src.GetSize();
1654     size_t dst_size = dst.GetSize();
1655     // In our ISA the minimal type is 32-bit, so any type narrower than 32 bits
1656     // must be extended to 32 bits. This may require two casts
1657     // (for example, i8->u16 is performed as i8->u16 followed by u16->u32)
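    // For instance (a sketch; the actual register views are chosen by VixlReg):
    //   i8 src -> u16 dst lowers to: sxtb w_dst, w_src ; uxth w_dst, w_dst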
1658     if (dst_size < WORD_SIZE) {
1659         if (src_size > dst_size) {
1660             if (dst_signed) {
1661                 EncodeCastSigned(dst, src);
1662             } else {
1663                 EncodeCastUnsigned(dst, src);
1664             }
1665             return;
1666         }
1667         if (src_size == dst_size) {
1668             GetMasm()->Mov(VixlReg(dst), VixlReg(src));
1669             if (!(src_signed || dst_signed) || (src_signed && dst_signed)) {
1670                 return;
1671             }
1672             if (dst_signed) {
1673                 EncodeCastSigned(Reg(dst.GetId(), INT32_TYPE), dst);
1674             } else {
1675                 EncodeCastUnsigned(Reg(dst.GetId(), INT32_TYPE), dst);
1676             }
1677             return;
1678         }
1679         if (src_signed) {
1680             EncodeCastSigned(dst, src);
1681             if (!dst_signed) {
1682                 EncodeCastUnsigned(Reg(dst.GetId(), INT32_TYPE), dst);
1683             }
1684         } else {
1685             EncodeCastUnsigned(dst, src);
1686             if (dst_signed) {
1687                 EncodeCastSigned(Reg(dst.GetId(), INT32_TYPE), dst);
1688             }
1689         }
1690     } else {
1691         if (src_size == dst_size) {
1692             GetMasm()->Mov(VixlReg(dst), VixlReg(src));
1693             return;
1694         }
1695         if (src_signed) {
1696             EncodeCastSigned(dst, src);
1697         } else {
1698             EncodeCastUnsigned(dst, src);
1699         }
1700     }
1701 }
1702 
1703 void Aarch64Encoder::EncodeCast(Reg dst, bool dst_signed, Reg src, bool src_signed)
1704 {
1705     if (src.IsFloat() || dst.IsFloat()) {
1706         EncodeCastFloat(dst, dst_signed, src, src_signed);
1707         return;
1708     }
1709 
1710     ASSERT(src.IsScalar() && dst.IsScalar());
1711     auto rzero = GetRegfile()->GetZeroReg().GetId();
1712     if (src.GetId() == rzero) {
1713         ASSERT(dst.GetId() != rzero);
1714         EncodeMov(dst, Imm(0));
1715         return;
1716     }
1717     // Scalar part
1718     EncodeCastScalar(dst, dst_signed, src, src_signed);
1719 }
1720 
1721 void Aarch64Encoder::EncodeCastToBool(Reg dst, Reg src)
1722 {
1723     // The ISA says that we only support the casts:
1724     // i32tou1, i64tou1, u32tou1, u64tou1
1725     ASSERT(src.IsScalar());
1726     ASSERT(dst.IsScalar());
1727 
1728     GetMasm()->Cmp(VixlReg(src), VixlImm(0));
1729     // In our ISA the minimal type is 32-bit, so bool is kept in 32 bits
1730     GetMasm()->Cset(VixlReg(Reg(dst.GetId(), INT32_TYPE)), vixl::aarch64::Condition::ne);
1731 }
1732 
1733 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src0, Shift src1)
1734 {
1735     if (dst.IsFloat()) {
1736         UNREACHABLE();
1737     }
1738     ASSERT(src0.GetSize() <= dst.GetSize());
1739     if (src0.GetSize() < dst.GetSize()) {
1740         auto src0_reg = Reg(src0.GetId(), dst.GetType());
1741         auto src1_reg = Reg(src1.GetBase().GetId(), dst.GetType());
1742         GetMasm()->Add(VixlReg(dst), VixlReg(src0_reg), VixlShift(Shift(src1_reg, src1.GetType(), src1.GetScale())));
1743         return;
1744     }
1745     GetMasm()->Add(VixlReg(dst), VixlReg(src0), VixlShift(src1));
1746 }
1747 
1748 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src0, Reg src1)
1749 {
1750     if (dst.IsFloat()) {
1751         GetMasm()->Fadd(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1752         return;
1753     }
1754 
1755     /* if any of the operands has 64-bits size,
1756      * forcibly do the 64-bits wide operation */
1757     if ((src0.GetSize() | src1.GetSize() | dst.GetSize()) >= DOUBLE_WORD_SIZE) {
1758         GetMasm()->Add(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1759     } else {
1760         /* Otherwise do 32-bits operation as any lesser
1761          * sizes have to be upcasted to 32-bits anyway */
1762         GetMasm()->Add(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1763     }
1764 }
1765 
1766 void Aarch64Encoder::EncodeSub(Reg dst, Reg src0, Shift src1)
1767 {
1768     ASSERT(dst.IsScalar());
1769     GetMasm()->Sub(VixlReg(dst), VixlReg(src0), VixlShift(src1));
1770 }
1771 
1772 void Aarch64Encoder::EncodeSub(Reg dst, Reg src0, Reg src1)
1773 {
1774     if (dst.IsFloat()) {
1775         GetMasm()->Fsub(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1776         return;
1777     }
1778 
1779     /* if any of the operands has 64-bits size,
1780      * forcibly do the 64-bits wide operation */
1781     if ((src0.GetSize() | src1.GetSize() | dst.GetSize()) >= DOUBLE_WORD_SIZE) {
1782         GetMasm()->Sub(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1783     } else {
1784         /* Otherwise do 32-bits operation as any lesser
1785          * sizes have to be upcasted to 32-bits anyway */
1786         GetMasm()->Sub(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1787     }
1788 }
1789 
1790 void Aarch64Encoder::EncodeMul(Reg dst, Reg src0, Reg src1)
1791 {
1792     if (dst.IsFloat()) {
1793         GetMasm()->Fmul(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1794         return;
1795     }
1796     auto rzero = GetRegfile()->GetZeroReg().GetId();
1797     if (src0.GetId() == rzero || src1.GetId() == rzero) {
1798         EncodeMov(dst, Imm(0));
1799         return;
1800     }
1801     GetMasm()->Mul(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1802 }
1803 
1804 void Aarch64Encoder::EncodeAddOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1805 {
1806     ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1807     ASSERT(cc == Condition::VS || cc == Condition::VC);
1808     if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1809         GetMasm()->Adds(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1810     } else {
1811         /* Otherwise do 32-bits operation as any lesser
1812          * sizes have to be upcasted to 32-bits anyway */
1813         GetMasm()->Adds(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1814     }
1815     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
1816     GetMasm()->B(label, Convert(cc));
1817 }
1818 
1819 void Aarch64Encoder::EncodeSubOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1820 {
1821     ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1822     ASSERT(cc == Condition::VS || cc == Condition::VC);
1823     if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1824         GetMasm()->Subs(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1825     } else {
1826         /* Otherwise do 32-bits operation as any lesser
1827          * sizes have to be upcasted to 32-bits anyway */
1828         GetMasm()->Subs(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1829     }
1830     auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
1831     GetMasm()->B(label, Convert(cc));
1832 }
1833 
1834 void Aarch64Encoder::EncodeDiv(Reg dst, bool dst_signed, Reg src0, Reg src1)
1835 {
1836     if (dst.IsFloat()) {
1837         GetMasm()->Fdiv(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1838         return;
1839     }
1840 
1841     auto rzero = GetRegfile()->GetZeroReg().GetId();
1842     if (src1.GetId() == rzero || src0.GetId() == rzero) {
1843         ScopedTmpReg tmp_reg(this, src1.GetType());
1844         EncodeMov(tmp_reg, Imm(0));
1845         // Denominator is zero-reg
1846         if (src1.GetId() == rzero) {
1847             // Encode Abort
1848             GetMasm()->Udiv(VixlReg(dst), VixlReg(tmp_reg), VixlReg(tmp_reg));
1849             return;
1850         }
1851 
1852         // But src1 still may be zero
1853         if (src1.GetId() != src0.GetId()) {
1854             if (dst_signed) {
1855                 GetMasm()->Sdiv(VixlReg(dst), VixlReg(tmp_reg), VixlReg(src1));
1856             } else {
1857                 GetMasm()->Udiv(VixlReg(dst), VixlReg(tmp_reg), VixlReg(src1));
1858             }
1859             return;
1860         }
1861         UNREACHABLE();
1862     }
1863     if (dst_signed) {
1864         GetMasm()->Sdiv(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1865     } else {
1866         GetMasm()->Udiv(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1867     }
1868 }
1869 
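/* The scalar remainder below is computed as dst = src0 - (src0 / src1) * src1, i.e. roughly
 * (unsigned case, illustrative registers): udiv tmp, src0, src1 ; msub dst, tmp, src1, src0 */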
1870 void Aarch64Encoder::EncodeMod(Reg dst, bool dst_signed, Reg src0, Reg src1)
1871 {
1872     if (dst.IsScalar()) {
1873         auto rzero = GetRegfile()->GetZeroReg().GetId();
1874         if (src1.GetId() == rzero || src0.GetId() == rzero) {
1875             ScopedTmpReg tmp_reg(this, src1.GetType());
1876             EncodeMov(tmp_reg, Imm(0));
1877             // Denominator is zero-reg
1878             if (src1.GetId() == rzero) {
1879                 // Encode Abort
1880                 GetMasm()->Udiv(VixlReg(dst), VixlReg(tmp_reg), VixlReg(tmp_reg));
1881                 return;
1882             }
1883 
1884             if (src1.GetId() == src0.GetId()) {
1885                 SetFalseResult();
1886                 return;
1887             }
1888             // But src1 still may be zero
1889             ScopedTmpRegU64 tmp_reg_ud(this);
1890             if (dst.GetSize() < DOUBLE_WORD_SIZE) {
1891                 tmp_reg_ud.ChangeType(INT32_TYPE);
1892             }
1893             auto tmp = VixlReg(tmp_reg_ud);
1894             if (!dst_signed) {
1895                 GetMasm()->Udiv(tmp, VixlReg(tmp_reg), VixlReg(src1));
1896                 GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(tmp_reg));
1897                 return;
1898             }
1899             GetMasm()->Sdiv(tmp, VixlReg(tmp_reg), VixlReg(src1));
1900             GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(tmp_reg));
1901             return;
1902         }
1903 
1904         ScopedTmpRegU64 tmp_reg(this);
1905         if (dst.GetSize() < DOUBLE_WORD_SIZE) {
1906             tmp_reg.ChangeType(INT32_TYPE);
1907         }
1908         auto tmp = VixlReg(tmp_reg);
1909 
1910         if (!dst_signed) {
1911             GetMasm()->Udiv(tmp, VixlReg(src0), VixlReg(src1));
1912             GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(src0));
1913             return;
1914         }
1915         GetMasm()->Sdiv(tmp, VixlReg(src0), VixlReg(src1));
1916         GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(src0));
1917         return;
1918     }
1919 
1920     EncodeFMod(dst, src0, src1);
1921 }
1922 
1923 void Aarch64Encoder::EncodeFMod(Reg dst, Reg src0, Reg src1)
1924 {
1925     ASSERT(dst.IsFloat());
1926 
1927     if (dst.GetType() == FLOAT32_TYPE) {
1928         using fp = float (*)(float, float);
1929         MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<fp>(fmodf)));
1930     } else {
1931         using fp = double (*)(double, double);
1932         MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<fp>(fmod)));
1933     }
1934 }
1935 
1936 void Aarch64Encoder::EncodeMin(Reg dst, bool dst_signed, Reg src0, Reg src1)
1937 {
1938     if (dst.IsFloat()) {
1939         GetMasm()->Fmin(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1940         return;
1941     }
1942     if (dst_signed) {
1943         GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
1944         GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::lt);
1945         return;
1946     }
1947     GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
1948     GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::ls);
1949 }
1950 
1951 void Aarch64Encoder::EncodeMax(Reg dst, bool dst_signed, Reg src0, Reg src1)
1952 {
1953     if (dst.IsFloat()) {
1954         GetMasm()->Fmax(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1955         return;
1956     }
1957     if (dst_signed) {
1958         GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
1959         GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::gt);
1960         return;
1961     }
1962     GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
1963     GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::hi);
1964 }
1965 
1966 void Aarch64Encoder::EncodeShl(Reg dst, Reg src0, Reg src1)
1967 {
1968     auto rzero = GetRegfile()->GetZeroReg().GetId();
1969     ASSERT(dst.GetId() != rzero);
1970     if (src0.GetId() == rzero) {
1971         EncodeMov(dst, Imm(0));
1972         return;
1973     }
1974     if (src1.GetId() == rzero) {
1975         EncodeMov(dst, src0);
1976     }
1977     if (dst.GetSize() < WORD_SIZE) {
1978         GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
1979     }
1980     GetMasm()->Lsl(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1981 }
1982 
1983 void Aarch64Encoder::EncodeShr(Reg dst, Reg src0, Reg src1)
1984 {
1985     auto rzero = GetRegfile()->GetZeroReg().GetId();
1986     ASSERT(dst.GetId() != rzero);
1987     if (src0.GetId() == rzero) {
1988         EncodeMov(dst, Imm(0));
1989         return;
1990     }
1991     if (src1.GetId() == rzero) {
1992         EncodeMov(dst, src0);
1993     }
1994 
1995     if (dst.GetSize() < WORD_SIZE) {
1996         GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
1997     }
1998 
1999     GetMasm()->Lsr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2000 }
2001 
2002 void Aarch64Encoder::EncodeAShr(Reg dst, Reg src0, Reg src1)
2003 {
2004     auto rzero = GetRegfile()->GetZeroReg().GetId();
2005     ASSERT(dst.GetId() != rzero);
2006     if (src0.GetId() == rzero) {
2007         EncodeMov(dst, Imm(0));
2008         return;
2009     }
2010     if (src1.GetId() == rzero) {
2011         EncodeMov(dst, src0);
2012     }
2013 
2014     if (dst.GetSize() < WORD_SIZE) {
2015         GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
2016     }
2017     GetMasm()->Asr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2018 }
2019 
2020 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src0, Reg src1)
2021 {
2022     GetMasm()->And(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2023 }
2024 
2025 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src0, Shift src1)
2026 {
2027     GetMasm()->And(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2028 }
2029 
2030 void Aarch64Encoder::EncodeOr(Reg dst, Reg src0, Reg src1)
2031 {
2032     GetMasm()->Orr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2033 }
2034 
2035 void Aarch64Encoder::EncodeOr(Reg dst, Reg src0, Shift src1)
2036 {
2037     GetMasm()->Orr(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2038 }
2039 
2040 void Aarch64Encoder::EncodeXor(Reg dst, Reg src0, Reg src1)
2041 {
2042     GetMasm()->Eor(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2043 }
2044 
2045 void Aarch64Encoder::EncodeXor(Reg dst, Reg src0, Shift src1)
2046 {
2047     GetMasm()->Eor(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2048 }
2049 
2050 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src, Imm imm)
2051 {
2052     ASSERT(dst.IsScalar() && "UNIMPLEMENTED");
2053     ASSERT(dst.GetSize() >= src.GetSize());
2054     if (dst.GetSize() != src.GetSize()) {
2055         auto src_reg = Reg(src.GetId(), dst.GetType());
2056         GetMasm()->Add(VixlReg(dst), VixlReg(src_reg), VixlImm(imm));
2057         return;
2058     }
2059     GetMasm()->Add(VixlReg(dst), VixlReg(src), VixlImm(imm));
2060 }
2061 
2062 void Aarch64Encoder::EncodeSub(Reg dst, Reg src, Imm imm)
2063 {
2064     ASSERT(dst.IsScalar() && "UNIMPLEMENTED");
2065     GetMasm()->Sub(VixlReg(dst), VixlReg(src), VixlImm(imm));
2066 }
2067 
2068 void Aarch64Encoder::EncodeShl(Reg dst, Reg src, Imm imm)
2069 {
2070     ASSERT(dst.IsScalar() && "Invalid operand type");
2071     auto rzero = GetRegfile()->GetZeroReg().GetId();
2072     ASSERT(dst.GetId() != rzero);
2073     if (src.GetId() == rzero) {
2074         EncodeMov(dst, Imm(0));
2075         return;
2076     }
2077 
2078     GetMasm()->Lsl(VixlReg(dst), VixlReg(src), GetIntValue(imm));
2079 }
2080 
2081 void Aarch64Encoder::EncodeShr(Reg dst, Reg src, Imm imm)
2082 {
2083     int64_t imm_value = static_cast<uint64_t>(GetIntValue(imm)) & (dst.GetSize() - 1);
2084 
2085     ASSERT(dst.IsScalar() && "Invalid operand type");
2086     auto rzero = GetRegfile()->GetZeroReg().GetId();
2087     ASSERT(dst.GetId() != rzero);
2088     if (src.GetId() == rzero) {
2089         EncodeMov(dst, Imm(0));
2090         return;
2091     }
2092 
2093     GetMasm()->Lsr(VixlReg(dst), VixlReg(src), imm_value);
2094 }
2095 
2096 void Aarch64Encoder::EncodeAShr(Reg dst, Reg src, Imm imm)
2097 {
2098     ASSERT(dst.IsScalar() && "Invalid operand type");
2099     GetMasm()->Asr(VixlReg(dst), VixlReg(src), GetIntValue(imm));
2100 }
2101 
2102 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src, Imm imm)
2103 {
2104     ASSERT(dst.IsScalar() && "Invalid operand type");
2105     GetMasm()->And(VixlReg(dst), VixlReg(src), VixlImm(imm));
2106 }
2107 
2108 void Aarch64Encoder::EncodeOr(Reg dst, Reg src, Imm imm)
2109 {
2110     ASSERT(dst.IsScalar() && "Invalid operand type");
2111     GetMasm()->Orr(VixlReg(dst), VixlReg(src), VixlImm(imm));
2112 }
2113 
2114 void Aarch64Encoder::EncodeXor(Reg dst, Reg src, Imm imm)
2115 {
2116     ASSERT(dst.IsScalar() && "Invalid operand type");
2117     GetMasm()->Eor(VixlReg(dst), VixlReg(src), VixlImm(imm));
2118 }
2119 
2120 void Aarch64Encoder::EncodeMov(Reg dst, Imm src)
2121 {
2122     if (dst.IsFloat()) {
2123         if (dst.GetSize() == WORD_SIZE) {
2124             GetMasm()->Fmov(VixlVReg(dst), src.GetValue<float>());
2125         } else {
2126             GetMasm()->Fmov(VixlVReg(dst), src.GetValue<double>());
2127         }
2128         return;
2129     }
2130     GetMasm()->Mov(VixlReg(dst), VixlImm(src));
2131 }
2132 
2133 void Aarch64Encoder::EncodeLdr(Reg dst, bool dst_signed, MemRef mem)
2134 {
2135     auto rzero = GetRegfile()->GetZeroReg().GetId();
2136 
2137     if (!ConvertMem(mem).IsValid() || (dst.GetId() == rzero && dst.IsScalar())) {
2138         // Try to move zero into dst and use it as the base (to avoid creating a temp reg)
2139         // Check: dst is not a vector reg, not the index reg, and not rzero
2140         [[maybe_unused]] auto base_reg = mem.GetBase();
2141         auto index_reg = mem.GetIndex();
2142 
2143         // Invalid == base is rzero or invalid
2144         ASSERT(base_reg.GetId() == rzero || !base_reg.IsValid());
2145         // check whether the dst register can be reused
2146         if (dst.IsScalar() && dst.IsValid() &&     // not float
2147             (index_reg.GetId() != dst.GetId()) &&  // not index
2148             (dst.GetId() != rzero)) {              // not rzero
2149             // dst can be used in place of rzero
2150             EncodeMov(dst, Imm(0));
2151 
2152             auto fix_mem = MemRef(dst, index_reg, mem.GetScale(), mem.GetDisp());
2153             ASSERT(ConvertMem(fix_mem).IsValid());
2154             EncodeLdr(dst, dst_signed, fix_mem);
2155         } else {
2156             // Use tmp-reg
2157             ScopedTmpReg tmp_reg(this);
2158             EncodeMov(tmp_reg, Imm(0));
2159 
2160             auto fix_mem = MemRef(tmp_reg, index_reg, mem.GetScale(), mem.GetDisp());
2161             ASSERT(ConvertMem(fix_mem).IsValid());
2162             // Used for zero-dst
2163             EncodeLdr(tmp_reg, dst_signed, fix_mem);
2164         }
2165         return;
2166     }
2167     ASSERT(ConvertMem(mem).IsValid());
2168     if (dst.IsFloat()) {
2169         GetMasm()->Ldr(VixlVReg(dst), ConvertMem(mem));
2170         return;
2171     }
2172     if (dst_signed) {
2173         if (dst.GetSize() == BYTE_SIZE) {
2174             GetMasm()->Ldrsb(VixlReg(dst, DOUBLE_WORD_SIZE), ConvertMem(mem));
2175             return;
2176         }
2177         if (dst.GetSize() == HALF_SIZE) {
2178             GetMasm()->Ldrsh(VixlReg(dst), ConvertMem(mem));
2179             return;
2180         }
2181     } else {
2182         if (dst.GetSize() == BYTE_SIZE) {
2183             GetMasm()->Ldrb(VixlReg(dst, WORD_SIZE), ConvertMem(mem));
2184             return;
2185         }
2186         if (dst.GetSize() == HALF_SIZE) {
2187             GetMasm()->Ldrh(VixlReg(dst), ConvertMem(mem));
2188             return;
2189         }
2190     }
2191     GetMasm()->Ldr(VixlReg(dst), ConvertMem(mem));
2192 }
2193 
2194 void Aarch64Encoder::EncodeLdrAcquireInvalid(Reg dst, bool dst_signed, MemRef mem)
2195 {
2196     // Try to move zero into dst and use it as the base (to avoid creating a temp reg)
2197     // Check: dst is not a vector reg, not the index reg, and not rzero
2198     [[maybe_unused]] auto base_reg = mem.GetBase();
2199     auto rzero = GetRegfile()->GetZeroReg().GetId();
2200 
2201     auto index_reg = mem.GetIndex();
2202 
2203     // Invalid == base is rzero or invalid
2204     ASSERT(base_reg.GetId() == rzero || !base_reg.IsValid());
2205     // check whether the dst register can be reused
2206     if (dst.IsScalar() && dst.IsValid() &&     // not float
2207         (index_reg.GetId() != dst.GetId()) &&  // not index
2208         (dst.GetId() != rzero)) {              // not rzero
2209         // dst can be used in place of rzero
2210         EncodeMov(dst, Imm(0));
2211 
2212         auto fix_mem = MemRef(dst, index_reg, mem.GetScale(), mem.GetDisp());
2213         ASSERT(ConvertMem(fix_mem).IsValid());
2214         EncodeLdrAcquire(dst, dst_signed, fix_mem);
2215     } else {
2216         // Use tmp-reg
2217         ScopedTmpReg tmp_reg(this);
2218         EncodeMov(tmp_reg, Imm(0));
2219 
2220         auto fix_mem = MemRef(tmp_reg, index_reg, mem.GetScale(), mem.GetDisp());
2221         ASSERT(ConvertMem(fix_mem).IsValid());
2222         // Used for zero-dst
2223         EncodeLdrAcquire(tmp_reg, dst_signed, fix_mem);
2224     }
2225 }
2226 
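/* AArch64 has no sign-extending load-acquire forms, so the signed cases below are emitted as a
 * plain ldarb/ldarh/ldar followed by sxtb/sxth/sxtw (a note for readers, not a quote from the ISA). */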
2227 void Aarch64Encoder::EncodeLdrAcquireScalar(Reg dst, bool dst_signed, MemRef mem)
2228 {
2229 #ifndef NDEBUG
2230     CheckAlignment(mem, dst.GetSize());
2231 #endif  // NDEBUG
2232     if (dst_signed) {
2233         if (dst.GetSize() == BYTE_SIZE) {
2234             GetMasm()->Ldarb(VixlReg(dst), ConvertMem(mem));
2235             GetMasm()->Sxtb(VixlReg(dst), VixlReg(dst));
2236             return;
2237         }
2238         if (dst.GetSize() == HALF_SIZE) {
2239             GetMasm()->Ldarh(VixlReg(dst), ConvertMem(mem));
2240             GetMasm()->Sxth(VixlReg(dst), VixlReg(dst));
2241             return;
2242         }
2243         if (dst.GetSize() == WORD_SIZE) {
2244             GetMasm()->Ldar(VixlReg(dst), ConvertMem(mem));
2245             GetMasm()->Sxtw(VixlReg(dst), VixlReg(dst));
2246             return;
2247         }
2248     } else {
2249         if (dst.GetSize() == BYTE_SIZE) {
2250             GetMasm()->Ldarb(VixlReg(dst, WORD_SIZE), ConvertMem(mem));
2251             return;
2252         }
2253         if (dst.GetSize() == HALF_SIZE) {
2254             GetMasm()->Ldarh(VixlReg(dst), ConvertMem(mem));
2255             return;
2256         }
2257     }
2258     GetMasm()->Ldar(VixlReg(dst), ConvertMem(mem));
2259 }
2260 
2261 void Aarch64Encoder::CheckAlignment(MemRef mem, size_t size)
2262 {
2263     ASSERT(size == WORD_SIZE || size == BYTE_SIZE || size == HALF_SIZE || size == DOUBLE_WORD_SIZE);
2264     if (size == BYTE_SIZE) {
2265         return;
2266     }
2267     size_t alignment_mask = (size >> 3U) - 1;
2268     ASSERT(!mem.HasIndex() && !mem.HasScale());
2269     if (mem.HasDisp()) {
2270         // We would need an additional tmp register to check the alignment of base + offset together.
2271         // The case where the base and the offset are individually unaligned but their sum is aligned is very rare,
2272         // so the alignment of the base and of the offset is checked separately.
2273         [[maybe_unused]] size_t offset = mem.GetDisp();
2274         ASSERT((offset & alignment_mask) == 0);
2275     }
2276     auto base_reg = mem.GetBase();
2277     auto end = CreateLabel();
2278     EncodeJumpTest(end, base_reg, Imm(alignment_mask), Condition::TST_EQ);
2279     EncodeAbort();
2280     BindLabel(end);
2281 }
2282 
2283 void Aarch64Encoder::EncodeLdrAcquire(Reg dst, bool dst_signed, MemRef mem)
2284 {
2285     ASSERT(!mem.HasIndex() && !mem.HasScale());
2286     auto rzero = GetRegfile()->GetZeroReg().GetId();
2287     if (!ConvertMem(mem).IsValid() || (dst.GetId() == rzero && dst.IsScalar())) {
2288         EncodeLdrAcquireInvalid(dst, dst_signed, mem);
2289         return;
2290     }
2291 
2292     if (dst.IsFloat()) {
2293         ScopedTmpRegU64 tmp_reg(this);
2294         auto mem_ldar = mem;
2295         if (mem.HasDisp()) {
2296             if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2297                 EncodeAdd(tmp_reg, mem.GetBase(), Imm(mem.GetDisp()));
2298             } else {
2299                 EncodeMov(tmp_reg, Imm(mem.GetDisp()));
2300                 EncodeAdd(tmp_reg, mem.GetBase(), tmp_reg);
2301             }
2302             mem_ldar = MemRef(tmp_reg);
2303         }
2304 #ifndef NDEBUG
2305         CheckAlignment(mem_ldar, dst.GetSize());
2306 #endif  // NDEBUG
2307         auto tmp = VixlReg(tmp_reg, dst.GetSize());
2308         GetMasm()->Ldar(tmp, ConvertMem(mem_ldar));
2309         GetMasm()->Fmov(VixlVReg(dst), tmp);
2310         return;
2311     }
2312 
2313     if (!mem.HasDisp()) {
2314         EncodeLdrAcquireScalar(dst, dst_signed, mem);
2315         return;
2316     }
2317 
2318     Reg dst_64(dst.GetId(), INT64_TYPE);
2319     if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2320         EncodeAdd(dst_64, mem.GetBase(), Imm(mem.GetDisp()));
2321     } else {
2322         EncodeMov(dst_64, Imm(mem.GetDisp()));
2323         EncodeAdd(dst_64, mem.GetBase(), dst_64);
2324     }
2325     EncodeLdrAcquireScalar(dst, dst_signed, MemRef(dst_64));
2326 }
2327 
2328 void Aarch64Encoder::EncodeStr(Reg src, MemRef mem)
2329 {
2330     if (!ConvertMem(mem).IsValid()) {
2331         auto index_reg = mem.GetIndex();
2332         auto rzero = GetRegfile()->GetZeroReg().GetId();
2333         // Invalid == base is rzero or invalid
2334         ASSERT(mem.GetBase().GetId() == rzero || !mem.GetBase().IsValid());
2335         // Use tmp-reg
2336         ScopedTmpReg tmp_reg(this);
2337         EncodeMov(tmp_reg, Imm(0));
2338 
2339         auto fix_mem = MemRef(tmp_reg, index_reg, mem.GetScale(), mem.GetDisp());
2340         ASSERT(ConvertMem(fix_mem).IsValid());
2341         if (src.GetId() != rzero) {
2342             EncodeStr(src, fix_mem);
2343         } else {
2344             EncodeStr(tmp_reg, fix_mem);
2345         }
2346         return;
2347     }
2348     ASSERT(ConvertMem(mem).IsValid());
2349     if (src.IsFloat()) {
2350         GetMasm()->Str(VixlVReg(src), ConvertMem(mem));
2351         return;
2352     }
2353     if (src.GetSize() == BYTE_SIZE) {
2354         GetMasm()->Strb(VixlReg(src), ConvertMem(mem));
2355         return;
2356     }
2357     if (src.GetSize() == HALF_SIZE) {
2358         GetMasm()->Strh(VixlReg(src), ConvertMem(mem));
2359         return;
2360     }
2361     GetMasm()->Str(VixlReg(src), ConvertMem(mem));
2362 }
2363 
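/* stlr and friends accept only a [base] addressing mode, so any displacement or index in mem is
 * folded into a temporary base first; FP sources are moved to a GPR with fmov before the store.
 * Sketch for a scalar word with a displacement (illustrative registers):
 *   add  x_base, x_mem_base, #disp
 *   stlr w_src, [x_base]
 */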
2364 void Aarch64Encoder::EncodeStrRelease(Reg src, MemRef mem)
2365 {
2366     ASSERT(!mem.HasScale());
2367 
2368     ScopedTmpRegLazy base(this);
2369     MemRef fixed_mem;
2370     bool mem_was_fixed = false;
2371     if (mem.HasDisp()) {
2372         if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2373             base.AcquireIfInvalid();
2374             EncodeAdd(base, mem.GetBase(), Imm(mem.GetDisp()));
2375         } else {
2376             base.AcquireIfInvalid();
2377             EncodeMov(base, Imm(mem.GetDisp()));
2378             EncodeAdd(base, mem.GetBase(), base);
2379         }
2380         mem_was_fixed = true;
2381     }
2382     if (mem.HasIndex()) {
2383         base.AcquireIfInvalid();
2384         EncodeAdd(base, mem_was_fixed ? base : mem.GetBase(), mem.GetIndex());
2385         mem_was_fixed = true;
2386     }
2387 
2388     if (mem_was_fixed) {
2389         fixed_mem = MemRef(base);
2390     } else {
2391         fixed_mem = mem;
2392     }
2393 
2394 #ifndef NDEBUG
2395     CheckAlignment(mem, src.GetSize());
2396 #endif  // NDEBUG
2397     if (src.IsFloat()) {
2398         ScopedTmpRegU64 tmp_reg(this);
2399         auto tmp = VixlReg(tmp_reg, src.GetSize());
2400         GetMasm()->Fmov(tmp, VixlVReg(src));
2401         GetMasm()->Stlr(tmp, ConvertMem(fixed_mem));
2402         return;
2403     }
2404     if (src.GetSize() == BYTE_SIZE) {
2405         GetMasm()->Stlrb(VixlReg(src), ConvertMem(fixed_mem));
2406         return;
2407     }
2408     if (src.GetSize() == HALF_SIZE) {
2409         GetMasm()->Stlrh(VixlReg(src), ConvertMem(fixed_mem));
2410         return;
2411     }
2412     GetMasm()->Stlr(VixlReg(src), ConvertMem(fixed_mem));
2413 }
2414 
2415 void Aarch64Encoder::EncodeLdrExclusive(Reg dst, Reg addr, bool acquire)
2416 {
2417     ASSERT(dst.IsScalar());
2418     auto dst_reg = VixlReg(dst);
2419     auto mem_cvt = ConvertMem(MemRef(addr));
2420 #ifndef NDEBUG
2421     CheckAlignment(MemRef(addr), dst.GetSize());
2422 #endif  // NDEBUG
2423     if (dst.GetSize() == BYTE_SIZE) {
2424         if (acquire) {
2425             GetMasm()->Ldaxrb(dst_reg, mem_cvt);
2426             return;
2427         }
2428         GetMasm()->Ldxrb(dst_reg, mem_cvt);
2429         return;
2430     }
2431     if (dst.GetSize() == HALF_SIZE) {
2432         if (acquire) {
2433             GetMasm()->Ldaxrh(dst_reg, mem_cvt);
2434             return;
2435         }
2436         GetMasm()->Ldxrh(dst_reg, mem_cvt);
2437         return;
2438     }
2439     if (acquire) {
2440         GetMasm()->Ldaxr(dst_reg, mem_cvt);
2441         return;
2442     }
2443     GetMasm()->Ldxr(dst_reg, mem_cvt);
2444 }
2445 
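/* The status register of stxr/stlxr should not alias the stored data or the address register
 * (as far as we understand the architectural constraint), hence the copy_dst fallback below that
 * writes the status into a temporary register and moves it into dst afterwards. */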
2446 void Aarch64Encoder::EncodeStrExclusive(Reg dst, Reg src, Reg addr, bool release)
2447 {
2448     ASSERT(dst.IsScalar() && src.IsScalar());
2449 
2450     bool copy_dst = dst.GetId() == src.GetId() || dst.GetId() == addr.GetId();
2451     ScopedTmpReg tmp(this);
2452     auto src_reg = VixlReg(src);
2453     auto mem_cvt = ConvertMem(MemRef(addr));
2454     auto dst_reg = copy_dst ? VixlReg(tmp) : VixlReg(dst);
2455 #ifndef NDEBUG
2456     CheckAlignment(MemRef(addr), src.GetSize());
2457 #endif  // NDEBUG
2458 
2459     if (src.GetSize() == BYTE_SIZE) {
2460         if (release) {
2461             GetMasm()->Stlxrb(dst_reg, src_reg, mem_cvt);
2462         } else {
2463             GetMasm()->Stxrb(dst_reg, src_reg, mem_cvt);
2464         }
2465     } else if (src.GetSize() == HALF_SIZE) {
2466         if (release) {
2467             GetMasm()->Stlxrh(dst_reg, src_reg, mem_cvt);
2468         } else {
2469             GetMasm()->Stxrh(dst_reg, src_reg, mem_cvt);
2470         }
2471     } else {
2472         if (release) {
2473             GetMasm()->Stlxr(dst_reg, src_reg, mem_cvt);
2474         } else {
2475             GetMasm()->Stxr(dst_reg, src_reg, mem_cvt);
2476         }
2477     }
2478     if (copy_dst) {
2479         EncodeMov(dst, tmp);
2480     }
2481 }
2482 
2483 void Aarch64Encoder::EncodeStrz(Reg src, MemRef mem)
2484 {
2485     if (!ConvertMem(mem).IsValid()) {
2486         EncodeStr(src, mem);
2487         return;
2488     }
2489     ASSERT(ConvertMem(mem).IsValid());
2490     // The upper half of the register must be zeroed by default
2491     if (src.IsFloat()) {
2492         EncodeStr(src.As(FLOAT64_TYPE), mem);
2493         return;
2494     }
2495     if (src.GetSize() < WORD_SIZE) {
2496         EncodeCast(src, false, src.As(INT64_TYPE), false);
2497     }
2498     GetMasm()->Str(VixlReg(src.As(INT64_TYPE)), ConvertMem(mem));
2499 }
2500 
2501 void Aarch64Encoder::EncodeSti(Imm src, MemRef mem)
2502 {
2503     if (!ConvertMem(mem).IsValid()) {
2504         auto rzero = GetRegfile()->GetZeroReg();
2505         EncodeStr(rzero, mem);
2506         return;
2507     }
2508 
2509     if (src.GetType().IsFloat()) {
2510         if (src.GetSize() == WORD_SIZE) {
2511             ScopedTmpRegF32 tmp_reg(this);
2512             GetMasm()->Fmov(VixlVReg(tmp_reg).S(), src.GetValue<float>());
2513             EncodeStr(tmp_reg, mem);
2514         } else {
2515             ScopedTmpRegF64 tmp_reg(this);
2516             GetMasm()->Fmov(VixlVReg(tmp_reg).D(), src.GetValue<double>());
2517             EncodeStr(tmp_reg, mem);
2518         }
2519         return;
2520     }
2521 
2522     ScopedTmpRegU64 tmp_reg(this);
2523     auto tmp = VixlReg(tmp_reg);
2524     GetMasm()->Mov(tmp, VixlImm(src));
2525     if (src.GetSize() == BYTE_SIZE) {
2526         GetMasm()->Strb(tmp, ConvertMem(mem));
2527         return;
2528     }
2529     if (src.GetSize() == HALF_SIZE) {
2530         GetMasm()->Strh(tmp, ConvertMem(mem));
2531         return;
2532     }
2533     GetMasm()->Str(tmp, ConvertMem(mem));
2534 }
2535 
2536 void Aarch64Encoder::EncodeMemCopy(MemRef mem_from, MemRef mem_to, size_t size)
2537 {
2538     if (!ConvertMem(mem_from).IsValid() || !ConvertMem(mem_to).IsValid()) {
2539         auto rzero = GetRegfile()->GetZeroReg();
2540         if (!ConvertMem(mem_from).IsValid()) {
2541             // Encode one load - will fix inside
2542             EncodeLdr(rzero, false, mem_from);
2543         } else {
2544             ASSERT(!ConvertMem(mem_to).IsValid());
2545             // Encode one store - will fix inside
2546             EncodeStr(rzero, mem_to);
2547         }
2548         return;
2549     }
2550     ASSERT(ConvertMem(mem_from).IsValid());
2551     ASSERT(ConvertMem(mem_to).IsValid());
2552     ScopedTmpRegU64 tmp_reg(this);
2553     auto tmp = VixlReg(tmp_reg, std::min(size, static_cast<size_t>(DOUBLE_WORD_SIZE)));
2554     if (size == BYTE_SIZE) {
2555         GetMasm()->Ldrb(tmp, ConvertMem(mem_from));
2556         GetMasm()->Strb(tmp, ConvertMem(mem_to));
2557     } else if (size == HALF_SIZE) {
2558         GetMasm()->Ldrh(tmp, ConvertMem(mem_from));
2559         GetMasm()->Strh(tmp, ConvertMem(mem_to));
2560     } else {
2561         ASSERT(size == WORD_SIZE || size == DOUBLE_WORD_SIZE);
2562         GetMasm()->Ldr(tmp, ConvertMem(mem_from));
2563         GetMasm()->Str(tmp, ConvertMem(mem_to));
2564     }
2565 }
2566 
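/* For sizes up to a word the copy below stores the loaded value together with the zero register
 * via stp, so the neighbouring word of the destination slot is zeroed as well (a reading of the
 * code, not a documented contract). */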
2567 void Aarch64Encoder::EncodeMemCopyz(MemRef mem_from, MemRef mem_to, size_t size)
2568 {
2569     if (!ConvertMem(mem_from).IsValid() || !ConvertMem(mem_to).IsValid()) {
2570         auto rzero = GetRegfile()->GetZeroReg();
2571         if (!ConvertMem(mem_from).IsValid()) {
2572             // Encode one load - will fix inside
2573             EncodeLdr(rzero, false, mem_from);
2574         } else {
2575             ASSERT(!ConvertMem(mem_to).IsValid());
2576             // Encode one store - will fix inside
2577             EncodeStr(rzero, mem_to);
2578         }
2579         return;
2580     }
2581     ASSERT(ConvertMem(mem_from).IsValid());
2582     ASSERT(ConvertMem(mem_to).IsValid());
2583     ScopedTmpRegU64 tmp_reg(this);
2584     auto tmp = VixlReg(tmp_reg, std::min(size, static_cast<size_t>(DOUBLE_WORD_SIZE)));
2585     auto zero = VixlReg(GetRegfile()->GetZeroReg(), WORD_SIZE);
2586     if (size == BYTE_SIZE) {
2587         GetMasm()->Ldrb(tmp, ConvertMem(mem_from));
2588         GetMasm()->Stp(tmp, zero, ConvertMem(mem_to));
2589     } else if (size == HALF_SIZE) {
2590         GetMasm()->Ldrh(tmp, ConvertMem(mem_from));
2591         GetMasm()->Stp(tmp, zero, ConvertMem(mem_to));
2592     } else {
2593         ASSERT(size == WORD_SIZE || size == DOUBLE_WORD_SIZE);
2594         GetMasm()->Ldr(tmp, ConvertMem(mem_from));
2595         if (size == WORD_SIZE) {
2596             GetMasm()->Stp(tmp, zero, ConvertMem(mem_to));
2597         } else {
2598             GetMasm()->Str(tmp, ConvertMem(mem_to));
2599         }
2600     }
2601 }
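
// Note on EncodeMemCopyz (illustrative): for copies narrower than a double word, the
// loaded value (already zero-extended by Ldrb/Ldrh/Ldr) is stored together with the
// 32-bit zero register via Stp, so the remaining upper word of the destination slot is
// cleared as well; a full double-word copy fills the slot by itself, so a plain Str is enough.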

void Aarch64Encoder::EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc)
{
    ASSERT(src0.IsFloat() == src1.IsFloat());
    if (src0.IsFloat()) {
        GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
    } else {
        GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
    }
    GetMasm()->Cset(VixlReg(dst), Convert(cc));
}

void Aarch64Encoder::EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc)
{
    ASSERT(src0.IsScalar() && src1.IsScalar());

    GetMasm()->Tst(VixlReg(src0), VixlReg(src1));
    GetMasm()->Cset(VixlReg(dst), ConvertTest(cc));
}

void Aarch64Encoder::EncodeCmp(Reg dst, Reg src0, Reg src1, Condition cc)
{
    if (src0.IsFloat()) {
        ASSERT(src1.IsFloat());
        ASSERT(cc == Condition::MI || cc == Condition::LT);
        GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
    } else {
        ASSERT(src0.IsScalar() && src1.IsScalar());
        ASSERT(cc == Condition::LO || cc == Condition::LT);
        GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
    }
    GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::ne);
    GetMasm()->Cneg(VixlReg(Promote(dst)), VixlReg(Promote(dst)), Convert(cc));
}
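
// How EncodeCmp produces a three-way result (illustrative): Cset writes 1 to dst when
// the operands differ, otherwise 0; Cneg then negates dst when the "less than"
// condition holds, yielding -1, 0 or +1. For scalars LO selects an unsigned and LT a
// signed compare; for floats the MI/LT choice presumably controls how an unordered
// (NaN) comparison is classified.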

void Aarch64Encoder::EncodeSelect(Reg dst, Reg src0, Reg src1, Reg src2, Reg src3, Condition cc)
{
    ASSERT(!src0.IsFloat() && !src1.IsFloat());
    if (src2.IsScalar()) {
        GetMasm()->Cmp(VixlReg(src2), VixlReg(src3));
    } else {
        GetMasm()->Fcmp(VixlVReg(src2), VixlVReg(src3));
    }
    GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), Convert(cc));
}

void Aarch64Encoder::EncodeSelect(Reg dst, Reg src0, Reg src1, Reg src2, Imm imm, Condition cc)
{
    ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());
    GetMasm()->Cmp(VixlReg(src2), VixlImm(imm));
    GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), Convert(cc));
}

void Aarch64Encoder::EncodeSelectTest(Reg dst, Reg src0, Reg src1, Reg src2, Reg src3, Condition cc)
{
    ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat() && !src3.IsFloat());
    GetMasm()->Tst(VixlReg(src2), VixlReg(src3));
    GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), ConvertTest(cc));
}

void Aarch64Encoder::EncodeSelectTest(Reg dst, Reg src0, Reg src1, Reg src2, Imm imm, Condition cc)
{
    ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());
    ASSERT(CanEncodeImmLogical(GetIntValue(imm), imm.GetSize() > WORD_SIZE ? DOUBLE_WORD_SIZE : WORD_SIZE));
    GetMasm()->Tst(VixlReg(src2), VixlImm(imm));
    GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), ConvertTest(cc));
}

void Aarch64Encoder::EncodeLdp(Reg dst0, Reg dst1, bool dst_signed, MemRef mem)
{
    ASSERT(dst0.IsFloat() == dst1.IsFloat());
    ASSERT(dst0.GetSize() == dst1.GetSize());
    if (!ConvertMem(mem).IsValid()) {
        // Encode a single Ldr; the invalid operand is handled inside EncodeLdr
        EncodeLdr(dst0, dst_signed, mem);
        return;
    }

    if (dst0.IsFloat()) {
        GetMasm()->Ldp(VixlVReg(dst0), VixlVReg(dst1), ConvertMem(mem));
        return;
    }
    if (dst_signed && dst0.GetSize() == WORD_SIZE) {
        GetMasm()->Ldpsw(VixlReg(dst0, DOUBLE_WORD_SIZE), VixlReg(dst1, DOUBLE_WORD_SIZE), ConvertMem(mem));
        return;
    }
    GetMasm()->Ldp(VixlReg(dst0), VixlReg(dst1), ConvertMem(mem));
}

void Aarch64Encoder::EncodeStp(Reg src0, Reg src1, MemRef mem)
{
    ASSERT(src0.IsFloat() == src1.IsFloat());
    ASSERT(src0.GetSize() == src1.GetSize());
    if (!ConvertMem(mem).IsValid()) {
        // Encode a single Str; the invalid operand is handled inside EncodeStr
        EncodeStr(src0, mem);
        return;
    }

    if (src0.IsFloat()) {
        GetMasm()->Stp(VixlVReg(src0), VixlVReg(src1), ConvertMem(mem));
        return;
    }
    GetMasm()->Stp(VixlReg(src0), VixlReg(src1), ConvertMem(mem));
}

void Aarch64Encoder::EncodeMAdd(Reg dst, Reg src0, Reg src1, Reg src2)
{
    ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize() && dst.GetSize() == src2.GetSize());
    ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar() && dst.IsScalar() == src2.IsScalar());

    ASSERT(!GetRegfile()->IsZeroReg(dst));

    if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
        EncodeMov(dst, src2);
        return;
    }

    if (GetRegfile()->IsZeroReg(src2)) {
        EncodeMul(dst, src0, src1);
        return;
    }

    if (dst.IsScalar()) {
        GetMasm()->Madd(VixlReg(dst), VixlReg(src0), VixlReg(src1), VixlReg(src2));
    } else {
        GetMasm()->Fmadd(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), VixlVReg(src2));
    }
}

void Aarch64Encoder::EncodeMSub(Reg dst, Reg src0, Reg src1, Reg src2)
{
    ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize() && dst.GetSize() == src2.GetSize());
    ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar() && dst.IsScalar() == src2.IsScalar());

    ASSERT(!GetRegfile()->IsZeroReg(dst));

    if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
        EncodeMov(dst, src2);
        return;
    }

    if (GetRegfile()->IsZeroReg(src2)) {
        EncodeMNeg(dst, src0, src1);
        return;
    }

    if (dst.IsScalar()) {
        GetMasm()->Msub(VixlReg(dst), VixlReg(src0), VixlReg(src1), VixlReg(src2));
    } else {
        GetMasm()->Fmsub(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), VixlVReg(src2));
    }
}

void Aarch64Encoder::EncodeMNeg(Reg dst, Reg src0, Reg src1)
{
    ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
    ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar());

    ASSERT(!GetRegfile()->IsZeroReg(dst));

    if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
        EncodeMov(dst, Imm(0U));
        return;
    }

    if (dst.IsScalar()) {
        GetMasm()->Mneg(VixlReg(dst), VixlReg(src0), VixlReg(src1));
    } else {
        GetMasm()->Fnmul(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
    }
}

void Aarch64Encoder::EncodeOrNot(Reg dst, Reg src0, Reg src1)
{
    ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
    ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
    GetMasm()->Orn(VixlReg(dst), VixlReg(src0), VixlReg(src1));
}

void Aarch64Encoder::EncodeOrNot(Reg dst, Reg src0, Shift src1)
{
    ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
    ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
    GetMasm()->Orn(VixlReg(dst), VixlReg(src0), VixlShift(src1));
}

void Aarch64Encoder::EncodeExtractBits(Reg dst, Reg src0, Imm imm1, Imm imm2)
{
    GetMasm()->Ubfx(VixlReg(dst), VixlReg(src0), GetIntValue(imm1), GetIntValue(imm2));
}

void Aarch64Encoder::EncodeAndNot(Reg dst, Reg src0, Reg src1)
{
    ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
    ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
    GetMasm()->Bic(VixlReg(dst), VixlReg(src0), VixlReg(src1));
}

void Aarch64Encoder::EncodeAndNot(Reg dst, Reg src0, Shift src1)
{
    ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
    ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
    GetMasm()->Bic(VixlReg(dst), VixlReg(src0), VixlShift(src1));
}

void Aarch64Encoder::EncodeXorNot(Reg dst, Reg src0, Reg src1)
{
    ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
    ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
    GetMasm()->Eon(VixlReg(dst), VixlReg(src0), VixlReg(src1));
}

void Aarch64Encoder::EncodeXorNot(Reg dst, Reg src0, Shift src1)
{
    ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
    ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
    GetMasm()->Eon(VixlReg(dst), VixlReg(src0), VixlShift(src1));
}

void Aarch64Encoder::EncodeNeg(Reg dst, Shift src)
{
    ASSERT(dst.GetSize() == src.GetBase().GetSize());
    ASSERT(dst.IsScalar() && src.GetBase().IsScalar());
    GetMasm()->Neg(VixlReg(dst), VixlShift(src));
}

void Aarch64Encoder::EncodeStackOverflowCheck(ssize_t offset)
{
    ScopedTmpReg tmp(this);
    EncodeAdd(tmp, GetTarget().GetStackReg(), Imm(offset));
    EncodeLdr(tmp, false, MemRef(tmp));
}
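
// Note on EncodeStackOverflowCheck (illustrative): the probe loads from
// [stack pointer + offset]; if that address falls inside the protected guard region,
// the load presumably faults and the runtime turns the fault into a stack-overflow
// error, so no explicit comparison or branch is needed here.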

bool Aarch64Encoder::CanEncodeImmAddSubCmp(int64_t imm, [[maybe_unused]] uint32_t size,
                                           [[maybe_unused]] bool signed_compare)
{
    if (imm == INT64_MIN) {
        return false;
    }
    if (imm < 0) {
        imm = -imm;
    }
    return vixl::aarch64::Assembler::IsImmAddSub(imm);
}
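
// Illustrative examples: AArch64 add/sub immediates are 12-bit values, optionally
// shifted left by 12, so 4095 and 0x1000 are encodable while 0x1001 is not. Negative
// values are checked by magnitude because the caller can switch between Add and Sub;
// INT64_MIN is rejected up front since its negation overflows int64_t.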

bool Aarch64Encoder::CanEncodeImmLogical(uint64_t imm, uint32_t size)
{
    return vixl::aarch64::Assembler::IsImmLogical(imm, size);
}
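
// Illustrative examples: logical (bitmask) immediates are repeating patterns of a
// rotated run of ones, so values such as 0x00FF00FF00FF00FF or 0x7 are encodable,
// while 0 and an all-ones value are not and must be materialized into a register first.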

/*
 * From the AArch64 instruction set
 *
 * ========================================================
 * Syntax
 *
 * LDR  Wt, [Xn|SP, Rm{, extend {amount}}]    ; 32-bit general registers
 *
 * LDR  Xt, [Xn|SP, Rm{, extend {amount}}]    ; 64-bit general registers
 *
 * amount
 * Is the index shift amount, optional and defaulting to #0 when extend is not LSL:
 *
 * 32-bit general registers
 * Can be one of #0 or #2.
 *
 * 64-bit general registers
 * Can be one of #0 or #3.
 * ========================================================
 * Syntax
 *
 * LDRH  Wt, [Xn|SP, Rm{, extend {amount}}]
 *
 * amount
 * Is the index shift amount, optional and defaulting to #0 when extend is not LSL, and can be either #0 or #1.
 * ========================================================
 *
 * So the scale can be 0 for any access, 1 for a half-word load, 2 for a word load, or 3 for a double-word load
 */
bool Aarch64Encoder::CanEncodeScale(uint64_t imm, uint32_t size)
{
    return (imm == 0) || ((1U << imm) == (size >> 3U));
}
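
// Worked example (illustrative): for a 64-bit access, size >> 3U == 8, so the scale is
// accepted only when imm == 0 or 1U << imm == 8 (imm == 3), matching the #0/#3 shift
// amounts listed for the 64-bit LDR form above.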

bool Aarch64Encoder::CanEncodeShiftedOperand(ShiftOpcode opcode, ShiftType shift_type)
{
    switch (opcode) {
        case ShiftOpcode::NEG_SR:
        case ShiftOpcode::ADD_SR:
        case ShiftOpcode::SUB_SR:
            return shift_type == ShiftType::LSL || shift_type == ShiftType::LSR || shift_type == ShiftType::ASR;
        case ShiftOpcode::AND_SR:
        case ShiftOpcode::OR_SR:
        case ShiftOpcode::XOR_SR:
        case ShiftOpcode::AND_NOT_SR:
        case ShiftOpcode::OR_NOT_SR:
        case ShiftOpcode::XOR_NOT_SR:
            return shift_type != ShiftType::INVALID_SHIFT;
        default:
            return false;
    }
}
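
// Background (illustrative): this mirrors the AArch64 shifted-register forms: ADD, SUB
// and NEG accept only LSL/LSR/ASR shifts, while the logical instructions (AND, ORR,
// EOR, BIC, ORN, EON) additionally accept ROR, hence any valid shift type passes here.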

Reg Aarch64Encoder::AcquireScratchRegister(TypeInfo type)
{
    ASSERT(GetMasm()->GetCurrentScratchRegisterScope() == nullptr);
    auto reg = type.IsFloat() ? GetMasm()->GetScratchVRegisterList()->PopLowestIndex()
                              : GetMasm()->GetScratchRegisterList()->PopLowestIndex();
    ASSERT(reg.IsValid());
    return Reg(reg.GetCode(), type);
}

void Aarch64Encoder::AcquireScratchRegister(Reg reg)
{
    ASSERT(GetMasm()->GetCurrentScratchRegisterScope() == nullptr);
    if (reg == GetTarget().GetLinkReg()) {
        ASSERT_PRINT(!lr_acquired_, "Trying to acquire LR, which hasn't been released before");
        lr_acquired_ = true;
        return;
    }
    auto type = reg.GetType();
    auto reg_id = reg.GetId();

    if (type.IsFloat()) {
        ASSERT(GetMasm()->GetScratchVRegisterList()->IncludesAliasOf(VixlVReg(reg)));
        GetMasm()->GetScratchVRegisterList()->Remove(reg_id);
    } else {
        ASSERT(GetMasm()->GetScratchRegisterList()->IncludesAliasOf(VixlReg(reg)));
        GetMasm()->GetScratchRegisterList()->Remove(reg_id);
    }
}

void Aarch64Encoder::ReleaseScratchRegister(Reg reg)
{
    if (reg == GetTarget().GetLinkReg()) {
        ASSERT_PRINT(lr_acquired_, "Trying to release LR, which hasn't been acquired before");
        lr_acquired_ = false;
    } else if (reg.IsFloat()) {
        GetMasm()->GetScratchVRegisterList()->Combine(reg.GetId());
    } else if (reg.GetId() != GetTarget().GetLinkReg().GetId()) {
        GetMasm()->GetScratchRegisterList()->Combine(reg.GetId());
    }
}

bool Aarch64Encoder::IsScratchRegisterReleased(Reg reg)
{
    if (reg == GetTarget().GetLinkReg()) {
        return !lr_acquired_;
    }
    if (reg.IsFloat()) {
        return GetMasm()->GetScratchVRegisterList()->IncludesAliasOf(VixlVReg(reg));
    }
    return GetMasm()->GetScratchRegisterList()->IncludesAliasOf(VixlReg(reg));
}

void Aarch64Encoder::MakeLibCall(Reg dst, Reg src0, Reg src1, const void *entry_point)
{
    if (!dst.IsFloat()) {
        SetFalseResult();
        return;
    }
    if (dst.GetType() == FLOAT32_TYPE) {
        if (!src0.IsFloat() || !src1.IsFloat()) {
            SetFalseResult();
            return;
        }

        if (src0.GetId() != vixl::aarch64::s0.GetCode() || src1.GetId() != vixl::aarch64::s1.GetCode()) {
            ScopedTmpRegF32 tmp(this);
            GetMasm()->Fmov(VixlVReg(tmp), VixlVReg(src1));
            GetMasm()->Fmov(vixl::aarch64::s0, VixlVReg(src0));
            GetMasm()->Fmov(vixl::aarch64::s1, VixlVReg(tmp));
        }

        MakeCall(entry_point);

        if (dst.GetId() != vixl::aarch64::s0.GetCode()) {
            GetMasm()->Fmov(VixlVReg(dst), vixl::aarch64::s0);
        }
    } else if (dst.GetType() == FLOAT64_TYPE) {
        if (!src0.IsFloat() || !src1.IsFloat()) {
            SetFalseResult();
            return;
        }

        if (src0.GetId() != vixl::aarch64::d0.GetCode() || src1.GetId() != vixl::aarch64::d1.GetCode()) {
            ScopedTmpRegF64 tmp(this);
            GetMasm()->Fmov(VixlVReg(tmp), VixlVReg(src1));

            GetMasm()->Fmov(vixl::aarch64::d0, VixlVReg(src0));
            GetMasm()->Fmov(vixl::aarch64::d1, VixlVReg(tmp));
        }

        MakeCall(entry_point);

        if (dst.GetId() != vixl::aarch64::d0.GetCode()) {
            GetMasm()->Fmov(VixlVReg(dst), vixl::aarch64::d0);
        }
    } else {
        UNREACHABLE();
    }
}
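
// Note on MakeLibCall (illustrative): per the AAPCS64 calling convention the two
// floating-point arguments travel in s0/s1 (or d0/d1) and the result comes back in
// s0 (or d0). src1 is parked in a temporary before s0/d0 is written, so the case where
// src1 already lives in the first argument register is not clobbered by the first move.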

template <bool is_store>
void Aarch64Encoder::LoadStoreRegisters(RegMask registers, ssize_t slot, size_t start_reg, bool is_fp)
{
    if (registers.none()) {
        return;
    }
    int32_t last_reg = registers.size() - 1;
    for (; last_reg >= 0; --last_reg) {
        if (registers.test(last_reg)) {
            break;
        }
    }
    // Construct a single Add for a large offset
    size_t sp_offset = 0;
    auto last_offset = (slot + last_reg - start_reg) * DOUBLE_WORD_SIZE_BYTE;

    if (!vixl::aarch64::Assembler::IsImmLSPair(last_offset, vixl::aarch64::kXRegSizeInBytesLog2)) {
        ScopedTmpReg lr_reg(this, true);
        auto tmp = VixlReg(lr_reg);
        sp_offset = slot * DOUBLE_WORD_SIZE_BYTE;
        slot = 0;
        if (vixl::aarch64::Assembler::IsImmAddSub(sp_offset)) {
            GetMasm()->Add(tmp, vixl::aarch64::sp, VixlImm(sp_offset));
        } else {
            GetMasm()->Mov(tmp, VixlImm(sp_offset));
            GetMasm()->Add(tmp, vixl::aarch64::sp, tmp);
        }
        LoadStoreRegistersLoop<is_store>(registers, slot, start_reg, is_fp, tmp);
    } else {
        LoadStoreRegistersLoop<is_store>(registers, slot, start_reg, is_fp, vixl::aarch64::sp);
    }
}

template <bool is_store>
void Aarch64Encoder::LoadStoreRegisters(RegMask registers, bool is_fp, int32_t slot, Reg base, RegMask mask)
{
    if (registers.none()) {
        return;
    }

    int32_t max_offset = (slot + helpers::ToSigned(registers.GetMaxRegister())) * DOUBLE_WORD_SIZE_BYTE;
    int32_t min_offset = (slot + helpers::ToSigned(registers.GetMinRegister())) * DOUBLE_WORD_SIZE_BYTE;

    ScopedTmpRegLazy tmp_reg(this, true);
    // Construct a single Add for a large offset
    if (!vixl::aarch64::Assembler::IsImmLSPair(min_offset, vixl::aarch64::kXRegSizeInBytesLog2) ||
        !vixl::aarch64::Assembler::IsImmLSPair(max_offset, vixl::aarch64::kXRegSizeInBytesLog2)) {
        tmp_reg.Acquire();
        auto lr_reg = VixlReg(tmp_reg);
        ssize_t sp_offset = slot * DOUBLE_WORD_SIZE_BYTE;
        if (vixl::aarch64::Assembler::IsImmAddSub(sp_offset)) {
            GetMasm()->Add(lr_reg, VixlReg(base), VixlImm(sp_offset));
        } else {
            GetMasm()->Mov(lr_reg, VixlImm(sp_offset));
            GetMasm()->Add(lr_reg, VixlReg(base), lr_reg);
        }
        // Adjust new values for slot and base register
        slot = 0;
        base = tmp_reg;
    }

    auto base_reg = VixlReg(base);
    bool has_mask = mask.any();
    int32_t index = has_mask ? static_cast<int32_t>(mask.GetMinRegister()) : 0;
    int32_t last_index = -1;
    ssize_t last_id = -1;

    slot -= index;
    for (ssize_t id = index; id < helpers::ToSigned(registers.size()); id++) {
        if (has_mask) {
            if (!mask.test(id)) {
                continue;
            }
            index++;
        }
        if (!registers.test(id)) {
            continue;
        }
        if (!has_mask) {
            index++;
        }
        if (last_id != -1) {
            auto reg =
                CPURegister(id, vixl::aarch64::kXRegSize, is_fp ? CPURegister::kVRegister : CPURegister::kRegister);
            auto last_reg = CPURegister(last_id, vixl::aarch64::kXRegSize,
                                        is_fp ? CPURegister::kVRegister : CPURegister::kRegister);
            if (!has_mask || last_id + 1 == id) {
                static constexpr ssize_t OFFSET = 2;
                if constexpr (is_store) {  // NOLINT
                    GetMasm()->Stp(last_reg, reg,
                                   MemOperand(base_reg, (slot + index - OFFSET) * DOUBLE_WORD_SIZE_BYTE));
                } else {  // NOLINT
                    GetMasm()->Ldp(last_reg, reg,
                                   MemOperand(base_reg, (slot + index - OFFSET) * DOUBLE_WORD_SIZE_BYTE));
                }
                last_id = -1;
            } else {
                if constexpr (is_store) {  // NOLINT
                    GetMasm()->Str(last_reg, MemOperand(base_reg, (slot + last_index - 1) * DOUBLE_WORD_SIZE_BYTE));
                } else {  // NOLINT
                    GetMasm()->Ldr(last_reg, MemOperand(base_reg, (slot + last_index - 1) * DOUBLE_WORD_SIZE_BYTE));
                }
                last_id = id;
                last_index = index;
            }
        } else {
            last_id = id;
            last_index = index;
        }
    }
    if (last_id != -1) {
        auto last_reg =
            CPURegister(last_id, vixl::aarch64::kXRegSize, is_fp ? CPURegister::kVRegister : CPURegister::kRegister);
        if constexpr (is_store) {  // NOLINT
            GetMasm()->Str(last_reg, MemOperand(base_reg, (slot + last_index - 1) * DOUBLE_WORD_SIZE_BYTE));
        } else {  // NOLINT
            GetMasm()->Ldr(last_reg, MemOperand(base_reg, (slot + last_index - 1) * DOUBLE_WORD_SIZE_BYTE));
        }
    }
}
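
// Note on the masked overload (illustrative): when a layout mask is supplied, each
// register's stack slot is its ordinal position within the mask rather than its
// register code, and only registers that are also adjacent by code are merged into a
// single Ldp/Stp; without a mask the saved registers are packed densely, so every two
// consecutive survivors can share one pair instruction.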

template <bool is_store>
void Aarch64Encoder::LoadStoreRegistersLoop(RegMask registers, ssize_t slot, size_t start_reg, bool is_fp,
                                            const vixl::aarch64::Register &base_reg)
{
    size_t i = 0;
    const auto GET_NEXT_REG = [&registers, &i, is_fp]() {
        for (; i < registers.size(); i++) {
            if (registers.test(i)) {
                return CPURegister(i++, vixl::aarch64::kXRegSize,
                                   is_fp ? CPURegister::kVRegister : CPURegister::kRegister);
            }
        }
        return CPURegister();
    };

    for (CPURegister next_reg = GET_NEXT_REG(); next_reg.IsValid();) {
        const CPURegister CURR_REG = next_reg;
        next_reg = GET_NEXT_REG();
        if (next_reg.IsValid() && (next_reg.GetCode() - 1 == CURR_REG.GetCode())) {
            if constexpr (is_store) {  // NOLINT
                GetMasm()->Stp(CURR_REG, next_reg,
                               MemOperand(base_reg, (slot + CURR_REG.GetCode() - start_reg) * DOUBLE_WORD_SIZE_BYTE));
            } else {  // NOLINT
                GetMasm()->Ldp(CURR_REG, next_reg,
                               MemOperand(base_reg, (slot + CURR_REG.GetCode() - start_reg) * DOUBLE_WORD_SIZE_BYTE));
            }
            next_reg = GET_NEXT_REG();
        } else {
            if constexpr (is_store) {  // NOLINT
                GetMasm()->Str(CURR_REG,
                               MemOperand(base_reg, (slot + CURR_REG.GetCode() - start_reg) * DOUBLE_WORD_SIZE_BYTE));
            } else {  // NOLINT
                GetMasm()->Ldr(CURR_REG,
                               MemOperand(base_reg, (slot + CURR_REG.GetCode() - start_reg) * DOUBLE_WORD_SIZE_BYTE));
            }
        }
    }
}
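
// Worked example (illustrative): with registers {x19, x20, x22}, the loop above merges
// the two consecutive codes into one Stp/Ldp of x19 and x20 and falls back to a single
// Str/Ldr for the isolated x22; each register's slot is derived from its code relative
// to start_reg, so the layout follows register numbering rather than dense packing.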

void Aarch64Encoder::PushRegisters(RegMask registers, bool is_fp, [[maybe_unused]] bool align)
{
    static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTE;
    Register last_reg = INVALID_REG;
    for (size_t i = 0; i < registers.size(); i++) {
        if (registers[i]) {
            if (last_reg == INVALID_REG) {
                last_reg = i;
                continue;
            }
            if (is_fp) {
                GetMasm()->stp(vixl::aarch64::VRegister(last_reg, DOUBLE_WORD_SIZE),
                               vixl::aarch64::VRegister(i, DOUBLE_WORD_SIZE),
                               MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
            } else {
                GetMasm()->stp(vixl::aarch64::Register(last_reg, DOUBLE_WORD_SIZE),
                               vixl::aarch64::Register(i, DOUBLE_WORD_SIZE),
                               MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
            }
            last_reg = INVALID_REG;
        }
    }
    if (last_reg != INVALID_REG) {
        if (is_fp) {
            GetMasm()->str(vixl::aarch64::VRegister(last_reg, DOUBLE_WORD_SIZE),
                           MemOperand(vixl::aarch64::sp, align ? -PAIR_OFFSET : -DOUBLE_WORD_SIZE_BYTE,
                                      vixl::aarch64::AddrMode::PreIndex));
        } else {
            GetMasm()->str(vixl::aarch64::Register(last_reg, DOUBLE_WORD_SIZE),
                           MemOperand(vixl::aarch64::sp, align ? -PAIR_OFFSET : -DOUBLE_WORD_SIZE_BYTE,
                                      vixl::aarch64::AddrMode::PreIndex));
        }
    }
}
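
// Note on PushRegisters (illustrative): registers are pushed two at a time with
// pre-indexed stp, moving sp down by 16 bytes per pair; a leftover single register is
// pushed with str, consuming a full 16-byte slot when "align" is set so the stack
// pointer keeps the 16-byte alignment required on AArch64.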

void Aarch64Encoder::PopRegisters(RegMask registers, bool is_fp, [[maybe_unused]] bool align)
{
    static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTE;
    Register last_reg;
    if ((registers.count() & 1U) != 0) {
        last_reg = registers.GetMaxRegister();
        if (is_fp) {
            GetMasm()->ldr(vixl::aarch64::VRegister(last_reg, DOUBLE_WORD_SIZE),
                           MemOperand(vixl::aarch64::sp, align ? PAIR_OFFSET : DOUBLE_WORD_SIZE_BYTE,
                                      vixl::aarch64::AddrMode::PostIndex));
        } else {
            GetMasm()->ldr(vixl::aarch64::Register(last_reg, DOUBLE_WORD_SIZE),
                           MemOperand(vixl::aarch64::sp, align ? PAIR_OFFSET : DOUBLE_WORD_SIZE_BYTE,
                                      vixl::aarch64::AddrMode::PostIndex));
        }
        registers.reset(last_reg);
    }
    last_reg = INVALID_REG;
    for (ssize_t i = registers.size() - 1; i >= 0; i--) {
        if (registers[i]) {
            if (last_reg == INVALID_REG) {
                last_reg = i;
                continue;
            }
            if (is_fp) {
                GetMasm()->ldp(vixl::aarch64::VRegister(i, DOUBLE_WORD_SIZE),
                               vixl::aarch64::VRegister(last_reg, DOUBLE_WORD_SIZE),
                               MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
            } else {
                GetMasm()->ldp(vixl::aarch64::Register(i, DOUBLE_WORD_SIZE),
                               vixl::aarch64::Register(last_reg, DOUBLE_WORD_SIZE),
                               MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
            }
            last_reg = INVALID_REG;
        }
    }
}
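
// Note on PopRegisters (illustrative): it undoes PushRegisters in reverse order: when
// the register count is odd, the highest register (the one pushed last) is popped first
// with post-indexed ldr, then the remaining pairs are reloaded from highest to lowest
// with post-indexed ldp, 16 bytes at a time.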

#ifndef PANDA_MINIMAL_VIXL
auto &Aarch64Encoder::GetDecoder() const
{
    if (decoder_ == nullptr) {
        decoder_ = GetAllocator()->New<vixl::aarch64::Decoder>(GetAllocator());
    }
    return *decoder_;
}
#endif

size_t Aarch64Encoder::DisasmInstr([[maybe_unused]] std::ostream &stream, size_t pc,
                                   [[maybe_unused]] ssize_t code_offset) const
{
#ifndef PANDA_MINIMAL_VIXL
    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
    std::array<char, vixl::aarch64::Disassembler::GetDefaultBufferSize()> buf;
    vixl::aarch64::Disassembler disasm(std::data(buf), std::size(buf));

    auto &decoder {GetDecoder()};
    vixl::aarch64::Decoder::ScopedVisitors sv(decoder, {&disasm});
    auto instr = GetMasm()->GetBuffer()->GetOffsetAddress<vixl::aarch64::Instruction *>(pc);

    auto buffer_start = GetMasm()->GetBuffer()->GetOffsetAddress<uintptr_t>(0);
    decoder.Decode(instr);
    if (code_offset < 0) {
        stream << disasm.GetOutput();
    } else {
        stream << std::setw(0x4) << std::right << std::setfill('0') << std::hex
               << reinterpret_cast<uintptr_t>(instr) - buffer_start + code_offset << ": " << disasm.GetOutput()
               << std::setfill(' ') << std::dec;
    }

#endif
    return pc + vixl::aarch64::kInstructionSize;
}
}  // namespace panda::compiler::aarch64