1 /*
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 /*
16 Encoder (implementation of math and mem low-level emitters)
17 */
18
19 #include <aarch64/macro-assembler-aarch64.h>
20 #include <cstddef>
21 #include "compiler/optimizer/code_generator/target/aarch64/target.h"
22 #include "compiler/optimizer/code_generator/encode.h"
23 #include "compiler/optimizer/code_generator/fast_divisor.h"
24 #include "scoped_tmp_reg.h"
25 #include "compiler/optimizer/code_generator/relocations.h"
26
27 #if defined(USE_VIXL_ARM64) && !defined(PANDA_MINIMAL_VIXL)
28 #include "aarch64/disasm-aarch64.h"
29 #endif
30
31 #include <iomanip>
32
33 #include "lib_helpers.inl"
34
35 #ifndef PANDA_TARGET_MACOS
36 #include "elf.h"
37 #endif // PANDA_TARGET_MACOS
38
39 namespace ark::compiler::aarch64 {
40 using vixl::aarch64::CPURegister;
41 using vixl::aarch64::MemOperand;
42
43 /// Converters
44 static vixl::aarch64::Condition Convert(const Condition cc)
45 {
46 switch (cc) {
47 case Condition::EQ:
48 return vixl::aarch64::Condition::eq;
49 case Condition::NE:
50 return vixl::aarch64::Condition::ne;
51 case Condition::LT:
52 return vixl::aarch64::Condition::lt;
53 case Condition::GT:
54 return vixl::aarch64::Condition::gt;
55 case Condition::LE:
56 return vixl::aarch64::Condition::le;
57 case Condition::GE:
58 return vixl::aarch64::Condition::ge;
59 case Condition::LO:
60 return vixl::aarch64::Condition::lo;
61 case Condition::LS:
62 return vixl::aarch64::Condition::ls;
63 case Condition::HI:
64 return vixl::aarch64::Condition::hi;
65 case Condition::HS:
66 return vixl::aarch64::Condition::hs;
67 // NOTE(igorban) : Remove them
68 case Condition::MI:
69 return vixl::aarch64::Condition::mi;
70 case Condition::PL:
71 return vixl::aarch64::Condition::pl;
72 case Condition::VS:
73 return vixl::aarch64::Condition::vs;
74 case Condition::VC:
75 return vixl::aarch64::Condition::vc;
76 case Condition::AL:
77 return vixl::aarch64::Condition::al;
78 case Condition::NV:
79 return vixl::aarch64::Condition::nv;
80 default:
81 UNREACHABLE();
82 return vixl::aarch64::Condition::eq;
83 }
84 }
85
86 static vixl::aarch64::Condition ConvertTest(const Condition cc)
87 {
88 ASSERT(cc == Condition::TST_EQ || cc == Condition::TST_NE);
89 return cc == Condition::TST_EQ ? vixl::aarch64::Condition::eq : vixl::aarch64::Condition::ne;
90 }
91
92 static vixl::aarch64::Shift Convert(const ShiftType type)
93 {
94 switch (type) {
95 case ShiftType::LSL:
96 return vixl::aarch64::Shift::LSL;
97 case ShiftType::LSR:
98 return vixl::aarch64::Shift::LSR;
99 case ShiftType::ASR:
100 return vixl::aarch64::Shift::ASR;
101 case ShiftType::ROR:
102 return vixl::aarch64::Shift::ROR;
103 default:
104 UNREACHABLE();
105 }
106 }
107
108 static vixl::aarch64::VRegister VixlVReg(Reg reg)
109 {
110 ASSERT(reg.IsValid());
111 auto vixlVreg = vixl::aarch64::VRegister(reg.GetId(), reg.GetSize());
112 ASSERT(vixlVreg.IsValid());
113 return vixlVreg;
114 }
115
116 static vixl::aarch64::Operand VixlShift(Shift shift)
117 {
118 Reg reg = shift.GetBase();
119 ASSERT(reg.IsValid());
120 if (reg.IsScalar()) {
121 ASSERT(reg.IsScalar());
122 size_t regSize = reg.GetSize();
123 if (regSize < WORD_SIZE) {
124 regSize = WORD_SIZE;
125 }
126 auto vixlReg = vixl::aarch64::Register(reg.GetId(), regSize);
127 ASSERT(vixlReg.IsValid());
128
129 return vixl::aarch64::Operand(vixlReg, Convert(shift.GetType()), shift.GetScale());
130 }
131
132 // Invalid register type
133 UNREACHABLE();
134 }
135
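// Converts a MemRef into a vixl MemOperand. Only two addressing forms can be represented here:
// [base, #disp] and [base, index {, SXTW/LSL #scale}]. Any other combination (e.g. base + index + disp)
// yields an invalid MemOperand.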
136 static vixl::aarch64::MemOperand ConvertMem(MemRef mem)
137 {
138 bool base = mem.HasBase() && (mem.GetBase().GetId() != vixl::aarch64::xzr.GetCode());
139 bool hasIndex = mem.HasIndex();
140 bool shift = mem.HasScale();
141 bool offset = mem.HasDisp();
142 auto baseReg = Reg(mem.GetBase().GetId(), INT64_TYPE);
143 if (base && !hasIndex && !shift) {
144 // Memory address = x_reg(base) + imm(offset)
145 if (mem.GetDisp() != 0) {
146 auto disp = mem.GetDisp();
147 return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlImm(disp));
148 }
149 // Memory address = x_reg(base)
150 return vixl::aarch64::MemOperand(VixlReg(mem.GetBase(), DOUBLE_WORD_SIZE));
151 }
152 if (base && hasIndex && !offset) {
153 auto scale = mem.GetScale();
154 auto indexReg = mem.GetIndex();
155 // Memory address = x_reg(base) + (SXTW(w_reg(index)) << scale)
156 if (indexReg.GetSize() == WORD_SIZE) {
157 // Sign-extend and shift w-register in offset-position (signed because index always has signed type)
158 return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlReg(indexReg), vixl::aarch64::Extend::SXTW, scale);
159 }
160 // Memory address = x_reg(base) + (x_reg(index) << scale)
161 if (scale != 0) {
162 ASSERT(indexReg.GetSize() == DOUBLE_WORD_SIZE);
163 return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlReg(indexReg), vixl::aarch64::LSL, scale);
164 }
165 // Memory address = x_reg(base) + x_reg(index)
166 return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlReg(indexReg));
167 }
168 // Wrong memRef
169 // Return invalid memory operand
170 auto tmp = vixl::aarch64::MemOperand();
171 ASSERT(!tmp.IsValid());
172 return tmp;
173 }
174
175 static Reg Promote(Reg reg)
176 {
177 if (reg.GetType() == INT8_TYPE) {
178 return Reg(reg.GetId(), INT16_TYPE);
179 }
180 return reg;
181 }
182
183 Aarch64LabelHolder::LabelId Aarch64LabelHolder::CreateLabel()
184 {
185 ++id_;
186 auto allocator = GetEncoder()->GetAllocator();
187 auto *label = allocator->New<LabelType>(allocator);
188 labels_.push_back(label);
189 ASSERT(labels_.size() == id_);
190 return id_ - 1;
191 }
192
193 void Aarch64LabelHolder::CreateLabels(LabelId size)
194 {
195 for (LabelId i = 0; i <= size; ++i) {
196 CreateLabel();
197 }
198 }
199
200 void Aarch64LabelHolder::BindLabel(LabelId id)
201 {
202 static_cast<Aarch64Encoder *>(GetEncoder())->GetMasm()->Bind(labels_[id]);
203 }
204
205 Aarch64LabelHolder::LabelType *Aarch64LabelHolder::GetLabel(LabelId id) const
206 {
207 ASSERT(labels_.size() > id);
208 return labels_[id];
209 }
210
211 Aarch64LabelHolder::LabelId Aarch64LabelHolder::Size()
212 {
213 return labels_.size();
214 }
215
216 Aarch64Encoder::Aarch64Encoder(ArenaAllocator *allocator) : Encoder(allocator, Arch::AARCH64)
217 {
218 labels_ = allocator->New<Aarch64LabelHolder>(this);
219 if (labels_ == nullptr) {
220 SetFalseResult();
221 }
222 // We enable LR tmp reg by default in Aarch64
223 EnableLrAsTempReg(true);
224 }
225
226 Aarch64Encoder::~Aarch64Encoder()
227 {
228 auto labels = static_cast<Aarch64LabelHolder *>(GetLabels())->labels_;
229 for (auto label : labels) {
230 label->~Label();
231 }
232 if (masm_ != nullptr) {
233 masm_->~MacroAssembler();
234 masm_ = nullptr;
235 }
236 }
237
238 LabelHolder *Aarch64Encoder::GetLabels() const
239 {
240 ASSERT(labels_ != nullptr);
241 return labels_;
242 }
243
244 bool Aarch64Encoder::IsValid() const
245 {
246 return true;
247 }
248
249 constexpr auto Aarch64Encoder::GetTarget()
250 {
251 return ark::compiler::Target(Arch::AARCH64);
252 }
253
254 void Aarch64Encoder::SetMaxAllocatedBytes(size_t size)
255 {
256 GetMasm()->GetBuffer()->SetMmapMaxBytes(size);
257 }
258
259 bool Aarch64Encoder::InitMasm()
260 {
261 if (masm_ == nullptr) {
262 // Initialize Masm
263 masm_ = GetAllocator()->New<vixl::aarch64::MacroAssembler>(GetAllocator());
264 if (masm_ == nullptr || !masm_->IsValid()) {
265 SetFalseResult();
266 return false;
267 }
268 ASSERT(GetMasm());
269
270 // Make sure that the compiler uses the same scratch registers as the assembler
271 CHECK_EQ(RegMask(GetMasm()->GetScratchRegisterList()->GetList()), GetTarget().GetTempRegsMask());
272 CHECK_EQ(RegMask(GetMasm()->GetScratchVRegisterList()->GetList()), GetTarget().GetTempVRegsMask());
273 }
274 return true;
275 }
276
277 void Aarch64Encoder::Finalize()
278 {
279 GetMasm()->FinalizeCode();
280 }
281
282 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id)
283 {
284 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
285 GetMasm()->B(label);
286 }
287
288 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
289 {
290 if (src1.GetId() == GetRegfile()->GetZeroReg().GetId()) {
291 EncodeJump(id, src0, cc);
292 return;
293 }
294
295 if (src0.IsScalar()) {
296 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
297 } else {
298 GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
299 }
300
301 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
302 GetMasm()->B(label, Convert(cc));
303 }
304
305 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
306 {
307 auto value = imm.GetAsInt();
308 if (value == 0) {
309 EncodeJump(id, src, cc);
310 return;
311 }
312
313 if (value < 0) {
314 GetMasm()->Cmn(VixlReg(src), VixlImm(-value));
315 } else { // if (value > 0)
316 GetMasm()->Cmp(VixlReg(src), VixlImm(value));
317 }
318
319 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
320 GetMasm()->B(label, Convert(cc));
321 }
322
323 void Aarch64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
324 {
325 ASSERT(src0.IsScalar() && src1.IsScalar());
326
327 GetMasm()->Tst(VixlReg(src0), VixlReg(src1));
328 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
329 GetMasm()->B(label, ConvertTest(cc));
330 }
331
332 void Aarch64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
333 {
334 ASSERT(src.IsScalar());
335
336 auto value = imm.GetAsInt();
337 if (CanEncodeImmLogical(value, src.GetSize() > WORD_SIZE ? DOUBLE_WORD_SIZE : WORD_SIZE)) {
338 GetMasm()->Tst(VixlReg(src), VixlImm(value));
339 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
340 GetMasm()->B(label, ConvertTest(cc));
341 } else {
342 ScopedTmpReg tmpReg(this, src.GetType());
343 EncodeMov(tmpReg, imm);
344 EncodeJumpTest(id, src, tmpReg, cc);
345 }
346 }
347
348 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Condition cc)
349 {
350 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
351 ASSERT(src.IsScalar());
352 auto rzero = Reg(GetRegfile()->GetZeroReg().GetId(), src.GetType());
353
354 switch (cc) {
355 case Condition::LO:
356 // Always false
357 return;
358 case Condition::HS:
359 // Always true
360 GetMasm()->B(label);
361 return;
362 case Condition::EQ:
363 case Condition::LS:
364 if (src.GetId() == rzero.GetId()) {
365 GetMasm()->B(label);
366 return;
367 }
368 // True only when zero
369 GetMasm()->Cbz(VixlReg(src), label);
370 return;
371 case Condition::NE:
372 case Condition::HI:
373 if (src.GetId() == rzero.GetId()) {
374 // Do nothing
375 return;
376 }
377 // True only when non-zero
378 GetMasm()->Cbnz(VixlReg(src), label);
379 return;
380 default:
381 break;
382 }
383
384 ASSERT(rzero.IsValid());
385 GetMasm()->Cmp(VixlReg(src), VixlReg(rzero));
386 GetMasm()->B(label, Convert(cc));
387 }
388
389 void Aarch64Encoder::EncodeJump(Reg dst)
390 {
391 GetMasm()->Br(VixlReg(dst));
392 }
393
394 void Aarch64Encoder::EncodeJump([[maybe_unused]] RelocationInfo *relocation)
395 {
396 #ifdef PANDA_TARGET_MACOS
397 LOG(FATAL, COMPILER) << "Not supported in Macos build";
398 #else
399 auto buffer = GetMasm()->GetBuffer();
400 relocation->offset = GetCursorOffset();
401 relocation->addend = 0;
402 relocation->type = R_AARCH64_CALL26;
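// Emit a branch placeholder with a zero offset; the real 26-bit offset is patched later
// through the recorded relocation.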
403 static constexpr uint32_t CALL_WITH_ZERO_OFFSET = 0x14000000;
404 buffer->Emit32(CALL_WITH_ZERO_OFFSET);
405 #endif
406 }
407
408 void Aarch64Encoder::EncodeBitTestAndBranch(LabelHolder::LabelId id, compiler::Reg reg, uint32_t bitPos, bool bitValue)
409 {
410 ASSERT(reg.IsScalar() && reg.GetSize() > bitPos);
411 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
412 if (bitValue) {
413 GetMasm()->Tbnz(VixlReg(reg), bitPos, label);
414 } else {
415 GetMasm()->Tbz(VixlReg(reg), bitPos, label);
416 }
417 }
418
419 void Aarch64Encoder::EncodeNop()
420 {
421 GetMasm()->Nop();
422 }
423
424 void Aarch64Encoder::MakeCall([[maybe_unused]] compiler::RelocationInfo *relocation)
425 {
426 #ifdef PANDA_TARGET_MACOS
427 LOG(FATAL, COMPILER) << "Not supported in Macos build";
428 #else
429 auto buffer = GetMasm()->GetBuffer();
430 relocation->offset = GetCursorOffset();
431 relocation->addend = 0;
432 relocation->type = R_AARCH64_CALL26;
433 static constexpr uint32_t CALL_WITH_ZERO_OFFSET = 0x94000000;
434 buffer->Emit32(CALL_WITH_ZERO_OFFSET);
435 #endif
436 }
437
438 void Aarch64Encoder::MakeCall(const void *entryPoint)
439 {
440 ScopedTmpReg tmp(this, true);
441 EncodeMov(tmp, Imm(reinterpret_cast<uintptr_t>(entryPoint)));
442 GetMasm()->Blr(VixlReg(tmp));
443 }
444
445 void Aarch64Encoder::MakeCall(MemRef entryPoint)
446 {
447 ScopedTmpReg tmp(this, true);
448 EncodeLdr(tmp, false, entryPoint);
449 GetMasm()->Blr(VixlReg(tmp));
450 }
451
452 void Aarch64Encoder::MakeCall(Reg reg)
453 {
454 GetMasm()->Blr(VixlReg(reg));
455 }
456
457 void Aarch64Encoder::MakeCall(LabelHolder::LabelId id)
458 {
459 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
460 GetMasm()->Bl(label);
461 }
462
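// Loads a PC-relative value (or just its address) at the given offset from the code start.
// For offsets that fit into 21 bits a single ADR is enough; otherwise ADRP plus page-offset
// arithmetic is used, splitting the offset into a 4 KiB page part and an in-page part.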
463 void Aarch64Encoder::LoadPcRelative(Reg reg, intptr_t offset, Reg regAddr)
464 {
465 ASSERT(GetCodeOffset() != Encoder::INVALID_OFFSET);
466 ASSERT(reg.IsValid() || regAddr.IsValid());
467
468 if (!regAddr.IsValid()) {
469 regAddr = reg.As(INT64_TYPE);
470 }
471
472 if (vixl::IsInt21(offset)) {
473 GetMasm()->adr(VixlReg(regAddr), offset);
474 if (reg != INVALID_REGISTER) {
475 EncodeLdr(reg, false, MemRef(regAddr));
476 }
477 } else {
478 size_t pc = GetCodeOffset() + GetCursorOffset();
479 size_t addr;
480 if (intptr_t res = helpers::ToSigned(pc) + offset; res < 0) {
481 // Make both, pc and addr, positive
482 ssize_t extend = RoundUp(std::abs(res), vixl::aarch64::kPageSize);
483 addr = res + extend;
484 pc += extend;
485 } else {
486 addr = res;
487 }
488
489 ssize_t adrpImm = (addr >> vixl::aarch64::kPageSizeLog2) - (pc >> vixl::aarch64::kPageSizeLog2);
490
491 GetMasm()->adrp(VixlReg(regAddr), adrpImm);
492
493 offset = ark::helpers::ToUnsigned(addr) & (vixl::aarch64::kPageSize - 1);
494 if (reg.GetId() != regAddr.GetId()) {
495 EncodeAdd(regAddr, regAddr, Imm(offset));
496 if (reg != INVALID_REGISTER) {
497 EncodeLdr(reg, true, MemRef(regAddr));
498 }
499 } else {
500 EncodeLdr(reg, true, MemRef(regAddr, offset));
501 }
502 }
503 }
504
505 void Aarch64Encoder::MakeCallAot(intptr_t offset)
506 {
507 ScopedTmpReg tmp(this, true);
508 LoadPcRelative(tmp, offset);
509 GetMasm()->Blr(VixlReg(tmp));
510 }
511
512 bool Aarch64Encoder::CanMakeCallByOffset(intptr_t offset)
513 {
514 // NOLINTNEXTLINE(hicpp-signed-bitwise)
515 auto off = (offset >> vixl::aarch64::kInstructionSizeLog2);
516 return vixl::aarch64::Instruction::IsValidImmPCOffset(vixl::aarch64::ImmBranchType::UncondBranchType, off);
517 }
518
519 void Aarch64Encoder::MakeCallByOffset(intptr_t offset)
520 {
521 GetMasm()->Bl(offset);
522 }
523
524 void Aarch64Encoder::MakeLoadAotTable(intptr_t offset, Reg reg)
525 {
526 LoadPcRelative(reg, offset);
527 }
528
529 void Aarch64Encoder::MakeLoadAotTableAddr(intptr_t offset, Reg addr, Reg val)
530 {
531 LoadPcRelative(val, offset, addr);
532 }
533
534 void Aarch64Encoder::EncodeAbort()
535 {
536 GetMasm()->Brk();
537 }
538
539 void Aarch64Encoder::EncodeReturn()
540 {
541 GetMasm()->Ret();
542 }
543
544 void Aarch64Encoder::EncodeMul([[maybe_unused]] Reg unused1, [[maybe_unused]] Reg unused2, [[maybe_unused]] Imm unused3)
545 {
546 SetFalseResult();
547 }
548
549 void Aarch64Encoder::EncodeMov(Reg dst, Reg src)
550 {
551 if (dst == src) {
552 return;
553 }
554 if (src.IsFloat() && dst.IsFloat()) {
555 if (src.GetSize() != dst.GetSize()) {
556 GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
557 return;
558 }
559 GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
560 return;
561 }
562 if (src.IsFloat() && !dst.IsFloat()) {
563 GetMasm()->Fmov(VixlReg(dst, src.GetSize()), VixlVReg(src));
564 return;
565 }
566 if (dst.IsFloat()) {
567 ASSERT(src.IsScalar());
568 GetMasm()->Fmov(VixlVReg(dst), VixlReg(src));
569 return;
570 }
571 // DiscardForSameWReg below means we would drop "mov w0, w0", but it is guarded by "dst == src" above anyway.
572 // NOTE: "mov w0, w0" is not equal to "nop", as it clears the upper bits of x0.
573 // Keeping the option here helps to generate nothing when e.g. src is x0 and dst is w0.
574 // Probably, a better solution is a system-wide check of register sizes at the Encoder level.
575 if (src.GetSize() != dst.GetSize()) {
576 auto srcReg = Reg(src.GetId(), dst.GetType());
577 GetMasm()->Mov(VixlReg(dst), VixlReg(srcReg), vixl::aarch64::DiscardMoveMode::kDiscardForSameWReg);
578 return;
579 }
580 GetMasm()->Mov(VixlReg(dst), VixlReg(src), vixl::aarch64::DiscardMoveMode::kDiscardForSameWReg);
581 }
582
583 void Aarch64Encoder::EncodeNeg(Reg dst, Reg src)
584 {
585 if (dst.IsFloat()) {
586 GetMasm()->Fneg(VixlVReg(dst), VixlVReg(src));
587 return;
588 }
589 GetMasm()->Neg(VixlReg(dst), VixlReg(src));
590 }
591
592 void Aarch64Encoder::EncodeAbs(Reg dst, Reg src)
593 {
594 if (dst.IsFloat()) {
595 GetMasm()->Fabs(VixlVReg(dst), VixlVReg(src));
596 return;
597 }
598
599 ASSERT(!GetRegfile()->IsZeroReg(dst));
600 if (GetRegfile()->IsZeroReg(src)) {
601 EncodeMov(dst, src);
602 return;
603 }
604
605 if (src.GetSize() == DOUBLE_WORD_SIZE) {
606 GetMasm()->Cmp(VixlReg(src), vixl::aarch64::xzr);
607 } else {
608 GetMasm()->Cmp(VixlReg(src), vixl::aarch64::wzr);
609 }
610 GetMasm()->Cneg(VixlReg(Promote(dst)), VixlReg(Promote(src)), vixl::aarch64::Condition::lt);
611 }
612
613 void Aarch64Encoder::EncodeSqrt(Reg dst, Reg src)
614 {
615 ASSERT(dst.IsFloat());
616 GetMasm()->Fsqrt(VixlVReg(dst), VixlVReg(src));
617 }
618
619 void Aarch64Encoder::EncodeIsInf(Reg dst, Reg src)
620 {
621 ASSERT(dst.IsScalar() && src.IsFloat());
622
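// Shifting the raw bits left by one drops the sign bit, so the result equals the shifted
// exponent mask (INF_MASK) only for +Inf / -Inf.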
623 if (src.GetSize() == WORD_SIZE) {
624 constexpr uint32_t INF_MASK = 0xff000000;
625
626 ScopedTmpRegU32 tmpReg(this);
627 auto tmp = VixlReg(tmpReg);
628 GetMasm()->Fmov(tmp, VixlVReg(src));
629 GetMasm()->Mov(VixlReg(dst).W(), INF_MASK);
630 GetMasm()->Lsl(tmp, tmp, 1);
631 GetMasm()->Cmp(tmp, VixlReg(dst, WORD_SIZE));
632 } else {
633 constexpr uint64_t INF_MASK = 0xffe0000000000000;
634
635 ScopedTmpRegU64 tmpReg(this);
636 auto tmp = VixlReg(tmpReg);
637 GetMasm()->Fmov(tmp, VixlVReg(src));
638 GetMasm()->Mov(VixlReg(dst).X(), INF_MASK);
639 GetMasm()->Lsl(tmp, tmp, 1);
640 GetMasm()->Cmp(tmp, VixlReg(dst, DOUBLE_WORD_SIZE));
641 }
642
643 GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::eq);
644 }
645
646 void Aarch64Encoder::EncodeCmpFracWithDelta(Reg src)
647 {
648 ASSERT(src.IsFloat());
649 ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
650
651 // Encode (fabs(src - trunc(src)) <= DELTA)
652 if (src.GetSize() == WORD_SIZE) {
653 ScopedTmpRegF32 tmp(this);
654 GetMasm()->Frintz(VixlVReg(tmp), VixlVReg(src));
655 EncodeSub(tmp, src, tmp);
656 EncodeAbs(tmp, tmp);
657 GetMasm()->Fcmp(VixlVReg(tmp), std::numeric_limits<float>::epsilon());
658 } else {
659 ScopedTmpRegF64 tmp(this);
660 GetMasm()->Frintz(VixlVReg(tmp), VixlVReg(src));
661 EncodeSub(tmp, src, tmp);
662 EncodeAbs(tmp, tmp);
663 GetMasm()->Fcmp(VixlVReg(tmp), std::numeric_limits<double>::epsilon());
664 }
665 }
666
667 void Aarch64Encoder::EncodeIsInteger(Reg dst, Reg src)
668 {
669 ASSERT(dst.IsScalar() && src.IsFloat());
670 ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
671
672 auto labelExit = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
673 auto labelInfOrNan = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
674
675 EncodeCmpFracWithDelta(src);
676 GetMasm()->B(labelInfOrNan, vixl::aarch64::Condition::vs); // Inf or NaN
677 GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::le);
678 GetMasm()->B(labelExit);
679
680 // IsInteger returns false if src is Inf or NaN
681 GetMasm()->Bind(labelInfOrNan);
682 EncodeMov(dst, Imm(false));
683
684 GetMasm()->Bind(labelExit);
685 }
686
687 void Aarch64Encoder::EncodeIsSafeInteger(Reg dst, Reg src)
688 {
689 ASSERT(dst.IsScalar() && src.IsFloat());
690 ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
691
692 auto labelExit = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
693 auto labelFalse = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
694
695 // Check if IsInteger
696 EncodeCmpFracWithDelta(src);
697 GetMasm()->B(labelFalse, vixl::aarch64::Condition::vs); // Inf or NaN
698 GetMasm()->B(labelFalse, vixl::aarch64::Condition::gt);
699
700 // Check if it is safe, i.e. src can be represented in float/double without losing precision
701 if (src.GetSize() == WORD_SIZE) {
702 ScopedTmpRegF32 tmp(this);
703 EncodeAbs(tmp, src);
704 GetMasm()->Fcmp(VixlVReg(tmp), MaxIntAsExactFloat());
705 } else {
706 ScopedTmpRegF64 tmp(this);
707 EncodeAbs(tmp, src);
708 GetMasm()->Fcmp(VixlVReg(tmp), MaxIntAsExactDouble());
709 }
710 GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::le);
711 GetMasm()->B(labelExit);
712
713 // Return false if src !IsInteger
714 GetMasm()->Bind(labelFalse);
715 EncodeMov(dst, Imm(false));
716
717 GetMasm()->Bind(labelExit);
718 }
719
720 /* NaN values need to be canonicalized */
721 void Aarch64Encoder::EncodeFpToBits(Reg dst, Reg src)
722 {
723 ASSERT(dst.IsScalar() && src.IsFloat());
724 ASSERT(dst.GetSize() == WORD_SIZE || dst.GetSize() == DOUBLE_WORD_SIZE);
725
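// Fcmp of a register with itself is unordered only for NaN, so the "ne" condition below
// selects the canonical NaN bit pattern; otherwise the raw bits are passed through.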
726 if (dst.GetSize() == WORD_SIZE) {
727 ASSERT(src.GetSize() == WORD_SIZE);
728
729 constexpr auto FNAN = 0x7fc00000;
730
731 ScopedTmpRegU32 tmp(this);
732
733 GetMasm()->Fcmp(VixlVReg(src), VixlVReg(src));
734 GetMasm()->Mov(VixlReg(tmp), FNAN);
735 GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
736 GetMasm()->Csel(VixlReg(dst), VixlReg(tmp), VixlReg(dst), vixl::aarch64::Condition::ne);
737 } else {
738 ASSERT(src.GetSize() == DOUBLE_WORD_SIZE);
739
740 constexpr auto DNAN = 0x7ff8000000000000;
741
742 ScopedTmpRegU64 tmpReg(this);
743 auto tmp = VixlReg(tmpReg);
744
745 GetMasm()->Fcmp(VixlVReg(src), VixlVReg(src));
746 GetMasm()->Mov(tmp, DNAN);
747 GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
748 GetMasm()->Csel(VixlReg(dst), tmp, VixlReg(dst), vixl::aarch64::Condition::ne);
749 }
750 }
751
752 void Aarch64Encoder::EncodeMoveBitsRaw(Reg dst, Reg src)
753 {
754 ASSERT((dst.IsFloat() && src.IsScalar()) || (src.IsFloat() && dst.IsScalar()));
755 if (dst.IsScalar()) {
756 ASSERT(src.GetSize() == dst.GetSize());
757 if (dst.GetSize() == WORD_SIZE) {
758 GetMasm()->Umov(VixlReg(dst).W(), VixlVReg(src).S(), 0);
759 } else {
760 GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
761 }
762 } else {
763 ASSERT(dst.GetSize() == src.GetSize());
764 ScopedTmpReg tmpReg(this, src.GetType());
765 auto srcReg = src;
766 auto rzero = GetRegfile()->GetZeroReg();
767 if (src.GetId() == rzero.GetId()) {
768 EncodeMov(tmpReg, Imm(0));
769 srcReg = tmpReg;
770 }
771
772 if (srcReg.GetSize() == WORD_SIZE) {
773 GetMasm()->Fmov(VixlVReg(dst).S(), VixlReg(srcReg).W());
774 } else {
775 GetMasm()->Fmov(VixlVReg(dst), VixlReg(srcReg));
776 }
777 }
778 }
779
780 void Aarch64Encoder::EncodeReverseBytes(Reg dst, Reg src)
781 {
782 auto rzero = GetRegfile()->GetZeroReg();
783 if (src.GetId() == rzero.GetId()) {
784 EncodeMov(dst, Imm(0));
785 return;
786 }
787
788 ASSERT(src.GetSize() > BYTE_SIZE);
789 ASSERT(src.GetSize() == dst.GetSize());
790
791 if (src.GetSize() == HALF_SIZE) {
792 GetMasm()->Rev16(VixlReg(dst), VixlReg(src));
793 GetMasm()->Sxth(VixlReg(dst), VixlReg(dst));
794 } else {
795 GetMasm()->Rev(VixlReg(dst), VixlReg(src));
796 }
797 }
798
799 void Aarch64Encoder::EncodeBitCount(Reg dst, Reg src)
800 {
801 auto rzero = GetRegfile()->GetZeroReg();
802 if (src.GetId() == rzero.GetId()) {
803 EncodeMov(dst, Imm(0));
804 return;
805 }
806
807 ASSERT(dst.GetSize() == WORD_SIZE);
808
809 ScopedTmpRegF64 tmpReg0(this);
810 vixl::aarch64::VRegister tmpReg;
811 if (src.GetSize() == DOUBLE_WORD_SIZE) {
812 tmpReg = VixlVReg(tmpReg0).D();
813 } else {
814 tmpReg = VixlVReg(tmpReg0).S();
815 }
816
817 if (src.GetSize() < WORD_SIZE) {
818 int64_t cutValue = (1ULL << src.GetSize()) - 1;
819 EncodeAnd(src, src, Imm(cutValue));
820 }
821
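// Population count via SIMD: move the value into a vector register, count bits per byte
// with CNT, then sum the per-byte counts with ADDV.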
822 GetMasm()->Fmov(tmpReg, VixlReg(src));
823 GetMasm()->Cnt(tmpReg.V8B(), tmpReg.V8B());
824 GetMasm()->Addv(tmpReg.B(), tmpReg.V8B());
825 EncodeMov(dst, tmpReg0);
826 }
827
828 /* Since only ROR is supported on AArch64, we do
829 * left rotation as ROR(v, -count) */
830 void Aarch64Encoder::EncodeRotate(Reg dst, Reg src1, Reg src2, bool isRor)
831 {
832 ASSERT(src1.GetSize() == WORD_SIZE || src1.GetSize() == DOUBLE_WORD_SIZE);
833 ASSERT(src1.GetSize() == dst.GetSize());
834 auto rzero = GetRegfile()->GetZeroReg();
835 if (rzero.GetId() == src2.GetId() || rzero.GetId() == src1.GetId()) {
836 EncodeMov(dst, src1);
837 return;
838 }
839 /* as the second parameter is always 32 bits long, we have to
840 * adjust the count register for the case of a 64-bit first operand */
841 if (isRor) {
842 auto count = (dst.GetSize() == WORD_SIZE ? VixlReg(src2) : VixlReg(src2).X());
843 GetMasm()->Ror(VixlReg(dst), VixlReg(src1), count);
844 } else {
845 ScopedTmpReg tmp(this);
846 auto cnt = (dst.GetId() == src1.GetId() ? tmp : dst);
847 auto count = (dst.GetSize() == WORD_SIZE ? VixlReg(cnt).W() : VixlReg(cnt).X());
848 auto source2 = (dst.GetSize() == WORD_SIZE ? VixlReg(src2).W() : VixlReg(src2).X());
849 GetMasm()->Neg(count, source2);
850 GetMasm()->Ror(VixlReg(dst), VixlReg(src1), count);
851 }
852 }
853
854 void Aarch64Encoder::EncodeSignum(Reg dst, Reg src)
855 {
856 ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
857
858 ScopedTmpRegU32 tmp(this);
859 auto sign = (dst.GetId() == src.GetId() ? tmp : dst);
860
861 GetMasm()->Cmp(VixlReg(src), VixlImm(0));
862 GetMasm()->Cset(VixlReg(sign), vixl::aarch64::Condition::gt);
863
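// signum(src) = (src > 0 ? 1 : 0) - (src logically shifted right by size - 1); the shift
// extracts the sign bit, so the result is 1, 0 or -1.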
864 constexpr auto SHIFT_WORD_BITS = 31;
865 constexpr auto SHIFT_DWORD_BITS = 63;
866
867 /* The operation below is "sub dst, dst, src, lsr #reg_size-1";
868 * however, we can only encode as many as 32 bits in the lsr field, so
869 * for the 64-bit case we cannot avoid having a separate lsr instruction */
870 if (src.GetSize() == WORD_SIZE) {
871 auto shift = Shift(src, LSR, SHIFT_WORD_BITS);
872 EncodeSub(dst, sign, shift);
873 } else {
874 ScopedTmpRegU64 shift(this);
875 sign = Reg(sign.GetId(), INT64_TYPE);
876 EncodeShr(shift, src, Imm(SHIFT_DWORD_BITS));
877 EncodeSub(dst, sign, shift);
878 }
879 }
880
881 void Aarch64Encoder::EncodeCountLeadingZeroBits(Reg dst, Reg src)
882 {
883 auto rzero = GetRegfile()->GetZeroReg();
884 if (rzero.GetId() == src.GetId()) {
885 EncodeMov(dst, Imm(src.GetSize()));
886 return;
887 }
888 GetMasm()->Clz(VixlReg(dst), VixlReg(src));
889 }
890
891 void Aarch64Encoder::EncodeCountTrailingZeroBits(Reg dst, Reg src)
892 {
893 auto rzero = GetRegfile()->GetZeroReg();
894 if (rzero.GetId() == src.GetId()) {
895 EncodeMov(dst, Imm(src.GetSize()));
896 return;
897 }
898 GetMasm()->Rbit(VixlReg(dst), VixlReg(src));
899 GetMasm()->Clz(VixlReg(dst), VixlReg(dst));
900 }
901
902 void Aarch64Encoder::EncodeCeil(Reg dst, Reg src)
903 {
904 GetMasm()->Frintp(VixlVReg(dst), VixlVReg(src));
905 }
906
907 void Aarch64Encoder::EncodeFloor(Reg dst, Reg src)
908 {
909 GetMasm()->Frintm(VixlVReg(dst), VixlVReg(src));
910 }
911
912 void Aarch64Encoder::EncodeRint(Reg dst, Reg src)
913 {
914 GetMasm()->Frintn(VixlVReg(dst), VixlVReg(src));
915 }
916
917 void Aarch64Encoder::EncodeTrunc(Reg dst, Reg src)
918 {
919 GetMasm()->Frintz(VixlVReg(dst), VixlVReg(src));
920 }
921
922 void Aarch64Encoder::EncodeRoundAway(Reg dst, Reg src)
923 {
924 GetMasm()->Frinta(VixlVReg(dst), VixlVReg(src));
925 }
926
927 void Aarch64Encoder::EncodeRoundToPInf(Reg dst, Reg src)
928 {
929 auto done = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
930 ScopedTmpReg tmp(this, src.GetType());
931 // round to nearest integer, ties away from zero
932 GetMasm()->Fcvtas(VixlReg(dst), VixlVReg(src));
933 // for positive values, zero, and NaN inputs the rounding is already done
934 GetMasm()->Tbz(VixlReg(dst), dst.GetSize() - 1, done);
935 // if input is negative but not a tie, round to nearest is valid
936 // if input is a negative tie, dst += 1
937 GetMasm()->Frinta(VixlVReg(tmp), VixlVReg(src));
938 GetMasm()->Fsub(VixlVReg(tmp), VixlVReg(src), VixlVReg(tmp));
939 // NOLINTNEXTLINE(readability-magic-numbers)
940 GetMasm()->Fcmp(VixlVReg(tmp), 0.5F);
941 GetMasm()->Cinc(VixlReg(dst), VixlReg(dst), vixl::aarch64::Condition::eq);
942 GetMasm()->Bind(done);
943 }
944
945 void Aarch64Encoder::EncodeCrc32Update(Reg dst, Reg crcReg, Reg valReg)
946 {
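// dst = ~crc32b(~crc, val): the accumulator is bit-inverted before and after the CRC32B
// instruction, which matches the usual CRC-32 initial/final XOR convention.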
947 auto tmp = dst.GetId() != crcReg.GetId() && dst.GetId() != valReg.GetId() ? dst : ScopedTmpReg(this, dst.GetType());
948 GetMasm()->Mvn(VixlReg(tmp), VixlReg(crcReg));
949 GetMasm()->Crc32b(VixlReg(tmp), VixlReg(tmp), VixlReg(valReg));
950 GetMasm()->Mvn(VixlReg(dst), VixlReg(tmp));
951 }
952
953 void Aarch64Encoder::EncodeCompressEightUtf16ToUtf8CharsUsingSimd(Reg srcAddr, Reg dstAddr)
954 {
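// LD2 de-interleaves eight UTF-16 code units into a low-byte vector and a high-byte vector;
// ST1 then stores only the low bytes, i.e. the eight compressed one-byte characters.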
955 ScopedTmpReg tmp1(this, FLOAT64_TYPE);
956 ScopedTmpReg tmp2(this, FLOAT64_TYPE);
957 auto vixlVreg1 = vixl::aarch64::VRegister(tmp1.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8B);
958 ASSERT(vixlVreg1.IsValid());
959 auto vixlVreg2 = vixl::aarch64::VRegister(tmp2.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8B);
960 ASSERT(vixlVreg2.IsValid());
961 auto src = vixl::aarch64::MemOperand(VixlReg(srcAddr));
962 auto dst = vixl::aarch64::MemOperand(VixlReg(dstAddr));
963 GetMasm()->Ld2(vixlVreg1, vixlVreg2, src);
964 GetMasm()->St1(vixlVreg1, dst);
965 }
966
967 void Aarch64Encoder::EncodeCompressSixteenUtf16ToUtf8CharsUsingSimd(Reg srcAddr, Reg dstAddr)
968 {
969 ScopedTmpReg tmp1(this, FLOAT64_TYPE);
970 ScopedTmpReg tmp2(this, FLOAT64_TYPE);
971 auto vixlVreg1 = vixl::aarch64::VRegister(tmp1.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
972 ASSERT(vixlVreg1.IsValid());
973 auto vixlVreg2 = vixl::aarch64::VRegister(tmp2.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
974 ASSERT(vixlVreg2.IsValid());
975 auto src = vixl::aarch64::MemOperand(VixlReg(srcAddr));
976 auto dst = vixl::aarch64::MemOperand(VixlReg(dstAddr));
977 GetMasm()->Ld2(vixlVreg1, vixlVreg2, src);
978 GetMasm()->St1(vixlVreg1, dst);
979 }
980
981 void Aarch64Encoder::EncodeUnsignedExtendBytesToShorts(Reg dst, Reg src)
982 {
983 GetMasm()->Uxtl(VixlVReg(dst).V8H(), VixlVReg(src).V8B());
984 }
985
986 void Aarch64Encoder::EncodeReverseHalfWords(Reg dst, Reg src)
987 {
988 ASSERT(src.GetSize() == dst.GetSize());
989
990 GetMasm()->rev64(VixlVReg(dst).V4H(), VixlVReg(src).V4H());
991 }
992
993 bool Aarch64Encoder::CanEncodeBitCount()
994 {
995 return true;
996 }
997
998 bool Aarch64Encoder::CanEncodeCompressedStringCharAt()
999 {
1000 return true;
1001 }
1002
1003 bool Aarch64Encoder::CanEncodeCompressedStringCharAtI()
1004 {
1005 return true;
1006 }
1007
1008 bool Aarch64Encoder::CanEncodeMAdd()
1009 {
1010 return true;
1011 }
1012
1013 bool Aarch64Encoder::CanEncodeMSub()
1014 {
1015 return true;
1016 }
1017
1018 bool Aarch64Encoder::CanEncodeMNeg()
1019 {
1020 return true;
1021 }
1022
1023 bool Aarch64Encoder::CanEncodeOrNot()
1024 {
1025 return true;
1026 }
1027
1028 bool Aarch64Encoder::CanEncodeAndNot()
1029 {
1030 return true;
1031 }
1032
1033 bool Aarch64Encoder::CanEncodeXorNot()
1034 {
1035 return true;
1036 }
1037
1038 size_t Aarch64Encoder::GetCursorOffset() const
1039 {
1040 return GetMasm()->GetBuffer()->GetCursorOffset();
1041 }
1042
1043 void Aarch64Encoder::SetCursorOffset(size_t offset)
1044 {
1045 GetMasm()->GetBuffer()->Rewind(offset);
1046 }
1047
1048 /* return the power of 2 for the size of the type */
1049 void Aarch64Encoder::EncodeGetTypeSize(Reg size, Reg type)
1050 {
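// The type id is compared against the I16/I32/F64 boundaries; every boundary that is passed
// increments the size exponent, and reference types are additionally shrunk by SMALLREF
// when object pointers are smaller than 64 bits.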
1051 auto sreg = VixlReg(type);
1052 auto dreg = VixlReg(size);
1053 constexpr uint8_t I16 = 0x5;
1054 constexpr uint8_t I32 = 0x7;
1055 constexpr uint8_t F64 = 0xa;
1056 constexpr uint8_t REF = 0xd;
1057 constexpr uint8_t SMALLREF = ark::OBJECT_POINTER_SIZE < sizeof(uint64_t) ? 1 : 0;
1058 auto end = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1059
1060 GetMasm()->Mov(dreg, VixlImm(0));
1061 GetMasm()->Cmp(sreg, VixlImm(I16));
1062 GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1063 GetMasm()->Cmp(sreg, VixlImm(I32));
1064 GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1065 GetMasm()->Cmp(sreg, VixlImm(F64));
1066 GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1067 GetMasm()->Cmp(sreg, VixlImm(REF));
1068 GetMasm()->B(end, vixl::aarch64::Condition::ne);
1069 GetMasm()->Sub(dreg, dreg, VixlImm(SMALLREF));
1070 GetMasm()->Bind(end);
1071 }
1072
1073 void Aarch64Encoder::EncodeReverseBits(Reg dst, Reg src)
1074 {
1075 auto rzero = GetRegfile()->GetZeroReg();
1076 if (rzero.GetId() == src.GetId()) {
1077 EncodeMov(dst, Imm(0));
1078 return;
1079 }
1080 ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
1081 ASSERT(src.GetSize() == dst.GetSize());
1082
1083 GetMasm()->Rbit(VixlReg(dst), VixlReg(src));
1084 }
1085
1086 void Aarch64Encoder::EncodeCompressedStringCharAt(ArgsCompressedStringCharAt &&args)
1087 {
1088 auto [dst, str, idx, length, tmp, dataOffset, shift] = args;
1089 ASSERT(dst.GetSize() == HALF_SIZE);
1090
1091 auto labelNotCompressed = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1092 auto labelCharLoaded = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1093 auto vixlTmp = VixlReg(tmp, DOUBLE_WORD_SIZE);
1094 auto vixlDst = VixlReg(dst);
1095
1096 GetMasm()->Tbnz(VixlReg(length), 0, labelNotCompressed);
1097 EncodeAdd(tmp, str, idx);
1098 GetMasm()->ldrb(vixlDst, MemOperand(vixlTmp, dataOffset));
1099 GetMasm()->B(labelCharLoaded);
1100 GetMasm()->Bind(labelNotCompressed);
1101 EncodeAdd(tmp, str, Shift(idx, shift));
1102 GetMasm()->ldrh(vixlDst, MemOperand(vixlTmp, dataOffset));
1103 GetMasm()->Bind(labelCharLoaded);
1104 }
1105
1106 void Aarch64Encoder::EncodeCompressedStringCharAtI(ArgsCompressedStringCharAtI &&args)
1107 {
1108 auto [dst, str, length, dataOffset, index, shift] = args;
1109 ASSERT(dst.GetSize() == HALF_SIZE);
1110
1111 auto labelNotCompressed = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1112 auto labelCharLoaded = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1113 auto vixlStr = VixlReg(str);
1114 auto vixlDst = VixlReg(dst);
1115
1116 auto rzero = GetRegfile()->GetZeroReg().GetId();
1117 if (str.GetId() == rzero) {
1118 return;
1119 }
1120 GetMasm()->Tbnz(VixlReg(length), 0, labelNotCompressed);
1121 GetMasm()->Ldrb(vixlDst, MemOperand(vixlStr, dataOffset + index));
1122 GetMasm()->B(labelCharLoaded);
1123 GetMasm()->Bind(labelNotCompressed);
1124 GetMasm()->Ldrh(vixlDst, MemOperand(vixlStr, dataOffset + (index << shift)));
1125 GetMasm()->Bind(labelCharLoaded);
1126 }
1127
1128 /* Unsafe builtins implementation */
1129 void Aarch64Encoder::EncodeCompareAndSwap(Reg dst, Reg obj, Reg offset, Reg val, Reg newval)
1130 {
1131 /* Modeled according to the following logic:
1132 .L2:
1133 ldaxr cur, [addr]
1134 cmp cur, old
1135 bne .L3
1136 stlxr res, new, [addr]
1137 cbnz res, .L2
1138 .L3:
1139 cset w0, eq
1140 */
1141 ScopedTmpReg addr(this, true); /* LR is used */
1142 ScopedTmpReg cur(this, val.GetType());
1143 ScopedTmpReg res(this, val.GetType());
1144 auto loop = CreateLabel();
1145 auto exit = CreateLabel();
1146
1147 /* ldaxr wants [reg]-form of memref (no offset or disp) */
1148 EncodeAdd(addr, obj, offset);
1149
1150 BindLabel(loop);
1151 EncodeLdrExclusive(cur, addr, true);
1152 EncodeJump(exit, cur, val, Condition::NE);
1153 cur.Release();
1154 EncodeStrExclusive(res, newval, addr, true);
1155 EncodeJump(loop, res, Imm(0), Condition::NE);
1156 BindLabel(exit);
1157
1158 GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::eq);
1159 }
1160
1161 void Aarch64Encoder::EncodeUnsafeGetAndSet(Reg dst, Reg obj, Reg offset, Reg val)
1162 {
1163 auto cur = ScopedTmpReg(this, val.GetType());
1164 auto last = ScopedTmpReg(this, val.GetType());
1165 auto addr = ScopedTmpReg(this, true); /* LR is used */
1166 auto mem = MemRef(addr);
1167 auto restart = CreateLabel();
1168 auto retryLdaxr = CreateLabel();
1169
1170 /* ldaxr wants [reg]-form of memref (no offset or disp) */
1171 EncodeAdd(addr, obj, offset);
1172
1173 /* Since GetAndSet is defined as a non-faulting operation, we
1174 * have to cover two possible faulty cases:
1175 *      1. stlxr failed, so we have to retry the ldaxr
1176 *      2. the value we got via ldaxr was not the value we initially
1177 *         loaded, so we have to start from the very beginning */
1178 BindLabel(restart);
1179 EncodeLdrAcquire(last, false, mem);
1180
1181 BindLabel(retryLdaxr);
1182 EncodeLdrExclusive(cur, addr, true);
1183 EncodeJump(restart, cur, last, Condition::NE);
1184 last.Release();
1185 EncodeStrExclusive(dst, val, addr, true);
1186 EncodeJump(retryLdaxr, dst, Imm(0), Condition::NE);
1187
1188 EncodeMov(dst, cur);
1189 }
1190
1191 void Aarch64Encoder::EncodeUnsafeGetAndAdd(Reg dst, Reg obj, Reg offset, Reg val, Reg tmp)
1192 {
1193 ScopedTmpReg cur(this, val.GetType());
1194 ScopedTmpReg last(this, val.GetType());
1195 auto newval = Reg(tmp.GetId(), val.GetType());
1196
1197 auto restart = CreateLabel();
1198 auto retryLdaxr = CreateLabel();
1199
1200 /* addr_reg aliases obj; the obj register will be restored before exit */
1201 auto addr = Reg(obj.GetId(), INT64_TYPE);
1202
1203 /* ldaxr wants [reg]-form of memref (no offset or disp) */
1204 auto mem = MemRef(addr);
1205 EncodeAdd(addr, obj, offset);
1206
1207 /* Since GetAndAdd is defined as a non-faulting operation, we
1208 * have to cover two possible faulty cases:
1209 *      1. stlxr failed, so we have to retry the ldaxr
1210 *      2. the value we got via ldaxr was not the value we initially
1211 *         loaded, so we have to start from the very beginning */
1212 BindLabel(restart);
1213 EncodeLdrAcquire(last, false, mem);
1214 EncodeAdd(newval, last, val);
1215
1216 BindLabel(retryLdaxr);
1217 EncodeLdrExclusive(cur, addr, true);
1218 EncodeJump(restart, cur, last, Condition::NE);
1219 last.Release();
1220 EncodeStrExclusive(dst, newval, addr, true);
1221 EncodeJump(retryLdaxr, dst, Imm(0), Condition::NE);
1222
1223 EncodeSub(obj, addr, offset); /* restore the original value */
1224 EncodeMov(dst, cur);
1225 }
1226
1227 void Aarch64Encoder::EncodeMemoryBarrier(memory_order::Order order)
1228 {
1229 switch (order) {
1230 case memory_order::ACQUIRE: {
1231 GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierReads);
1232 break;
1233 }
1234 case memory_order::RELEASE: {
1235 GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierWrites);
1236 break;
1237 }
1238 case memory_order::FULL: {
1239 GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierAll);
1240 break;
1241 }
1242 default:
1243 break;
1244 }
1245 }
1246
1247 void Aarch64Encoder::EncodeNot(Reg dst, Reg src)
1248 {
1249 GetMasm()->Mvn(VixlReg(dst), VixlReg(src));
1250 }
1251
1252 void Aarch64Encoder::EncodeCastFloat(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1253 {
1254 // We DON'T support casts from float32/64 to int8/16 or bool, because this cast is not declared in
1255 // other languages or architectures, so we do not know what the behavior should be.
1256 // However, there is an implementation in another function: "EncodeCastFloatWithSmallDst". Call it from the "EncodeCast"
1257 // function instead of "EncodeCastFloat". It works as follows: cast from float32/64 to int32, move the sign bit from
1258 // int32 to the dst type, then extend the number from the dst type back to int32 (a requirement of the ISA).
1259 // All work is done in the dst register.
1260 ASSERT(dst.GetSize() >= WORD_SIZE);
1261
1262 if (src.IsFloat() && dst.IsScalar()) {
1263 if (dstSigned) {
1264 GetMasm()->Fcvtzs(VixlReg(dst), VixlVReg(src));
1265 } else {
1266 GetMasm()->Fcvtzu(VixlReg(dst), VixlVReg(src));
1267 }
1268 return;
1269 }
1270 if (src.IsScalar() && dst.IsFloat()) {
1271 auto rzero = GetRegfile()->GetZeroReg().GetId();
1272 if (src.GetId() == rzero) {
1273 if (dst.GetSize() == WORD_SIZE) {
1274 GetMasm()->Fmov(VixlVReg(dst), 0.0F);
1275 } else {
1276 GetMasm()->Fmov(VixlVReg(dst), 0.0);
1277 }
1278 } else if (srcSigned) {
1279 GetMasm()->Scvtf(VixlVReg(dst), VixlReg(src));
1280 } else {
1281 GetMasm()->Ucvtf(VixlVReg(dst), VixlReg(src));
1282 }
1283 return;
1284 }
1285 if (src.IsFloat() && dst.IsFloat()) {
1286 if (src.GetSize() != dst.GetSize()) {
1287 GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
1288 return;
1289 }
1290 GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
1291 return;
1292 }
1293 UNREACHABLE();
1294 }
1295
1296 void Aarch64Encoder::EncodeCastFloatWithSmallDst(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1297 {
1298 // Dst bool type is not supported!
1299
1300 if (src.IsFloat() && dst.IsScalar()) {
1301 if (dstSigned) {
1302 GetMasm()->Fcvtzs(VixlReg(dst), VixlVReg(src));
1303 if (dst.GetSize() < WORD_SIZE) {
1304 constexpr uint32_t TEST_BIT = (1U << (static_cast<uint32_t>(WORD_SIZE) - 1));
1305 ScopedTmpReg tmpReg1(this, dst.GetType());
1306 auto tmp1 = VixlReg(tmpReg1);
1307 ScopedTmpReg tmpReg2(this, dst.GetType());
1308 auto tmp2 = VixlReg(tmpReg2);
1309
1310 // NOLINTNEXTLINE(hicpp-signed-bitwise)
1311 int32_t setBit = (dst.GetSize() == BYTE_SIZE) ? (1UL << (BYTE_SIZE - 1)) : (1UL << (HALF_SIZE - 1));
1312 int32_t remBit = setBit - 1;
1313 GetMasm()->Ands(tmp1, VixlReg(dst), TEST_BIT);
1314
1315 GetMasm()->Orr(tmp1, VixlReg(dst), setBit);
1316 GetMasm()->And(tmp2, VixlReg(dst), remBit);
1317 // Select result - if zero set - tmp2, else tmp1
1318 GetMasm()->Csel(VixlReg(dst), tmp2, tmp1, vixl::aarch64::eq);
1319 EncodeCastScalar(Reg(dst.GetId(), INT32_TYPE), dstSigned, dst, dstSigned);
1320 }
1321 return;
1322 }
1323 GetMasm()->Fcvtzu(VixlReg(dst), VixlVReg(src));
1324 if (dst.GetSize() < WORD_SIZE) {
1325 EncodeCastScalar(Reg(dst.GetId(), INT32_TYPE), dstSigned, dst, dstSigned);
1326 }
1327 return;
1328 }
1329 if (src.IsScalar() && dst.IsFloat()) {
1330 if (srcSigned) {
1331 GetMasm()->Scvtf(VixlVReg(dst), VixlReg(src));
1332 } else {
1333 GetMasm()->Ucvtf(VixlVReg(dst), VixlReg(src));
1334 }
1335 return;
1336 }
1337 if (src.IsFloat() && dst.IsFloat()) {
1338 if (src.GetSize() != dst.GetSize()) {
1339 GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
1340 return;
1341 }
1342 GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
1343 return;
1344 }
1345 UNREACHABLE();
1346 }
1347
1348 void Aarch64Encoder::EncodeCastSigned(Reg dst, Reg src)
1349 {
1350 size_t srcSize = src.GetSize();
1351 size_t dstSize = dst.GetSize();
1352 auto srcR = Reg(src.GetId(), dst.GetType());
1353 // Else signed extend
1354 if (srcSize > dstSize) {
1355 srcSize = dstSize;
1356 }
1357 switch (srcSize) {
1358 case BYTE_SIZE:
1359 GetMasm()->Sxtb(VixlReg(dst), VixlReg(srcR));
1360 break;
1361 case HALF_SIZE:
1362 GetMasm()->Sxth(VixlReg(dst), VixlReg(srcR));
1363 break;
1364 case WORD_SIZE:
1365 GetMasm()->Sxtw(VixlReg(dst), VixlReg(srcR));
1366 break;
1367 case DOUBLE_WORD_SIZE:
1368 GetMasm()->Mov(VixlReg(dst), VixlReg(srcR));
1369 break;
1370 default:
1371 SetFalseResult();
1372 break;
1373 }
1374 }
1375
1376 void Aarch64Encoder::EncodeCastUnsigned(Reg dst, Reg src)
1377 {
1378 size_t srcSize = src.GetSize();
1379 size_t dstSize = dst.GetSize();
1380 auto srcR = Reg(src.GetId(), dst.GetType());
1381 if (srcSize > dstSize && dstSize < WORD_SIZE) {
1382 // We need to truncate the value if it is narrower than 32 bits; this is required by the ISA.
1383 int64_t cutValue = (1ULL << dstSize) - 1;
1384 GetMasm()->And(VixlReg(dst), VixlReg(src), VixlImm(cutValue));
1385 return;
1386 }
1387 // Else unsigned extend
1388 switch (srcSize) {
1389 case BYTE_SIZE:
1390 GetMasm()->Uxtb(VixlReg(dst), VixlReg(srcR));
1391 return;
1392 case HALF_SIZE:
1393 GetMasm()->Uxth(VixlReg(dst), VixlReg(srcR));
1394 return;
1395 case WORD_SIZE:
1396 GetMasm()->Uxtw(VixlReg(dst), VixlReg(srcR));
1397 return;
1398 case DOUBLE_WORD_SIZE:
1399 GetMasm()->Mov(VixlReg(dst), VixlReg(srcR));
1400 return;
1401 default:
1402 SetFalseResult();
1403 return;
1404 }
1405 }
1406
1407 void Aarch64Encoder::EncodeCastScalar(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1408 {
1409 size_t srcSize = src.GetSize();
1410 size_t dstSize = dst.GetSize();
1411 // In our ISA the minimal type is 32-bit, so any type narrower than 32 bits
1412 // must be extended to 32 bits. Therefore we may need two casts
1413 // (for example, i8->u16 works as i8->u16 followed by u16->u32)
1414 if (dstSize < WORD_SIZE) {
1415 if (srcSize > dstSize) {
1416 if (dstSigned) {
1417 EncodeCastSigned(dst, src);
1418 } else {
1419 EncodeCastUnsigned(dst, src);
1420 }
1421 return;
1422 }
1423 if (srcSize == dstSize) {
1424 GetMasm()->Mov(VixlReg(dst), VixlReg(src));
1425 if (!(srcSigned || dstSigned) || (srcSigned && dstSigned)) {
1426 return;
1427 }
1428 if (dstSigned) {
1429 EncodeCastSigned(Reg(dst.GetId(), INT32_TYPE), dst);
1430 } else {
1431 EncodeCastUnsigned(Reg(dst.GetId(), INT32_TYPE), dst);
1432 }
1433 return;
1434 }
1435 if (srcSigned) {
1436 EncodeCastSigned(dst, src);
1437 if (!dstSigned) {
1438 EncodeCastUnsigned(Reg(dst.GetId(), INT32_TYPE), dst);
1439 }
1440 } else {
1441 EncodeCastUnsigned(dst, src);
1442 if (dstSigned) {
1443 EncodeCastSigned(Reg(dst.GetId(), INT32_TYPE), dst);
1444 }
1445 }
1446 } else {
1447 if (srcSize == dstSize) {
1448 GetMasm()->Mov(VixlReg(dst), VixlReg(src));
1449 return;
1450 }
1451 if (srcSigned) {
1452 EncodeCastSigned(dst, src);
1453 } else {
1454 EncodeCastUnsigned(dst, src);
1455 }
1456 }
1457 }
1458
1459 void Aarch64Encoder::EncodeFastPathDynamicCast(Reg dst, Reg src, LabelHolder::LabelId slow)
1460 {
1461 ASSERT(IsJsNumberCast());
1462 ASSERT(src.IsFloat() && dst.IsScalar());
1463
1464 CHECK_EQ(src.GetSize(), BITS_PER_UINT64);
1465 CHECK_EQ(dst.GetSize(), BITS_PER_UINT32);
1466
1467 // We use a slow path because, in general, the JS double -> int32 cast is complex; we check only a few common
1468 // cases here and move the other checks to the slow path. If the CPU supports the special JS double -> int32
1469 // instruction, we do not need the slow path.
1470 if (!IsLabelValid(slow)) {
1471 // use special JS aarch64 instruction
1472 #ifndef NDEBUG
1473 vixl::CPUFeaturesScope scope(GetMasm(), vixl::CPUFeatures::kFP, vixl::CPUFeatures::kJSCVT);
1474 #endif
1475 GetMasm()->Fjcvtzs(VixlReg(dst), VixlVReg(src));
1476 return;
1477 }
1478
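// Overflow detection scheme: "cmp dst, #1" sets the V flag only when dst == INT64_MIN,
// and the conditional "ccmp dst, #-1" (executed when V is clear) sets V only when
// dst == INT64_MAX; a single "b.vs" therefore catches both saturated results.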
1479 // infinite and big numbers will overflow here to INT64_MIN or INT64_MAX, but NaN casts to 0
1480 GetMasm()->Fcvtzs(VixlReg(dst, DOUBLE_WORD_SIZE), VixlVReg(src));
1481 // check INT64_MIN
1482 GetMasm()->Cmp(VixlReg(dst, DOUBLE_WORD_SIZE), VixlImm(1));
1483 // check INT64_MAX
1484 GetMasm()->Ccmp(VixlReg(dst, DOUBLE_WORD_SIZE), VixlImm(-1), vixl::aarch64::StatusFlags::VFlag,
1485 vixl::aarch64::Condition::vc);
1486 auto slowLabel {static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(slow)};
1487 // jump to slow path in case of overflow
1488 GetMasm()->B(slowLabel, vixl::aarch64::Condition::vs);
1489 }
1490
1491 void Aarch64Encoder::EncodeCast(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1492 {
1493 if (src.IsFloat() || dst.IsFloat()) {
1494 EncodeCastFloat(dst, dstSigned, src, srcSigned);
1495 return;
1496 }
1497
1498 ASSERT(src.IsScalar() && dst.IsScalar());
1499 auto rzero = GetRegfile()->GetZeroReg().GetId();
1500 if (src.GetId() == rzero) {
1501 ASSERT(dst.GetId() != rzero);
1502 EncodeMov(dst, Imm(0));
1503 return;
1504 }
1505 // Scalar part
1506 EncodeCastScalar(dst, dstSigned, src, srcSigned);
1507 }
1508
1509 void Aarch64Encoder::EncodeCastToBool(Reg dst, Reg src)
1510 {
1511 // The ISA says that we only support the casts:
1512 // i32tou1, i64tou1, u32tou1, u64tou1
1513 ASSERT(src.IsScalar());
1514 ASSERT(dst.IsScalar());
1515
1516 GetMasm()->Cmp(VixlReg(src), VixlImm(0));
1517 // In our ISA the minimal type is 32-bit, so bool is kept in a 32-bit register
1518 GetMasm()->Cset(VixlReg(Reg(dst.GetId(), INT32_TYPE)), vixl::aarch64::Condition::ne);
1519 }
1520
1521 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src0, Shift src1)
1522 {
1523 if (dst.IsFloat()) {
1524 UNREACHABLE();
1525 }
1526 ASSERT(src0.GetSize() <= dst.GetSize());
1527 if (src0.GetSize() < dst.GetSize()) {
1528 auto src0Reg = Reg(src0.GetId(), dst.GetType());
1529 auto src1Reg = Reg(src1.GetBase().GetId(), dst.GetType());
1530 GetMasm()->Add(VixlReg(dst), VixlReg(src0Reg), VixlShift(Shift(src1Reg, src1.GetType(), src1.GetScale())));
1531 return;
1532 }
1533 GetMasm()->Add(VixlReg(dst), VixlReg(src0), VixlShift(src1));
1534 }
1535
1536 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src0, Reg src1)
1537 {
1538 if (dst.IsFloat()) {
1539 GetMasm()->Fadd(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1540 return;
1541 }
1542
1543 /* if any of the operands has 64-bits size,
1544 * forcibly do the 64-bits wide operation */
1545 if ((src0.GetSize() | src1.GetSize() | dst.GetSize()) >= DOUBLE_WORD_SIZE) {
1546 GetMasm()->Add(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1547 } else {
1548 /* Otherwise do 32-bits operation as any lesser
1549 * sizes have to be upcasted to 32-bits anyway */
1550 GetMasm()->Add(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1551 }
1552 }
1553
1554 void Aarch64Encoder::EncodeSub(Reg dst, Reg src0, Shift src1)
1555 {
1556 ASSERT(dst.IsScalar());
1557 GetMasm()->Sub(VixlReg(dst), VixlReg(src0), VixlShift(src1));
1558 }
1559
1560 void Aarch64Encoder::EncodeSub(Reg dst, Reg src0, Reg src1)
1561 {
1562 if (dst.IsFloat()) {
1563 GetMasm()->Fsub(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1564 return;
1565 }
1566
1567 /* if any of the operands has 64-bits size,
1568 * forcibly do the 64-bits wide operation */
1569 if ((src0.GetSize() | src1.GetSize() | dst.GetSize()) >= DOUBLE_WORD_SIZE) {
1570 GetMasm()->Sub(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1571 } else {
1572 /* Otherwise do 32-bits operation as any lesser
1573 * sizes have to be upcasted to 32-bits anyway */
1574 GetMasm()->Sub(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1575 }
1576 }
1577
1578 void Aarch64Encoder::EncodeMul(Reg dst, Reg src0, Reg src1)
1579 {
1580 if (dst.IsFloat()) {
1581 GetMasm()->Fmul(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1582 return;
1583 }
1584 auto rzero = GetRegfile()->GetZeroReg().GetId();
1585 if (src0.GetId() == rzero || src1.GetId() == rzero) {
1586 EncodeMov(dst, Imm(0));
1587 return;
1588 }
1589 GetMasm()->Mul(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1590 }
1591
1592 void Aarch64Encoder::EncodeAddOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1593 {
1594 ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1595 ASSERT(cc == Condition::VS || cc == Condition::VC);
1596 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1597 GetMasm()->Adds(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1598 } else {
1599 /* Otherwise do 32-bits operation as any lesser
1600 * sizes have to be upcasted to 32-bits anyway */
1601 GetMasm()->Adds(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1602 }
1603 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
1604 GetMasm()->B(label, Convert(cc));
1605 }
1606
1607 void Aarch64Encoder::EncodeSubOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1608 {
1609 ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1610 ASSERT(cc == Condition::VS || cc == Condition::VC);
1611 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1612 GetMasm()->Subs(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1613 } else {
1614 /* Otherwise do 32-bits operation as any lesser
1615 * sizes have to be upcasted to 32-bits anyway */
1616 GetMasm()->Subs(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1617 }
1618 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
1619 GetMasm()->B(label, Convert(cc));
1620 }
1621
1622 void Aarch64Encoder::EncodeNegOverflowAndZero(compiler::LabelHolder::LabelId id, Reg dst, Reg src)
1623 {
1624 ASSERT(!dst.IsFloat() && !src.IsFloat());
1625 // NOLINTNEXTLINE(readability-magic-numbers)
1626 EncodeJumpTest(id, src, Imm(0x7fffffff), Condition::TST_EQ);
1627 GetMasm()->Neg(VixlReg(dst).W(), VixlReg(src).W());
1628 }
1629
1630 void Aarch64Encoder::EncodeDiv(Reg dst, bool dstSigned, Reg src0, Reg src1)
1631 {
1632 if (dst.IsFloat()) {
1633 GetMasm()->Fdiv(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1634 return;
1635 }
1636
1637 auto rzero = GetRegfile()->GetZeroReg().GetId();
1638 if (src1.GetId() == rzero || src0.GetId() == rzero) {
1639 ScopedTmpReg tmpReg(this, src1.GetType());
1640 EncodeMov(tmpReg, Imm(0));
1641 // Denominator is zero-reg
1642 if (src1.GetId() == rzero) {
1643 // Encode Abort
1644 GetMasm()->Udiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(tmpReg));
1645 return;
1646 }
1647
1648 // But src1 still may be zero
1649 if (src1.GetId() != src0.GetId()) {
1650 if (dstSigned) {
1651 GetMasm()->Sdiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(src1));
1652 } else {
1653 GetMasm()->Udiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(src1));
1654 }
1655 return;
1656 }
1657 UNREACHABLE();
1658 }
1659 if (dstSigned) {
1660 GetMasm()->Sdiv(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1661 } else {
1662 GetMasm()->Udiv(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1663 }
1664 }
1665
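/* Illustrative sketch (not part of the emitted code): AArch64 has no remainder
 * instruction, so the scalar path below computes dst = src0 - (src0 / src1) * src1
 * with a divide followed by a multiply-subtract, e.g. for signed 32-bit operands:
 *
 *   sdiv w_tmp, w_src0, w_src1
 *   msub w_dst, w_tmp, w_src1, w_src0
 *
 * Register names are placeholders; the actual temporaries come from ScopedTmpReg. */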
1666 void Aarch64Encoder::EncodeMod(Reg dst, bool dstSigned, Reg src0, Reg src1)
1667 {
1668 if (dst.IsScalar()) {
1669 auto rzero = GetRegfile()->GetZeroReg().GetId();
1670 if (src1.GetId() == rzero || src0.GetId() == rzero) {
1671 ScopedTmpReg tmpReg(this, src1.GetType());
1672 EncodeMov(tmpReg, Imm(0));
1673 // Denominator is zero-reg
1674 if (src1.GetId() == rzero) {
1675 // Encode Abort
1676 GetMasm()->Udiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(tmpReg));
1677 return;
1678 }
1679
1680 if (src1.GetId() == src0.GetId()) {
1681 SetFalseResult();
1682 return;
1683 }
1684 // But src1 still may be zero
1685 ScopedTmpRegU64 tmpRegUd(this);
1686 if (dst.GetSize() < DOUBLE_WORD_SIZE) {
1687 tmpRegUd.ChangeType(INT32_TYPE);
1688 }
1689 auto tmp = VixlReg(tmpRegUd);
1690 if (!dstSigned) {
1691 GetMasm()->Udiv(tmp, VixlReg(tmpReg), VixlReg(src1));
1692 GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(tmpReg));
1693 return;
1694 }
1695 GetMasm()->Sdiv(tmp, VixlReg(tmpReg), VixlReg(src1));
1696 GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(tmpReg));
1697 return;
1698 }
1699
1700 ScopedTmpRegU64 tmpReg(this);
1701 if (dst.GetSize() < DOUBLE_WORD_SIZE) {
1702 tmpReg.ChangeType(INT32_TYPE);
1703 }
1704 auto tmp = VixlReg(tmpReg);
1705
1706 if (!dstSigned) {
1707 GetMasm()->Udiv(tmp, VixlReg(src0), VixlReg(src1));
1708 GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(src0));
1709 return;
1710 }
1711 GetMasm()->Sdiv(tmp, VixlReg(src0), VixlReg(src1));
1712 GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(src0));
1713 return;
1714 }
1715
1716 EncodeFMod(dst, src0, src1);
1717 }
1718
1719 void Aarch64Encoder::EncodeFMod(Reg dst, Reg src0, Reg src1)
1720 {
1721 ASSERT(dst.IsFloat());
1722
1723 if (dst.GetType() == FLOAT32_TYPE) {
1724 using Fp = float (*)(float, float);
1725 MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<Fp>(fmodf)));
1726 } else {
1727 using Fp = double (*)(double, double);
1728 MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<Fp>(fmod)));
1729 }
1730 }
1731
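/* Signed division by a constant is strength-reduced to a multiply by a "magic"
 * reciprocal plus a shift. A rough worked example, assuming the classic 32-bit
 * constants for division by 3 (magic = 0x55555556, shift = 0); the real values
 * are produced by FastConstSignedDivisor:
 *
 *   q = static_cast<int32_t>((static_cast<int64_t>(x) * 0x55555556LL) >> 32);  // smull + asr
 *   q += (q < 0) ? 1 : 0;                                                      // add the sign bit
 *
 * For x = 7 this gives q = 2 and for x = -7 it gives q = -2, matching truncating
 * division. When the divisor and the magic have different signs, an extra
 * adds/subs of the shifted dividend is emitted first and the final correction
 * uses the sign flag (Cinc ... mi) instead. */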
1732 void Aarch64Encoder::EncodeSignedDiv(Reg dst, Reg src0, Imm imm)
1733 {
1734 int64_t divisor = imm.GetAsInt();
1735
1736 FastConstSignedDivisor fastDivisor(divisor, dst.GetSize());
1737 int64_t magic = fastDivisor.GetMagic();
1738
1739 ScopedTmpReg tmp(this, dst.GetType());
1740 Reg tmp64 = tmp.GetReg().As(INT64_TYPE);
1741 EncodeMov(tmp, Imm(magic));
1742
1743 int64_t extraShift = 0;
1744 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1745 GetMasm()->Smulh(VixlReg(tmp), VixlReg(src0), VixlReg(tmp));
1746 } else {
1747 GetMasm()->Smull(VixlReg(tmp64), VixlReg(src0), VixlReg(tmp));
1748 extraShift = WORD_SIZE;
1749 }
1750
1751 bool useSignFlag = false;
1752 if (divisor > 0 && magic < 0) {
1753 GetMasm()->Adds(VixlReg(tmp64), VixlReg(tmp64), VixlShift(Shift(src0.As(INT64_TYPE), extraShift)));
1754 useSignFlag = true;
1755 } else if (divisor < 0 && magic > 0) {
1756 GetMasm()->Subs(VixlReg(tmp64), VixlReg(tmp64), VixlShift(Shift(src0.As(INT64_TYPE), extraShift)));
1757 useSignFlag = true;
1758 }
1759
1760 int64_t shift = fastDivisor.GetShift();
1761 EncodeAShr(dst.As(INT64_TYPE), tmp64, Imm(shift + extraShift));
1762
1763 // result = (result < 0 ? result + 1 : result)
1764 if (useSignFlag) {
1765 GetMasm()->Cinc(VixlReg(dst), VixlReg(dst), vixl::aarch64::Condition::mi);
1766 } else {
1767 GetMasm()->Add(VixlReg(dst), VixlReg(dst), VixlShift(Shift(dst, ShiftType::LSR, dst.GetSize() - 1U)));
1768 }
1769 }
1770
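/* Unsigned division by a constant uses the same multiply-by-magic trick.
 * When fastDivisor.GetAdd() is set, the standard fix-up below is emitted.
 * Illustrative example, assuming the usual 32-bit constants for division by 7
 * (magic = 0x24924925, shift = 3, add = true); the encoder takes the actual
 * constants from FastConstUnsignedDivisor:
 *
 *   t = static_cast<uint32_t>((static_cast<uint64_t>(n) * 0x24924925ULL) >> 32);  // umull + lsr
 *   q = (t + ((n - t) >> 1)) >> (3 - 1);
 *
 * For n = 100: t = 14, q = (14 + 43) >> 2 = 14 = 100 / 7. */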
1771 void Aarch64Encoder::EncodeUnsignedDiv(Reg dst, Reg src0, Imm imm)
1772 {
1773 auto divisor = bit_cast<uint64_t>(imm.GetAsInt());
1774
1775 FastConstUnsignedDivisor fastDivisor(divisor, dst.GetSize());
1776 uint64_t magic = fastDivisor.GetMagic();
1777
1778 ScopedTmpReg tmp(this, dst.GetType());
1779 Reg tmp64 = tmp.GetReg().As(INT64_TYPE);
1780 EncodeMov(tmp, Imm(magic));
1781
1782 uint64_t extraShift = 0;
1783 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1784 GetMasm()->Umulh(VixlReg(tmp), VixlReg(src0), VixlReg(tmp));
1785 } else {
1786 GetMasm()->Umull(VixlReg(tmp64), VixlReg(src0), VixlReg(tmp));
1787 extraShift = WORD_SIZE;
1788 }
1789
1790 uint64_t shift = fastDivisor.GetShift();
1791 if (!fastDivisor.GetAdd()) {
1792 EncodeShr(dst.As(INT64_TYPE), tmp64, Imm(shift + extraShift));
1793 } else {
1794 ASSERT(shift >= 1U);
1795 if (extraShift > 0U) {
1796 EncodeShr(tmp64, tmp64, Imm(extraShift));
1797 }
1798 EncodeSub(dst, src0, tmp);
1799 GetMasm()->Add(VixlReg(dst), VixlReg(tmp), VixlShift(Shift(dst, ShiftType::LSR, 1U)));
1800 EncodeShr(dst, dst, Imm(shift - 1U));
1801 }
1802 }
1803
1804 void Aarch64Encoder::EncodeDiv(Reg dst, Reg src0, Imm imm, bool isSigned)
1805 {
1806 ASSERT(dst.IsScalar() && dst.GetSize() >= WORD_SIZE);
1807 ASSERT(CanOptimizeImmDivMod(bit_cast<uint64_t>(imm.GetAsInt()), isSigned));
1808 if (isSigned) {
1809 EncodeSignedDiv(dst, src0, imm);
1810 } else {
1811 EncodeUnsignedDiv(dst, src0, imm);
1812 }
1813 }
1814
1815 void Aarch64Encoder::EncodeMod(Reg dst, Reg src0, Imm imm, bool isSigned)
1816 {
1817 ASSERT(dst.IsScalar() && dst.GetSize() >= WORD_SIZE);
1818 ASSERT(CanOptimizeImmDivMod(bit_cast<uint64_t>(imm.GetAsInt()), isSigned));
1819 // dst = src0 - imm * (src0 / imm)
1820 ScopedTmpReg tmp(this, dst.GetType());
1821 EncodeDiv(tmp, src0, imm, isSigned);
1822
1823 ScopedTmpReg immReg(this, dst.GetType());
1824 EncodeMov(immReg, imm);
1825
1826 GetMasm()->Msub(VixlReg(dst), VixlReg(immReg), VixlReg(tmp), VixlReg(src0));
1827 }
1828
1829 void Aarch64Encoder::EncodeMin(Reg dst, bool dstSigned, Reg src0, Reg src1)
1830 {
1831 if (dst.IsFloat()) {
1832 GetMasm()->Fmin(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1833 return;
1834 }
1835 if (dstSigned) {
1836 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
1837 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::lt);
1838 return;
1839 }
1840 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
1841 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::ls);
1842 }
1843
1844 void Aarch64Encoder::EncodeMax(Reg dst, bool dstSigned, Reg src0, Reg src1)
1845 {
1846 if (dst.IsFloat()) {
1847 GetMasm()->Fmax(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1848 return;
1849 }
1850 if (dstSigned) {
1851 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
1852 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::gt);
1853 return;
1854 }
1855 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
1856 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::hi);
1857 }
1858
1859 void Aarch64Encoder::EncodeShl(Reg dst, Reg src0, Reg src1)
1860 {
1861 auto rzero = GetRegfile()->GetZeroReg().GetId();
1862 ASSERT(dst.GetId() != rzero);
1863 if (src0.GetId() == rzero) {
1864 EncodeMov(dst, Imm(0));
1865 return;
1866 }
1867 if (src1.GetId() == rzero) {
1868 EncodeMov(dst, src0);
1869 }
1870 if (dst.GetSize() < WORD_SIZE) {
1871 GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
1872 }
1873 GetMasm()->Lsl(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1874 }
1875
1876 void Aarch64Encoder::EncodeShr(Reg dst, Reg src0, Reg src1)
1877 {
1878 auto rzero = GetRegfile()->GetZeroReg().GetId();
1879 ASSERT(dst.GetId() != rzero);
1880 if (src0.GetId() == rzero) {
1881 EncodeMov(dst, Imm(0));
1882 return;
1883 }
1884 if (src1.GetId() == rzero) {
1885 EncodeMov(dst, src0);
1886 }
1887
1888 if (dst.GetSize() < WORD_SIZE) {
1889 GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
1890 }
1891
1892 GetMasm()->Lsr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1893 }
1894
1895 void Aarch64Encoder::EncodeAShr(Reg dst, Reg src0, Reg src1)
1896 {
1897 auto rzero = GetRegfile()->GetZeroReg().GetId();
1898 ASSERT(dst.GetId() != rzero);
1899 if (src0.GetId() == rzero) {
1900 EncodeMov(dst, Imm(0));
1901 return;
1902 }
1903 if (src1.GetId() == rzero) {
1904 EncodeMov(dst, src0);
1905 }
1906
1907 if (dst.GetSize() < WORD_SIZE) {
1908 GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
1909 }
1910 GetMasm()->Asr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1911 }
1912
1913 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src0, Reg src1)
1914 {
1915 GetMasm()->And(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1916 }
1917
1918 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src0, Shift src1)
1919 {
1920 GetMasm()->And(VixlReg(dst), VixlReg(src0), VixlShift(src1));
1921 }
1922
1923 void Aarch64Encoder::EncodeOr(Reg dst, Reg src0, Reg src1)
1924 {
1925 GetMasm()->Orr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1926 }
1927
1928 void Aarch64Encoder::EncodeOr(Reg dst, Reg src0, Shift src1)
1929 {
1930 GetMasm()->Orr(VixlReg(dst), VixlReg(src0), VixlShift(src1));
1931 }
1932
1933 void Aarch64Encoder::EncodeXor(Reg dst, Reg src0, Reg src1)
1934 {
1935 GetMasm()->Eor(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1936 }
1937
1938 void Aarch64Encoder::EncodeXor(Reg dst, Reg src0, Shift src1)
1939 {
1940 GetMasm()->Eor(VixlReg(dst), VixlReg(src0), VixlShift(src1));
1941 }
1942
1943 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src, Imm imm)
1944 {
1945 ASSERT(dst.IsScalar() && "UNIMPLEMENTED");
1946 ASSERT(dst.GetSize() >= src.GetSize());
1947 if (dst.GetSize() != src.GetSize()) {
1948 auto srcReg = Reg(src.GetId(), dst.GetType());
1949 GetMasm()->Add(VixlReg(dst), VixlReg(srcReg), VixlImm(imm));
1950 return;
1951 }
1952 GetMasm()->Add(VixlReg(dst), VixlReg(src), VixlImm(imm));
1953 }
1954
1955 void Aarch64Encoder::EncodeSub(Reg dst, Reg src, Imm imm)
1956 {
1957 ASSERT(dst.IsScalar() && "UNIMPLEMENTED");
1958 GetMasm()->Sub(VixlReg(dst), VixlReg(src), VixlImm(imm));
1959 }
1960
1961 void Aarch64Encoder::EncodeShl(Reg dst, Reg src, Imm imm)
1962 {
1963 ASSERT(dst.IsScalar() && "Invalid operand type");
1964 auto rzero = GetRegfile()->GetZeroReg().GetId();
1965 ASSERT(dst.GetId() != rzero);
1966 if (src.GetId() == rzero) {
1967 EncodeMov(dst, Imm(0));
1968 return;
1969 }
1970
1971 GetMasm()->Lsl(VixlReg(dst), VixlReg(src), imm.GetAsInt());
1972 }
1973
1974 void Aarch64Encoder::EncodeShr(Reg dst, Reg src, Imm imm)
1975 {
1976 int64_t immValue = static_cast<uint64_t>(imm.GetAsInt()) & (dst.GetSize() - 1);
1977
1978 ASSERT(dst.IsScalar() && "Invalid operand type");
1979 auto rzero = GetRegfile()->GetZeroReg().GetId();
1980 ASSERT(dst.GetId() != rzero);
1981 if (src.GetId() == rzero) {
1982 EncodeMov(dst, Imm(0));
1983 return;
1984 }
1985
1986 GetMasm()->Lsr(VixlReg(dst), VixlReg(src), immValue);
1987 }
1988
1989 void Aarch64Encoder::EncodeAShr(Reg dst, Reg src, Imm imm)
1990 {
1991 ASSERT(dst.IsScalar() && "Invalid operand type");
1992 GetMasm()->Asr(VixlReg(dst), VixlReg(src), imm.GetAsInt());
1993 }
1994
1995 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src, Imm imm)
1996 {
1997 ASSERT(dst.IsScalar() && "Invalid operand type");
1998 GetMasm()->And(VixlReg(dst), VixlReg(src), VixlImm(imm));
1999 }
2000
2001 void Aarch64Encoder::EncodeOr(Reg dst, Reg src, Imm imm)
2002 {
2003 ASSERT(dst.IsScalar() && "Invalid operand type");
2004 GetMasm()->Orr(VixlReg(dst), VixlReg(src), VixlImm(imm));
2005 }
2006
2007 void Aarch64Encoder::EncodeXor(Reg dst, Reg src, Imm imm)
2008 {
2009 ASSERT(dst.IsScalar() && "Invalid operand type");
2010 GetMasm()->Eor(VixlReg(dst), VixlReg(src), VixlImm(imm));
2011 }
2012
2013 void Aarch64Encoder::EncodeMov(Reg dst, Imm src)
2014 {
2015 if (dst.IsFloat()) {
2016 if (dst.GetSize() == WORD_SIZE) {
2017 GetMasm()->Fmov(VixlVReg(dst), src.GetAsFloat());
2018 } else {
2019 GetMasm()->Fmov(VixlVReg(dst), src.GetAsDouble());
2020 }
2021 return;
2022 }
2023 if (dst.GetSize() > WORD_SIZE) {
2024 GetMasm()->Mov(VixlReg(dst), VixlImm(src));
2025 } else {
2026 GetMasm()->Mov(VixlReg(dst), VixlImm(static_cast<int32_t>(src.GetAsInt())));
2027 }
2028 }
2029
2030 void Aarch64Encoder::EncodeLdr(Reg dst, bool dstSigned, MemRef mem)
2031 {
2032 auto rzero = GetRegfile()->GetZeroReg().GetId();
2033 if (!ConvertMem(mem).IsValid() || (dst.GetId() == rzero && dst.IsScalar())) {
2034 // Try to reuse dst as the zero base register (to avoid creating a temp register)
2035 // Requirements: dst is not a vector register, not the index register and not rzero
2036 [[maybe_unused]] auto baseReg = mem.GetBase();
2037 auto indexReg = mem.GetIndex();
2038
2039 // Invalid == base is rzero or invalid
2040 ASSERT(baseReg.GetId() == rzero || !baseReg.IsValid());
2041 // Check whether dst can be reused as the base register
2042 if (dst.IsScalar() && dst.IsValid() && // not float
2043 (indexReg.GetId() != dst.GetId()) && // not index
2044 (dst.GetId() != rzero)) { // not rzero
2045 // dst can stand in for rzero here
2046 EncodeMov(dst, Imm(0));
2047
2048 auto fixMem = MemRef(dst, indexReg, mem.GetScale(), mem.GetDisp());
2049 ASSERT(ConvertMem(fixMem).IsValid());
2050 EncodeLdr(dst, dstSigned, fixMem);
2051 } else {
2052 // Use tmp-reg
2053 ScopedTmpReg tmpReg(this);
2054 EncodeMov(tmpReg, Imm(0));
2055
2056 auto fixMem = MemRef(tmpReg, indexReg, mem.GetScale(), mem.GetDisp());
2057 ASSERT(ConvertMem(fixMem).IsValid());
2058 // dst is the zero register, so load into the temp instead
2059 EncodeLdr(tmpReg, dstSigned, fixMem);
2060 }
2061 return;
2062 }
2063 ASSERT(ConvertMem(mem).IsValid());
2064 if (dst.IsFloat()) {
2065 GetMasm()->Ldr(VixlVReg(dst), ConvertMem(mem));
2066 return;
2067 }
2068 if (dstSigned) {
2069 if (dst.GetSize() == BYTE_SIZE) {
2070 GetMasm()->Ldrsb(VixlReg(dst, DOUBLE_WORD_SIZE), ConvertMem(mem));
2071 return;
2072 }
2073 if (dst.GetSize() == HALF_SIZE) {
2074 GetMasm()->Ldrsh(VixlReg(dst), ConvertMem(mem));
2075 return;
2076 }
2077 } else {
2078 if (dst.GetSize() == BYTE_SIZE) {
2079 GetMasm()->Ldrb(VixlReg(dst, WORD_SIZE), ConvertMem(mem));
2080 return;
2081 }
2082 if (dst.GetSize() == HALF_SIZE) {
2083 GetMasm()->Ldrh(VixlReg(dst), ConvertMem(mem));
2084 return;
2085 }
2086 }
2087 GetMasm()->Ldr(VixlReg(dst), ConvertMem(mem));
2088 }
2089
2090 void Aarch64Encoder::EncodeLdrAcquireInvalid(Reg dst, bool dstSigned, MemRef mem)
2091 {
2092 // Try to reuse dst as the zero base register (to avoid creating a temp register)
2093 // Requirements: dst is not a vector register, not the index register and not rzero
2094 [[maybe_unused]] auto baseReg = mem.GetBase();
2095 auto rzero = GetRegfile()->GetZeroReg().GetId();
2096
2097 auto indexReg = mem.GetIndex();
2098
2099 // Invalid == base is rzero or invalid
2100 ASSERT(baseReg.GetId() == rzero || !baseReg.IsValid());
2101 // Check whether dst can be reused as the base register
2102 if (dst.IsScalar() && dst.IsValid() && // not float
2103 (indexReg.GetId() != dst.GetId()) && // not index
2104 (dst.GetId() != rzero)) { // not rzero
2105 // dst can stand in for rzero here
2106 EncodeMov(dst, Imm(0));
2107
2108 auto fixMem = MemRef(dst, indexReg, mem.GetScale(), mem.GetDisp());
2109 ASSERT(ConvertMem(fixMem).IsValid());
2110 EncodeLdrAcquire(dst, dstSigned, fixMem);
2111 } else {
2112 // Use tmp-reg
2113 ScopedTmpReg tmpReg(this);
2114 EncodeMov(tmpReg, Imm(0));
2115
2116 auto fixMem = MemRef(tmpReg, indexReg, mem.GetScale(), mem.GetDisp());
2117 ASSERT(ConvertMem(fixMem).IsValid());
2118 // dst is the zero register, so load into the temp instead
2119 EncodeLdrAcquire(tmpReg, dstSigned, fixMem);
2120 }
2121 }
2122
2123 void Aarch64Encoder::EncodeLdrAcquireScalar(Reg dst, bool dstSigned, MemRef mem)
2124 {
2125 #ifndef NDEBUG
2126 CheckAlignment(mem, dst.GetSize());
2127 #endif // NDEBUG
2128 if (dstSigned) {
2129 if (dst.GetSize() == BYTE_SIZE) {
2130 GetMasm()->Ldarb(VixlReg(dst), ConvertMem(mem));
2131 GetMasm()->Sxtb(VixlReg(dst), VixlReg(dst));
2132 return;
2133 }
2134 if (dst.GetSize() == HALF_SIZE) {
2135 GetMasm()->Ldarh(VixlReg(dst), ConvertMem(mem));
2136 GetMasm()->Sxth(VixlReg(dst), VixlReg(dst));
2137 return;
2138 }
2139 if (dst.GetSize() == WORD_SIZE) {
2140 GetMasm()->Ldar(VixlReg(dst), ConvertMem(mem));
2141 GetMasm()->Sxtw(VixlReg(dst), VixlReg(dst));
2142 return;
2143 }
2144 } else {
2145 if (dst.GetSize() == BYTE_SIZE) {
2146 GetMasm()->Ldarb(VixlReg(dst, WORD_SIZE), ConvertMem(mem));
2147 return;
2148 }
2149 if (dst.GetSize() == HALF_SIZE) {
2150 GetMasm()->Ldarh(VixlReg(dst), ConvertMem(mem));
2151 return;
2152 }
2153 }
2154 GetMasm()->Ldar(VixlReg(dst), ConvertMem(mem));
2155 }
2156
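/* Debug-only helper: verifies that the address used by an acquire/release or
 * exclusive access is naturally aligned. Roughly, for an 8-byte access it emits
 * (register names are illustrative):
 *
 *   tst  x_base, #0x7        ; alignmentMask = access size in bytes - 1
 *   b.eq aligned
 *   <abort sequence>
 * aligned:
 */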
2157 void Aarch64Encoder::CheckAlignment(MemRef mem, size_t size)
2158 {
2159 ASSERT(size == WORD_SIZE || size == BYTE_SIZE || size == HALF_SIZE || size == DOUBLE_WORD_SIZE);
2160 if (size == BYTE_SIZE) {
2161 return;
2162 }
2163 size_t alignmentMask = (size >> 3U) - 1;
2164 ASSERT(!mem.HasIndex() && !mem.HasScale());
2165 if (mem.HasDisp()) {
2166 // Checking base + offset in one go would require an additional tmp register.
2167 // The case where the base and the offset are individually unaligned but their sum is aligned is very rare,
2168 // so the base and the offset are checked for alignment separately
2169 [[maybe_unused]] size_t offset = mem.GetDisp();
2170 ASSERT((offset & alignmentMask) == 0);
2171 }
2172 auto baseReg = mem.GetBase();
2173 auto end = CreateLabel();
2174 EncodeJumpTest(end, baseReg, Imm(alignmentMask), Condition::TST_EQ);
2175 EncodeAbort();
2176 BindLabel(end);
2177 }
2178
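/* LDAR/LDARB/LDARH only accept a plain base-register address, so any index or
 * displacement in `mem` is folded into a temporary (or into dst itself) with an
 * add/mov before the acquire load is issued. For a float destination the value is
 * first loaded into a scalar temp and then transferred with fmov, e.g. (sketch):
 *
 *   add  x_tmp, x_base, #disp
 *   ldar x_tmp, [x_tmp]
 *   fmov d_dst, x_tmp
 */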
2179 void Aarch64Encoder::EncodeLdrAcquire(Reg dst, bool dstSigned, MemRef mem)
2180 {
2181 if (mem.HasIndex()) {
2182 ScopedTmpRegU64 tmpReg(this);
2183 if (mem.HasScale()) {
2184 EncodeAdd(tmpReg, mem.GetBase(), Shift(mem.GetIndex(), mem.GetScale()));
2185 } else {
2186 EncodeAdd(tmpReg, mem.GetBase(), mem.GetIndex());
2187 }
2188 mem = MemRef(tmpReg, mem.GetDisp());
2189 }
2190
2191 auto rzero = GetRegfile()->GetZeroReg().GetId();
2192 if (!ConvertMem(mem).IsValid() || (dst.GetId() == rzero && dst.IsScalar())) {
2193 EncodeLdrAcquireInvalid(dst, dstSigned, mem);
2194 return;
2195 }
2196
2197 ASSERT(!mem.HasIndex() && !mem.HasScale());
2198 if (dst.IsFloat()) {
2199 ScopedTmpRegU64 tmpReg(this);
2200 auto memLdar = mem;
2201 if (mem.HasDisp()) {
2202 if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2203 EncodeAdd(tmpReg, mem.GetBase(), Imm(mem.GetDisp()));
2204 } else {
2205 EncodeMov(tmpReg, Imm(mem.GetDisp()));
2206 EncodeAdd(tmpReg, mem.GetBase(), tmpReg);
2207 }
2208 memLdar = MemRef(tmpReg);
2209 }
2210 #ifndef NDEBUG
2211 CheckAlignment(memLdar, dst.GetSize());
2212 #endif // NDEBUG
2213 auto tmp = VixlReg(tmpReg, dst.GetSize());
2214 GetMasm()->Ldar(tmp, ConvertMem(memLdar));
2215 GetMasm()->Fmov(VixlVReg(dst), tmp);
2216 return;
2217 }
2218
2219 if (!mem.HasDisp()) {
2220 EncodeLdrAcquireScalar(dst, dstSigned, mem);
2221 return;
2222 }
2223
2224 Reg dst64(dst.GetId(), INT64_TYPE);
2225 if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2226 EncodeAdd(dst64, mem.GetBase(), Imm(mem.GetDisp()));
2227 } else {
2228 EncodeMov(dst64, Imm(mem.GetDisp()));
2229 EncodeAdd(dst64, mem.GetBase(), dst64);
2230 }
2231 EncodeLdrAcquireScalar(dst, dstSigned, MemRef(dst64));
2232 }
2233
2234 void Aarch64Encoder::EncodeStr(Reg src, MemRef mem)
2235 {
2236 if (!ConvertMem(mem).IsValid()) {
2237 auto indexReg = mem.GetIndex();
2238 auto rzero = GetRegfile()->GetZeroReg().GetId();
2239 // Invalid == base is rzero or invalid
2240 ASSERT(mem.GetBase().GetId() == rzero || !mem.GetBase().IsValid());
2241 // Use tmp-reg
2242 ScopedTmpReg tmpReg(this);
2243 EncodeMov(tmpReg, Imm(0));
2244
2245 auto fixMem = MemRef(tmpReg, indexReg, mem.GetScale(), mem.GetDisp());
2246 ASSERT(ConvertMem(fixMem).IsValid());
2247 if (src.GetId() != rzero) {
2248 EncodeStr(src, fixMem);
2249 } else {
2250 EncodeStr(tmpReg, fixMem);
2251 }
2252 return;
2253 }
2254 ASSERT(ConvertMem(mem).IsValid());
2255 if (src.IsFloat()) {
2256 GetMasm()->Str(VixlVReg(src), ConvertMem(mem));
2257 return;
2258 }
2259 if (src.GetSize() == BYTE_SIZE) {
2260 GetMasm()->Strb(VixlReg(src), ConvertMem(mem));
2261 return;
2262 }
2263 if (src.GetSize() == HALF_SIZE) {
2264 GetMasm()->Strh(VixlReg(src), ConvertMem(mem));
2265 return;
2266 }
2267 GetMasm()->Str(VixlReg(src), ConvertMem(mem));
2268 }
2269
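/* STLR has the same addressing restriction as LDAR: base register only. Any
 * displacement/index is pre-added into a temporary base first, and a float source
 * is moved to a scalar register with fmov before the release store is issued. */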
2270 void Aarch64Encoder::EncodeStrRelease(Reg src, MemRef mem)
2271 {
2272 ScopedTmpRegLazy base(this);
2273 MemRef fixedMem;
2274 bool memWasFixed = false;
2275 if (mem.HasDisp()) {
2276 if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2277 base.AcquireIfInvalid();
2278 EncodeAdd(base, mem.GetBase(), Imm(mem.GetDisp()));
2279 } else {
2280 base.AcquireIfInvalid();
2281 EncodeMov(base, Imm(mem.GetDisp()));
2282 EncodeAdd(base, mem.GetBase(), base);
2283 }
2284 memWasFixed = true;
2285 }
2286 if (mem.HasIndex()) {
2287 base.AcquireIfInvalid();
2288 if (mem.HasScale()) {
2289 EncodeAdd(base, memWasFixed ? base : mem.GetBase(), Shift(mem.GetIndex(), mem.GetScale()));
2290 } else {
2291 EncodeAdd(base, memWasFixed ? base : mem.GetBase(), mem.GetIndex());
2292 }
2293 memWasFixed = true;
2294 }
2295
2296 if (memWasFixed) {
2297 fixedMem = MemRef(base);
2298 } else {
2299 fixedMem = mem;
2300 }
2301
2302 #ifndef NDEBUG
2303 CheckAlignment(fixedMem, src.GetSize());
2304 #endif // NDEBUG
2305 if (src.IsFloat()) {
2306 ScopedTmpRegU64 tmpReg(this);
2307 auto tmp = VixlReg(tmpReg, src.GetSize());
2308 GetMasm()->Fmov(tmp, VixlVReg(src));
2309 GetMasm()->Stlr(tmp, ConvertMem(fixedMem));
2310 return;
2311 }
2312 if (src.GetSize() == BYTE_SIZE) {
2313 GetMasm()->Stlrb(VixlReg(src), ConvertMem(fixedMem));
2314 return;
2315 }
2316 if (src.GetSize() == HALF_SIZE) {
2317 GetMasm()->Stlrh(VixlReg(src), ConvertMem(fixedMem));
2318 return;
2319 }
2320 GetMasm()->Stlr(VixlReg(src), ConvertMem(fixedMem));
2321 }
2322
2323 void Aarch64Encoder::EncodeLdrExclusive(Reg dst, Reg addr, bool acquire)
2324 {
2325 ASSERT(dst.IsScalar());
2326 auto dstReg = VixlReg(dst);
2327 auto memCvt = ConvertMem(MemRef(addr));
2328 #ifndef NDEBUG
2329 CheckAlignment(MemRef(addr), dst.GetSize());
2330 #endif // NDEBUG
2331 if (dst.GetSize() == BYTE_SIZE) {
2332 if (acquire) {
2333 GetMasm()->Ldaxrb(dstReg, memCvt);
2334 return;
2335 }
2336 GetMasm()->Ldxrb(dstReg, memCvt);
2337 return;
2338 }
2339 if (dst.GetSize() == HALF_SIZE) {
2340 if (acquire) {
2341 GetMasm()->Ldaxrh(dstReg, memCvt);
2342 return;
2343 }
2344 GetMasm()->Ldxrh(dstReg, memCvt);
2345 return;
2346 }
2347 if (acquire) {
2348 GetMasm()->Ldaxr(dstReg, memCvt);
2349 return;
2350 }
2351 GetMasm()->Ldxr(dstReg, memCvt);
2352 }
2353
2354 void Aarch64Encoder::EncodeStrExclusive(Reg dst, Reg src, Reg addr, bool release)
2355 {
2356 ASSERT(dst.IsScalar() && src.IsScalar());
2357
2358 bool copyDst = dst.GetId() == src.GetId() || dst.GetId() == addr.GetId();
2359 ScopedTmpReg tmp(this);
2360 auto srcReg = VixlReg(src);
2361 auto memCvt = ConvertMem(MemRef(addr));
2362 auto dstReg = copyDst ? VixlReg(tmp) : VixlReg(dst);
2363 #ifndef NDEBUG
2364 CheckAlignment(MemRef(addr), src.GetSize());
2365 #endif // NDEBUG
2366
2367 if (src.GetSize() == BYTE_SIZE) {
2368 if (release) {
2369 GetMasm()->Stlxrb(dstReg, srcReg, memCvt);
2370 } else {
2371 GetMasm()->Stxrb(dstReg, srcReg, memCvt);
2372 }
2373 } else if (src.GetSize() == HALF_SIZE) {
2374 if (release) {
2375 GetMasm()->Stlxrh(dstReg, srcReg, memCvt);
2376 } else {
2377 GetMasm()->Stxrh(dstReg, srcReg, memCvt);
2378 }
2379 } else {
2380 if (release) {
2381 GetMasm()->Stlxr(dstReg, srcReg, memCvt);
2382 } else {
2383 GetMasm()->Stxr(dstReg, srcReg, memCvt);
2384 }
2385 }
2386 if (copyDst) {
2387 EncodeMov(dst, tmp);
2388 }
2389 }
2390
2391 void Aarch64Encoder::EncodeStrz(Reg src, MemRef mem)
2392 {
2393 if (!ConvertMem(mem).IsValid()) {
2394 EncodeStr(src, mem);
2395 return;
2396 }
2397 ASSERT(ConvertMem(mem).IsValid());
2398 // The upper half of the register must be zeroed by default
2399 if (src.IsFloat()) {
2400 EncodeStr(src.As(FLOAT64_TYPE), mem);
2401 return;
2402 }
2403 if (src.GetSize() < WORD_SIZE) {
2404 EncodeCast(src, false, src.As(INT64_TYPE), false);
2405 }
2406 GetMasm()->Str(VixlReg(src.As(INT64_TYPE)), ConvertMem(mem));
2407 }
2408
2409 void Aarch64Encoder::EncodeSti(int64_t src, uint8_t srcSizeBytes, MemRef mem)
2410 {
2411 if (mem.IsValid() && mem.IsOffsetMem() && src == 0 && srcSizeBytes == 1) {
2412 auto rzero = GetRegfile()->GetZeroReg();
2413 GetMasm()->Strb(VixlReg(rzero), ConvertMem(mem));
2414 return;
2415 }
2416 if (!ConvertMem(mem).IsValid()) {
2417 auto rzero = GetRegfile()->GetZeroReg();
2418 EncodeStr(rzero, mem);
2419 return;
2420 }
2421
2422 ScopedTmpRegU64 tmpReg(this);
2423 auto tmp = VixlReg(tmpReg);
2424 GetMasm()->Mov(tmp, VixlImm(src));
2425 if (srcSizeBytes == 1U) {
2426 GetMasm()->Strb(tmp, ConvertMem(mem));
2427 return;
2428 }
2429 if (srcSizeBytes == HALF_WORD_SIZE_BYTES) {
2430 GetMasm()->Strh(tmp, ConvertMem(mem));
2431 return;
2432 }
2433 ASSERT((srcSizeBytes == WORD_SIZE_BYTES) || (srcSizeBytes == DOUBLE_WORD_SIZE_BYTES));
2434 GetMasm()->Str(tmp, ConvertMem(mem));
2435 }
2436
2437 void Aarch64Encoder::EncodeSti(float src, MemRef mem)
2438 {
2439 if (!ConvertMem(mem).IsValid()) {
2440 auto rzero = GetRegfile()->GetZeroReg();
2441 EncodeStr(rzero, mem);
2442 return;
2443 }
2444 ScopedTmpRegF32 tmpReg(this);
2445 GetMasm()->Fmov(VixlVReg(tmpReg).S(), src);
2446 EncodeStr(tmpReg, mem);
2447 }
2448
2449 void Aarch64Encoder::EncodeSti(double src, MemRef mem)
2450 {
2451 if (!ConvertMem(mem).IsValid()) {
2452 auto rzero = GetRegfile()->GetZeroReg();
2453 EncodeStr(rzero, mem);
2454 return;
2455 }
2456 ScopedTmpRegF64 tmpReg(this);
2457 GetMasm()->Fmov(VixlVReg(tmpReg).D(), src);
2458 EncodeStr(tmpReg, mem);
2459 }
2460
2461 void Aarch64Encoder::EncodeMemCopy(MemRef memFrom, MemRef memTo, size_t size)
2462 {
2463 if (!ConvertMem(memFrom).IsValid() || !ConvertMem(memTo).IsValid()) {
2464 auto rzero = GetRegfile()->GetZeroReg();
2465 if (!ConvertMem(memFrom).IsValid()) {
2466 // Encode one load - will fix inside
2467 EncodeLdr(rzero, false, memFrom);
2468 } else {
2469 ASSERT(!ConvertMem(memTo).IsValid());
2470 // Encode one store - will fix inside
2471 EncodeStr(rzero, memTo);
2472 }
2473 return;
2474 }
2475 ASSERT(ConvertMem(memFrom).IsValid());
2476 ASSERT(ConvertMem(memTo).IsValid());
2477 ScopedTmpRegU64 tmpReg(this);
2478 auto tmp = VixlReg(tmpReg, std::min(size, static_cast<size_t>(DOUBLE_WORD_SIZE)));
2479 if (size == BYTE_SIZE) {
2480 GetMasm()->Ldrb(tmp, ConvertMem(memFrom));
2481 GetMasm()->Strb(tmp, ConvertMem(memTo));
2482 } else if (size == HALF_SIZE) {
2483 GetMasm()->Ldrh(tmp, ConvertMem(memFrom));
2484 GetMasm()->Strh(tmp, ConvertMem(memTo));
2485 } else {
2486 ASSERT(size == WORD_SIZE || size == DOUBLE_WORD_SIZE);
2487 GetMasm()->Ldr(tmp, ConvertMem(memFrom));
2488 GetMasm()->Str(tmp, ConvertMem(memTo));
2489 }
2490 }
2491
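/* Same as EncodeMemCopy, but additionally zeroes the slot that follows the copied
 * value: for copies up to a word the value is stored together with wzr using stp,
 * e.g. (sketch) `ldr w_tmp, [from]; stp w_tmp, wzr, [to]`, while a double-word
 * copy already fills the whole slot and uses a plain str. */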
2492 void Aarch64Encoder::EncodeMemCopyz(MemRef memFrom, MemRef memTo, size_t size)
2493 {
2494 if (!ConvertMem(memFrom).IsValid() || !ConvertMem(memTo).IsValid()) {
2495 auto rzero = GetRegfile()->GetZeroReg();
2496 if (!ConvertMem(memFrom).IsValid()) {
2497 // Encode one load - will fix inside
2498 EncodeLdr(rzero, false, memFrom);
2499 } else {
2500 ASSERT(!ConvertMem(memTo).IsValid());
2501 // Encode one store - will fix inside
2502 EncodeStr(rzero, memTo);
2503 }
2504 return;
2505 }
2506 ASSERT(ConvertMem(memFrom).IsValid());
2507 ASSERT(ConvertMem(memTo).IsValid());
2508 ScopedTmpRegU64 tmpReg(this);
2509 auto tmp = VixlReg(tmpReg, std::min(size, static_cast<size_t>(DOUBLE_WORD_SIZE)));
2510 auto zero = VixlReg(GetRegfile()->GetZeroReg(), WORD_SIZE);
2511 if (size == BYTE_SIZE) {
2512 GetMasm()->Ldrb(tmp, ConvertMem(memFrom));
2513 GetMasm()->Stp(tmp, zero, ConvertMem(memTo));
2514 } else if (size == HALF_SIZE) {
2515 GetMasm()->Ldrh(tmp, ConvertMem(memFrom));
2516 GetMasm()->Stp(tmp, zero, ConvertMem(memTo));
2517 } else {
2518 ASSERT(size == WORD_SIZE || size == DOUBLE_WORD_SIZE);
2519 GetMasm()->Ldr(tmp, ConvertMem(memFrom));
2520 if (size == WORD_SIZE) {
2521 GetMasm()->Stp(tmp, zero, ConvertMem(memTo));
2522 } else {
2523 GetMasm()->Str(tmp, ConvertMem(memTo));
2524 }
2525 }
2526 }
2527
2528 void Aarch64Encoder::EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc)
2529 {
2530 ASSERT(src0.IsFloat() == src1.IsFloat());
2531 if (src0.IsFloat()) {
2532 GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
2533 } else {
2534 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2535 }
2536 GetMasm()->Cset(VixlReg(dst), Convert(cc));
2537 }
2538
2539 void Aarch64Encoder::EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc)
2540 {
2541 ASSERT(src0.IsScalar() && src1.IsScalar());
2542
2543 GetMasm()->Tst(VixlReg(src0), VixlReg(src1));
2544 GetMasm()->Cset(VixlReg(dst), ConvertTest(cc));
2545 }
2546
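/* With LSE atomics (fastEncoding) a single STSETB performs the atomic byte OR.
 * Otherwise a classic load/store-exclusive retry loop is emitted; a rough sketch
 * of that fallback (FP/LR may be spilled and reused as temporaries when no
 * scratch registers are free):
 *
 * retry:
 *   ldxrb  w_or, [x_addr]
 *   orr    w_or, w_or, w_value
 *   stxrb  w_status, w_or, [x_addr]
 *   cbnz   w_status, retry
 */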
2547 void Aarch64Encoder::EncodeAtomicByteOr(Reg addr, Reg value, bool fastEncoding)
2548 {
2549 if (fastEncoding) {
2550 #ifndef NDEBUG
2551 vixl::CPUFeaturesScope scope(GetMasm(), vixl::CPUFeatures::kAtomics);
2552 #endif
2553 GetMasm()->Stsetb(VixlReg(value, BYTE_SIZE), MemOperand(VixlReg(addr)));
2554 return;
2555 }
2556
2557 // Slow fallback encoding; it should not be used in production code!
2558 auto linkReg = GetTarget().GetLinkReg();
2559 auto frameReg = GetTarget().GetFrameReg();
2560 static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTES;
2561
2562 ScopedTmpRegLazy tmp1(this);
2563 ScopedTmpRegLazy tmp2(this);
2564 Reg orValue;
2565 Reg storeResult;
2566 bool hasTemps = GetScratchRegistersWithLrCount() >= 2U;
2567 if (hasTemps) {
2568 tmp1.AcquireWithLr();
2569 tmp2.AcquireWithLr();
2570 orValue = tmp1.GetReg().As(INT32_TYPE);
2571 storeResult = tmp2.GetReg().As(INT32_TYPE);
2572 } else {
2573 GetMasm()->stp(VixlReg(frameReg), VixlReg(linkReg),
2574 MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
2575 orValue = frameReg.As(INT32_TYPE);
2576 storeResult = linkReg.As(INT32_TYPE);
2577 }
2578
2579 auto *loop = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
2580 GetMasm()->Bind(loop);
2581 GetMasm()->Ldxrb(VixlReg(orValue), MemOperand(VixlReg(addr)));
2582 GetMasm()->Orr(VixlReg(orValue), VixlReg(orValue), VixlReg(value, WORD_SIZE));
2583 GetMasm()->Stxrb(VixlReg(storeResult), VixlReg(orValue), MemOperand(VixlReg(addr)));
2584 GetMasm()->Cbnz(VixlReg(storeResult), loop);
2585 if (!hasTemps) {
2586 GetMasm()->ldp(VixlReg(frameReg), VixlReg(linkReg),
2587 MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
2588 }
2589 }
2590
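/* Three-way compare: dst becomes -1, 0 or +1. The flags are set once, then
 * `cset dst, ne` produces 0/1 and `cneg dst, dst, <less-cc>` flips it to -1 when
 * the "less" condition holds, e.g. for signed scalar operands (sketch):
 *
 *   cmp  w_src0, w_src1
 *   cset w_dst, ne
 *   cneg w_dst, w_dst, lt
 */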
2591 void Aarch64Encoder::EncodeCmp(Reg dst, Reg src0, Reg src1, Condition cc)
2592 {
2593 if (src0.IsFloat()) {
2594 ASSERT(src1.IsFloat());
2595 ASSERT(cc == Condition::MI || cc == Condition::LT);
2596 GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
2597 } else {
2598 ASSERT(src0.IsScalar() && src1.IsScalar());
2599 ASSERT(cc == Condition::LO || cc == Condition::LT);
2600 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2601 }
2602 GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::ne);
2603 GetMasm()->Cneg(VixlReg(Promote(dst)), VixlReg(Promote(dst)), Convert(cc));
2604 }
2605
2606 void Aarch64Encoder::EncodeSelect(ArgsSelect &&args)
2607 {
2608 auto [dst, src0, src1, src2, src3, cc] = args;
2609 if (src2.IsScalar()) {
2610 GetMasm()->Cmp(VixlReg(src2), VixlReg(src3));
2611 } else {
2612 GetMasm()->Fcmp(VixlVReg(src2), VixlVReg(src3));
2613 }
2614 if (dst.IsFloat()) {
2615 GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), Convert(cc));
2616 } else {
2617 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), Convert(cc));
2618 }
2619 }
2620
2621 void Aarch64Encoder::EncodeSelect(ArgsSelectImm &&args)
2622 {
2623 auto [dst, src0, src1, src2, imm, cc] = args;
2624 if (src2.IsScalar()) {
2625 GetMasm()->Cmp(VixlReg(src2), VixlImm(imm));
2626 } else {
2627 GetMasm()->Fcmp(VixlVReg(src2), imm.GetAsDouble());
2628 }
2629 if (dst.IsFloat()) {
2630 GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), Convert(cc));
2631 } else {
2632 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), Convert(cc));
2633 }
2634 }
2635
2636 void Aarch64Encoder::EncodeSelectTest(ArgsSelect &&args)
2637 {
2638 auto [dst, src0, src1, src2, src3, cc] = args;
2639 ASSERT(!src2.IsFloat() && !src3.IsFloat());
2640 GetMasm()->Tst(VixlReg(src2), VixlReg(src3));
2641 if (dst.IsFloat()) {
2642 GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), ConvertTest(cc));
2643 } else {
2644 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), ConvertTest(cc));
2645 }
2646 }
2647
2648 void Aarch64Encoder::EncodeSelectTest(ArgsSelectImm &&args)
2649 {
2650 auto [dst, src0, src1, src2, imm, cc] = args;
2651 ASSERT(!src2.IsFloat());
2652 ASSERT(CanEncodeImmLogical(imm.GetAsInt(), src2.GetSize() > WORD_SIZE ? DOUBLE_WORD_SIZE : WORD_SIZE));
2653 GetMasm()->Tst(VixlReg(src2), VixlImm(imm));
2654 if (dst.IsFloat()) {
2655 GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), ConvertTest(cc));
2656 } else {
2657 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), ConvertTest(cc));
2658 }
2659 }
2660
2661 void Aarch64Encoder::EncodeLdp(Reg dst0, Reg dst1, bool dstSigned, MemRef mem)
2662 {
2663 ASSERT(dst0.IsFloat() == dst1.IsFloat());
2664 ASSERT(dst0.GetSize() == dst1.GetSize());
2665 if (!ConvertMem(mem).IsValid()) {
2666 // Encode one Ldr - will fix inside
2667 EncodeLdr(dst0, dstSigned, mem);
2668 return;
2669 }
2670
2671 if (dst0.IsFloat()) {
2672 GetMasm()->Ldp(VixlVReg(dst0), VixlVReg(dst1), ConvertMem(mem));
2673 return;
2674 }
2675 if (dstSigned && dst0.GetSize() == WORD_SIZE) {
2676 GetMasm()->Ldpsw(VixlReg(dst0, DOUBLE_WORD_SIZE), VixlReg(dst1, DOUBLE_WORD_SIZE), ConvertMem(mem));
2677 return;
2678 }
2679 GetMasm()->Ldp(VixlReg(dst0), VixlReg(dst1), ConvertMem(mem));
2680 }
2681
2682 void Aarch64Encoder::EncodeStp(Reg src0, Reg src1, MemRef mem)
2683 {
2684 ASSERT(src0.IsFloat() == src1.IsFloat());
2685 ASSERT(src0.GetSize() == src1.GetSize());
2686 if (!ConvertMem(mem).IsValid()) {
2687 // Encode one Str - will fix inside
2688 EncodeStr(src0, mem);
2689 return;
2690 }
2691
2692 if (src0.IsFloat()) {
2693 GetMasm()->Stp(VixlVReg(src0), VixlVReg(src1), ConvertMem(mem));
2694 return;
2695 }
2696 GetMasm()->Stp(VixlReg(src0), VixlReg(src1), ConvertMem(mem));
2697 }
2698
2699 void Aarch64Encoder::EncodeMAdd(Reg dst, Reg src0, Reg src1, Reg src2)
2700 {
2701 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize() && dst.GetSize() == src2.GetSize());
2702 ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar() && dst.IsScalar() == src2.IsScalar());
2703
2704 ASSERT(!GetRegfile()->IsZeroReg(dst));
2705
2706 if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
2707 EncodeMov(dst, src2);
2708 return;
2709 }
2710
2711 if (GetRegfile()->IsZeroReg(src2)) {
2712 EncodeMul(dst, src0, src1);
2713 return;
2714 }
2715
2716 if (dst.IsScalar()) {
2717 GetMasm()->Madd(VixlReg(dst), VixlReg(src0), VixlReg(src1), VixlReg(src2));
2718 } else {
2719 GetMasm()->Fmadd(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), VixlVReg(src2));
2720 }
2721 }
2722
2723 void Aarch64Encoder::EncodeMSub(Reg dst, Reg src0, Reg src1, Reg src2)
2724 {
2725 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize() && dst.GetSize() == src2.GetSize());
2726 ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar() && dst.IsScalar() == src2.IsScalar());
2727
2728 ASSERT(!GetRegfile()->IsZeroReg(dst));
2729
2730 if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
2731 EncodeMov(dst, src2);
2732 return;
2733 }
2734
2735 if (GetRegfile()->IsZeroReg(src2)) {
2736 EncodeMNeg(dst, src0, src1);
2737 return;
2738 }
2739
2740 if (dst.IsScalar()) {
2741 GetMasm()->Msub(VixlReg(dst), VixlReg(src0), VixlReg(src1), VixlReg(src2));
2742 } else {
2743 GetMasm()->Fmsub(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), VixlVReg(src2));
2744 }
2745 }
2746
2747 void Aarch64Encoder::EncodeMNeg(Reg dst, Reg src0, Reg src1)
2748 {
2749 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
2750 ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar());
2751
2752 ASSERT(!GetRegfile()->IsZeroReg(dst));
2753
2754 if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
2755 EncodeMov(dst, Imm(0U));
2756 return;
2757 }
2758
2759 if (dst.IsScalar()) {
2760 GetMasm()->Mneg(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2761 } else {
2762 GetMasm()->Fnmul(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
2763 }
2764 }
2765
2766 void Aarch64Encoder::EncodeOrNot(Reg dst, Reg src0, Reg src1)
2767 {
2768 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
2769 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
2770 GetMasm()->Orn(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2771 }
2772
2773 void Aarch64Encoder::EncodeOrNot(Reg dst, Reg src0, Shift src1)
2774 {
2775 ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
2776 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
2777 GetMasm()->Orn(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2778 }
2779
2780 void Aarch64Encoder::EncodeExtractBits(Reg dst, Reg src0, Imm imm1, Imm imm2)
2781 {
2782 GetMasm()->Ubfx(VixlReg(dst), VixlReg(src0), imm1.GetAsInt(), imm2.GetAsInt());
2783 }
2784
2785 void Aarch64Encoder::EncodeAndNot(Reg dst, Reg src0, Reg src1)
2786 {
2787 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
2788 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
2789 GetMasm()->Bic(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2790 }
2791
2792 void Aarch64Encoder::EncodeAndNot(Reg dst, Reg src0, Shift src1)
2793 {
2794 ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
2795 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
2796 GetMasm()->Bic(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2797 }
2798
2799 void Aarch64Encoder::EncodeXorNot(Reg dst, Reg src0, Reg src1)
2800 {
2801 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
2802 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
2803 GetMasm()->Eon(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2804 }
2805
2806 void Aarch64Encoder::EncodeXorNot(Reg dst, Reg src0, Shift src1)
2807 {
2808 ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
2809 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
2810 GetMasm()->Eon(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2811 }
2812
2813 void Aarch64Encoder::EncodeNeg(Reg dst, Shift src)
2814 {
2815 ASSERT(dst.GetSize() == src.GetBase().GetSize());
2816 ASSERT(dst.IsScalar() && src.GetBase().IsScalar());
2817 GetMasm()->Neg(VixlReg(dst), VixlShift(src));
2818 }
2819
2820 void Aarch64Encoder::EncodeStackOverflowCheck(ssize_t offset)
2821 {
2822 ScopedTmpReg tmp(this);
2823 EncodeAdd(tmp, GetTarget().GetStackReg(), Imm(offset));
2824 EncodeLdr(tmp, false, MemRef(tmp));
2825 }
2826
2827 bool Aarch64Encoder::CanEncodeImmAddSubCmp(int64_t imm, [[maybe_unused]] uint32_t size,
2828 [[maybe_unused]] bool signedCompare)
2829 {
2830 if (imm == INT64_MIN) {
2831 return false;
2832 }
2833 if (imm < 0) {
2834 imm = -imm;
2835 }
2836 return vixl::aarch64::Assembler::IsImmAddSub(imm);
2837 }
2838
2839 bool Aarch64Encoder::CanEncodeImmLogical(uint64_t imm, uint32_t size)
2840 {
2841 #ifndef NDEBUG
2842 if (size < DOUBLE_WORD_SIZE) {
2843 // Test if the highest part is consistent:
2844 ASSERT(((imm >> size) == 0) || (((~imm) >> size) == 0));
2845 }
2846 #endif // NDEBUG
2847 return vixl::aarch64::Assembler::IsImmLogical(imm, size);
2848 }
2849
2850 bool Aarch64Encoder::CanOptimizeImmDivMod(uint64_t imm, bool isSigned) const
2851 {
2852 return CanOptimizeImmDivModCommon(imm, isSigned);
2853 }
2854
2855 /*
2856 * From aarch64 instruction set
2857 *
2858 * ========================================================
2859 * Syntax
2860 *
2861 * LDR Wt, [Xn|SP, Rm{, extend {amount}}] ; 32-bit general registers
2862 *
2863 * LDR Xt, [Xn|SP, Rm{, extend {amount}}] ; 64-bit general registers
2864 *
2865 * amount
2866 * Is the index shift amount, optional and defaulting to #0 when extend is not LSL:
2867 *
2868 * 32-bit general registers
2869 * Can be one of #0 or #2.
2870 *
2871 * 64-bit general registers
2872 * Can be one of #0 or #3.
2873 * ========================================================
2874 * Syntax
2875 *
2876 * LDRH Wt, [Xn|SP, Rm{, extend {amount}}]
2877 *
2878 * amount
2879 * Is the index shift amount, optional and defaulting to #0 when extend is not LSL, and can be either #0 or #1.
2880 * ========================================================
2881 *
2882 * Scale can be 0 or 1 for half load, 2 for word load, 3 for double word load
2883 */
2884 bool Aarch64Encoder::CanEncodeScale(uint64_t imm, uint32_t size)
2885 {
2886 return (imm == 0) || ((1U << imm) == (size >> 3U));
2887 }
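/* For example (assuming, as elsewhere in this file, that `size` is given in bits):
 * CanEncodeScale(3, 64) is true because 1 << 3 == 8 == 64 >> 3, i.e. an LSL #3
 * index matches a 64-bit LDR/STR, while CanEncodeScale(2, 64) is false. */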
2888
2889 bool Aarch64Encoder::CanEncodeShiftedOperand(ShiftOpcode opcode, ShiftType shiftType)
2890 {
2891 switch (opcode) {
2892 case ShiftOpcode::NEG_SR:
2893 case ShiftOpcode::ADD_SR:
2894 case ShiftOpcode::SUB_SR:
2895 return shiftType == ShiftType::LSL || shiftType == ShiftType::LSR || shiftType == ShiftType::ASR;
2896 case ShiftOpcode::AND_SR:
2897 case ShiftOpcode::OR_SR:
2898 case ShiftOpcode::XOR_SR:
2899 case ShiftOpcode::AND_NOT_SR:
2900 case ShiftOpcode::OR_NOT_SR:
2901 case ShiftOpcode::XOR_NOT_SR:
2902 return shiftType != ShiftType::INVALID_SHIFT;
2903 default:
2904 return false;
2905 }
2906 }
2907
2908 bool Aarch64Encoder::CanEncodeFloatSelect()
2909 {
2910 return true;
2911 }
2912
2913 Reg Aarch64Encoder::AcquireScratchRegister(TypeInfo type)
2914 {
2915 ASSERT(GetMasm()->GetCurrentScratchRegisterScope() == nullptr);
2916 auto reg = type.IsFloat() ? GetMasm()->GetScratchVRegisterList()->PopLowestIndex()
2917 : GetMasm()->GetScratchRegisterList()->PopLowestIndex();
2918 ASSERT(reg.IsValid());
2919 return Reg(reg.GetCode(), type);
2920 }
2921
2922 void Aarch64Encoder::AcquireScratchRegister(Reg reg)
2923 {
2924 ASSERT(GetMasm()->GetCurrentScratchRegisterScope() == nullptr);
2925 if (reg == GetTarget().GetLinkReg()) {
2926 ASSERT_PRINT(!lrAcquired_, "Trying to acquire LR, which hasn't been released before");
2927 lrAcquired_ = true;
2928 return;
2929 }
2930 auto type = reg.GetType();
2931 auto regId = reg.GetId();
2932
2933 if (type.IsFloat()) {
2934 ASSERT(GetMasm()->GetScratchVRegisterList()->IncludesAliasOf(VixlVReg(reg)));
2935 GetMasm()->GetScratchVRegisterList()->Remove(regId);
2936 } else {
2937 ASSERT(GetMasm()->GetScratchRegisterList()->IncludesAliasOf(VixlReg(reg)));
2938 GetMasm()->GetScratchRegisterList()->Remove(regId);
2939 }
2940 }
2941
2942 void Aarch64Encoder::ReleaseScratchRegister(Reg reg)
2943 {
2944 if (reg == GetTarget().GetLinkReg()) {
2945 ASSERT_PRINT(lrAcquired_, "Trying to release LR, which hasn't been acquired before");
2946 lrAcquired_ = false;
2947 } else if (reg.IsFloat()) {
2948 GetMasm()->GetScratchVRegisterList()->Combine(reg.GetId());
2949 } else if (reg.GetId() != GetTarget().GetLinkReg().GetId()) {
2950 GetMasm()->GetScratchRegisterList()->Combine(reg.GetId());
2951 }
2952 }
2953
2954 bool Aarch64Encoder::IsScratchRegisterReleased(Reg reg) const
2955 {
2956 if (reg == GetTarget().GetLinkReg()) {
2957 return !lrAcquired_;
2958 }
2959 if (reg.IsFloat()) {
2960 return GetMasm()->GetScratchVRegisterList()->IncludesAliasOf(VixlVReg(reg));
2961 }
2962 return GetMasm()->GetScratchRegisterList()->IncludesAliasOf(VixlReg(reg));
2963 }
2964
2965 RegMask Aarch64Encoder::GetScratchRegistersMask() const
2966 {
2967 return RegMask(GetMasm()->GetScratchRegisterList()->GetList());
2968 }
2969
2970 RegMask Aarch64Encoder::GetScratchFpRegistersMask() const
2971 {
2972 return RegMask(GetMasm()->GetScratchVRegisterList()->GetList());
2973 }
2974
2975 RegMask Aarch64Encoder::GetAvailableScratchRegisters() const
2976 {
2977 return RegMask(GetMasm()->GetScratchRegisterList()->GetList());
2978 }
2979
2980 VRegMask Aarch64Encoder::GetAvailableScratchFpRegisters() const
2981 {
2982 return VRegMask(GetMasm()->GetScratchVRegisterList()->GetList());
2983 }
2984
2985 TypeInfo Aarch64Encoder::GetRefType()
2986 {
2987 return INT64_TYPE;
2988 }
2989
2990 void *Aarch64Encoder::BufferData() const
2991 {
2992 return GetMasm()->GetBuffer()->GetStartAddress<void *>();
2993 }
2994
2995 size_t Aarch64Encoder::BufferSize() const
2996 {
2997 return GetMasm()->GetBuffer()->GetSizeInBytes();
2998 }
2999
3000 void Aarch64Encoder::MakeLibCall(Reg dst, Reg src0, Reg src1, const void *entryPoint)
3001 {
3002 if (!dst.IsFloat()) {
3003 SetFalseResult();
3004 return;
3005 }
3006 if (dst.GetType() == FLOAT32_TYPE) {
3007 if (!src0.IsFloat() || !src1.IsFloat()) {
3008 SetFalseResult();
3009 return;
3010 }
3011
3012 if (src0.GetId() != vixl::aarch64::s0.GetCode() || src1.GetId() != vixl::aarch64::s1.GetCode()) {
3013 ScopedTmpRegF32 tmp(this);
3014 GetMasm()->Fmov(VixlVReg(tmp), VixlVReg(src1));
3015 GetMasm()->Fmov(vixl::aarch64::s0, VixlVReg(src0));
3016 GetMasm()->Fmov(vixl::aarch64::s1, VixlVReg(tmp));
3017 }
3018
3019 MakeCall(entryPoint);
3020
3021 if (dst.GetId() != vixl::aarch64::s0.GetCode()) {
3022 GetMasm()->Fmov(VixlVReg(dst), vixl::aarch64::s0);
3023 }
3024 } else if (dst.GetType() == FLOAT64_TYPE) {
3025 if (!src0.IsFloat() || !src1.IsFloat()) {
3026 SetFalseResult();
3027 return;
3028 }
3029
3030 if (src0.GetId() != vixl::aarch64::d0.GetCode() || src1.GetId() != vixl::aarch64::d1.GetCode()) {
3031 ScopedTmpRegF64 tmp(this);
3032 GetMasm()->Fmov(VixlVReg(tmp), VixlVReg(src1));
3033
3034 GetMasm()->Fmov(vixl::aarch64::d0, VixlVReg(src0));
3035 GetMasm()->Fmov(vixl::aarch64::d1, VixlVReg(tmp));
3036 }
3037
3038 MakeCall(entryPoint);
3039
3040 if (dst.GetId() != vixl::aarch64::d0.GetCode()) {
3041 GetMasm()->Fmov(VixlVReg(dst), vixl::aarch64::d0);
3042 }
3043 } else {
3044 UNREACHABLE();
3045 }
3046 }
3047
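/* Spill/fill helper for register sets addressed relative to sp. If the largest
 * stack-slot offset does not fit the LDP/STP immediate form (IsImmLSPair), the
 * base address sp + slot * 8 is first materialized in a temporary (LR in this
 * implementation), and the loop below then addresses the slots relative to it. */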
3048 template <bool IS_STORE>
3049 void Aarch64Encoder::LoadStoreRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
3050 {
3051 if (registers.none()) {
3052 return;
3053 }
3054 int32_t lastReg = registers.size() - 1;
3055 for (; lastReg >= 0; --lastReg) {
3056 if (registers.test(lastReg)) {
3057 break;
3058 }
3059 }
3060 // Construct single add for big offset
3061 size_t spOffset;
3062 auto lastOffset = (slot + lastReg - startReg) * DOUBLE_WORD_SIZE_BYTES;
3063
3064 if (!vixl::aarch64::Assembler::IsImmLSPair(lastOffset, vixl::aarch64::kXRegSizeInBytesLog2)) {
3065 ScopedTmpReg lrReg(this, true);
3066 auto tmp = VixlReg(lrReg);
3067 spOffset = slot * DOUBLE_WORD_SIZE_BYTES;
3068 slot = 0;
3069 if (vixl::aarch64::Assembler::IsImmAddSub(spOffset)) {
3070 GetMasm()->Add(tmp, vixl::aarch64::sp, VixlImm(spOffset));
3071 } else {
3072 GetMasm()->Mov(tmp, VixlImm(spOffset));
3073 GetMasm()->Add(tmp, vixl::aarch64::sp, tmp);
3074 }
3075 LoadStoreRegistersLoop<IS_STORE>(registers, slot, startReg, isFp, tmp);
3076 } else {
3077 LoadStoreRegistersLoop<IS_STORE>(registers, slot, startReg, isFp, vixl::aarch64::sp);
3078 }
3079 }
3080
3081 template <bool IS_STORE>
3082 static void LoadStorePair(vixl::aarch64::MacroAssembler *masm, CPURegister lastReg, CPURegister reg, Reg base,
3083 int32_t idx)
3084 {
3085 auto baseReg = VixlReg(base);
3086 static constexpr int32_t OFFSET = 2;
3087 if constexpr (IS_STORE) { // NOLINT
3088 masm->Stp(lastReg, reg, MemOperand(baseReg, (idx - OFFSET) * DOUBLE_WORD_SIZE_BYTES));
3089 } else { // NOLINT
3090 masm->Ldp(lastReg, reg, MemOperand(baseReg, (idx - OFFSET) * DOUBLE_WORD_SIZE_BYTES));
3091 }
3092 }
3093
3094 template <bool IS_STORE>
3095 static void LoadStoreReg(vixl::aarch64::MacroAssembler *masm, CPURegister lastReg, Reg base, int32_t idx)
3096 {
3097 auto baseReg = VixlReg(base);
3098 if constexpr (IS_STORE) { // NOLINT
3099 masm->Str(lastReg, MemOperand(baseReg, (idx - 1) * DOUBLE_WORD_SIZE_BYTES));
3100 } else { // NOLINT
3101 masm->Ldr(lastReg, MemOperand(baseReg, (idx - 1) * DOUBLE_WORD_SIZE_BYTES));
3102 }
3103 }
3104
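/* Walks the register mask and emits LDP/STP for pairs of registers and LDR/STR
 * for leftovers. When a non-empty `mask` describes the frame layout, only
 * registers with adjacent ids share a pair; e.g. for saved registers
 * {x19, x20, x23} (a hypothetical set) this yields one stp for x19/x20 and a
 * single str for x23. */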
3105 template <bool IS_STORE>
3106 void Aarch64Encoder::LoadStoreRegistersMainLoop(RegMask registers, bool isFp, int32_t slot, Reg base, RegMask mask)
3107 {
3108 bool hasMask = mask.any();
3109 int32_t index = hasMask ? static_cast<int32_t>(mask.GetMinRegister()) : 0;
3110 int32_t lastIndex = -1;
3111 ssize_t lastId = -1;
3112
3113 slot -= index;
3114 for (ssize_t id = index; id < helpers::ToSigned(registers.size()); id++) {
3115 if (hasMask) {
3116 if (!mask.test(id)) {
3117 continue;
3118 }
3119 index++;
3120 }
3121 if (!registers.test(id)) {
3122 continue;
3123 }
3124 if (!hasMask) {
3125 index++;
3126 }
3127 if (lastId == -1) {
3128 lastId = id;
3129 lastIndex = index;
3130 continue;
3131 }
3132
3133 auto lastReg =
3134 CPURegister(lastId, vixl::aarch64::kXRegSize, isFp ? CPURegister::kVRegister : CPURegister::kRegister);
3135 if (!hasMask || lastId + 1 == id) {
3136 auto reg =
3137 CPURegister(id, vixl::aarch64::kXRegSize, isFp ? CPURegister::kVRegister : CPURegister::kRegister);
3138 LoadStorePair<IS_STORE>(GetMasm(), lastReg, reg, base, slot + index);
3139 lastId = -1;
3140 } else {
3141 LoadStoreReg<IS_STORE>(GetMasm(), lastReg, base, slot + lastIndex);
3142 lastId = id;
3143 lastIndex = index;
3144 }
3145 }
3146 if (lastId != -1) {
3147 auto lastReg =
3148 CPURegister(lastId, vixl::aarch64::kXRegSize, isFp ? CPURegister::kVRegister : CPURegister::kRegister);
3149 LoadStoreReg<IS_STORE>(GetMasm(), lastReg, base, slot + lastIndex);
3150 }
3151 }
3152
3153 template <bool IS_STORE>
3154 void Aarch64Encoder::LoadStoreRegisters(RegMask registers, bool isFp, int32_t slot, Reg base, RegMask mask)
3155 {
3156 if (registers.none()) {
3157 return;
3158 }
3159
3160 int32_t maxOffset = (slot + helpers::ToSigned(registers.GetMaxRegister())) * DOUBLE_WORD_SIZE_BYTES;
3161 int32_t minOffset = (slot + helpers::ToSigned(registers.GetMinRegister())) * DOUBLE_WORD_SIZE_BYTES;
3162
3163 ScopedTmpRegLazy tmpReg(this, true);
3164 // Construct single add for big offset
3165 if (!vixl::aarch64::Assembler::IsImmLSPair(minOffset, vixl::aarch64::kXRegSizeInBytesLog2) ||
3166 !vixl::aarch64::Assembler::IsImmLSPair(maxOffset, vixl::aarch64::kXRegSizeInBytesLog2)) {
3167 tmpReg.AcquireWithLr();
3168 auto lrReg = VixlReg(tmpReg);
3169 ssize_t spOffset = slot * DOUBLE_WORD_SIZE_BYTES;
3170 if (vixl::aarch64::Assembler::IsImmAddSub(spOffset)) {
3171 GetMasm()->Add(lrReg, VixlReg(base), VixlImm(spOffset));
3172 } else {
3173 GetMasm()->Mov(lrReg, VixlImm(spOffset));
3174 GetMasm()->Add(lrReg, VixlReg(base), lrReg);
3175 }
3176 // Adjust new values for slot and base register
3177 slot = 0;
3178 base = tmpReg;
3179 }
3180
3181 LoadStoreRegistersMainLoop<IS_STORE>(registers, isFp, slot, base, mask);
3182 }
3183
3184 template <bool IS_STORE>
3185 void Aarch64Encoder::LoadStoreRegistersLoop(RegMask registers, ssize_t slot, size_t startReg, bool isFp,
3186 const vixl::aarch64::Register &baseReg)
3187 {
3188 size_t i = 0;
3189 const auto getNextReg = [&registers, &i, isFp]() {
3190 for (; i < registers.size(); i++) {
3191 if (registers.test(i)) {
3192 return CPURegister(i++, vixl::aarch64::kXRegSize,
3193 isFp ? CPURegister::kVRegister : CPURegister::kRegister);
3194 }
3195 }
3196 return CPURegister();
3197 };
3198
3199 for (CPURegister nextReg = getNextReg(); nextReg.IsValid();) {
3200 const CPURegister currReg = nextReg;
3201 nextReg = getNextReg();
3202 if (nextReg.IsValid() && (nextReg.GetCode() - 1 == currReg.GetCode())) {
3203 if constexpr (IS_STORE) { // NOLINT
3204 GetMasm()->Stp(currReg, nextReg,
3205 MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
3206 } else { // NOLINT
3207 GetMasm()->Ldp(currReg, nextReg,
3208 MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
3209 }
3210 nextReg = getNextReg();
3211 } else {
3212 if constexpr (IS_STORE) { // NOLINT
3213 GetMasm()->Str(currReg,
3214 MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
3215 } else { // NOLINT
3216 GetMasm()->Ldr(currReg,
3217 MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
3218 }
3219 }
3220 }
3221 }
3222
3223 void Aarch64Encoder::SaveRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
3224 {
3225 LoadStoreRegisters<true>(registers, slot, startReg, isFp);
3226 }
3227
3228 void Aarch64Encoder::LoadRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
3229 {
3230 LoadStoreRegisters<false>(registers, slot, startReg, isFp);
3231 }
3232
SaveRegisters(RegMask registers,bool isFp,ssize_t slot,Reg base,RegMask mask)3233 void Aarch64Encoder::SaveRegisters(RegMask registers, bool isFp, ssize_t slot, Reg base, RegMask mask)
3234 {
3235 LoadStoreRegisters<true>(registers, isFp, slot, base, mask);
3236 }
3237
LoadRegisters(RegMask registers,bool isFp,ssize_t slot,Reg base,RegMask mask)3238 void Aarch64Encoder::LoadRegisters(RegMask registers, bool isFp, ssize_t slot, Reg base, RegMask mask)
3239 {
3240 LoadStoreRegisters<false>(registers, isFp, slot, base, mask);
3241 }
3242
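// Pushes the selected registers onto the stack in mask order, pairing neighbours in the
// iteration into pre-indexed STP instructions; a leftover single register still reserves a
// full 16-byte pair slot, which keeps SP 16-byte aligned. For example (hypothetical mask
// {x19, x20, x21} with isFp == false) the emitted sequence would be:
//   stp x19, x20, [sp, #-16]!
//   str x21, [sp, #-16]!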
void Aarch64Encoder::PushRegisters(RegMask registers, bool isFp)
{
    static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTES;
    Register lastReg = INVALID_REG;
    for (size_t i = 0; i < registers.size(); i++) {
        if (registers[i]) {
            if (lastReg == INVALID_REG) {
                lastReg = i;
                continue;
            }
            if (isFp) {
                GetMasm()->stp(vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
                               vixl::aarch64::VRegister(i, DOUBLE_WORD_SIZE),
                               MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
            } else {
                GetMasm()->stp(vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
                               vixl::aarch64::Register(i, DOUBLE_WORD_SIZE),
                               MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
            }
            lastReg = INVALID_REG;
        }
    }
    if (lastReg != INVALID_REG) {
        if (isFp) {
            GetMasm()->str(vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
                           MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
        } else {
            GetMasm()->str(vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
                           MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
        }
    }
}

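// Mirrors PushRegisters: when the mask has an odd population count, the highest register (the
// one pushed last) is popped first with a post-indexed LDR over a full pair slot, then the
// remaining registers are popped pairwise with post-indexed LDP while iterating the mask in
// reverse. For the same hypothetical mask {x19, x20, x21} this would emit:
//   ldr x21, [sp], #16
//   ldp x19, x20, [sp], #16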
void Aarch64Encoder::PopRegisters(RegMask registers, bool isFp)
{
    static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTES;
    Register lastReg;
    if ((registers.count() & 1U) != 0) {
        lastReg = registers.GetMaxRegister();
        if (isFp) {
            GetMasm()->ldr(vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
                           MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
        } else {
            GetMasm()->ldr(vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
                           MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
        }
        registers.reset(lastReg);
    }
    lastReg = INVALID_REG;
    for (ssize_t i = registers.size() - 1; i >= 0; i--) {
        if (registers[i]) {
            if (lastReg == INVALID_REG) {
                lastReg = i;
                continue;
            }
            if (isFp) {
                GetMasm()->ldp(vixl::aarch64::VRegister(i, DOUBLE_WORD_SIZE),
                               vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
                               MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
            } else {
                GetMasm()->ldp(vixl::aarch64::Register(i, DOUBLE_WORD_SIZE),
                               vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
                               MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
            }
            lastReg = INVALID_REG;
        }
    }
}

vixl::aarch64::MacroAssembler *Aarch64Encoder::GetMasm() const
{
    ASSERT(masm_ != nullptr);
    return masm_;
}

size_t Aarch64Encoder::GetLabelAddress(LabelHolder::LabelId label)
{
    auto plabel = labels_->GetLabel(label);
    ASSERT(plabel->IsBound());
    return GetMasm()->GetLabelAddress<size_t>(plabel);
}

bool Aarch64Encoder::LabelHasLinks(LabelHolder::LabelId label)
{
    auto plabel = labels_->GetLabel(label);
    return plabel->IsLinked();
}

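// Disassembler support is compiled only for full VIXL builds. The decoder and the disassembler
// are created lazily on first use, and the disassembler is registered as a decoder visitor so
// that Decode() fills its output buffer.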
#ifndef PANDA_MINIMAL_VIXL
vixl::aarch64::Decoder &Aarch64Encoder::GetDecoder() const
{
    if (!decoder_) {
        decoder_.emplace(GetAllocator());
        decoder_->visitors()->push_back(&GetDisasm());
    }
    return *decoder_;
}

vixl::aarch64::Disassembler &Aarch64Encoder::GetDisasm() const
{
    if (!disasm_) {
        disasm_.emplace(GetAllocator());
    }
    return *disasm_;
}
#endif

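// Disassembles the instruction located at buffer offset `pc`. When `codeOffset` is non-negative,
// the printed line is prefixed with the instruction's offset (its position in the buffer plus
// codeOffset) in hex; otherwise only the disassembly text is printed. Returns the offset of the
// next instruction (pc advanced by one 4-byte instruction). In minimal-VIXL builds the body is
// compiled out and only the advanced pc is returned.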
size_t Aarch64Encoder::DisasmInstr([[maybe_unused]] std::ostream &stream, size_t pc,
                                   [[maybe_unused]] ssize_t codeOffset) const
{
#ifndef PANDA_MINIMAL_VIXL
    auto bufferStart = GetMasm()->GetBuffer()->GetOffsetAddress<uintptr_t>(0);
    auto instr = GetMasm()->GetBuffer()->GetOffsetAddress<vixl::aarch64::Instruction *>(pc);
    GetDecoder().Decode(instr);
    if (codeOffset < 0) {
        stream << GetDisasm().GetOutput();
    } else {
        stream << std::setw(0x4) << std::right << std::setfill('0') << std::hex
               << reinterpret_cast<uintptr_t>(instr) - bufferStart + codeOffset << ": " << GetDisasm().GetOutput()
               << std::setfill(' ') << std::dec;
    }

#endif
    return pc + vixl::aarch64::kInstructionSize;
}
}  // namespace ark::compiler::aarch64