1 /*
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 /*
16 Encoder (implementation of math and mem Low-level emitters)
17 */
18
19 #include <aarch64/macro-assembler-aarch64.h>
20 #include <cstddef>
21 #include "compiler/optimizer/code_generator/target/aarch64/target.h"
22 #include "compiler/optimizer/code_generator/encode.h"
23 #include "compiler/optimizer/code_generator/fast_divisor.h"
24 #include "scoped_tmp_reg.h"
25 #include "compiler/optimizer/code_generator/relocations.h"
26
27 #if defined(USE_VIXL_ARM64) && !defined(PANDA_MINIMAL_VIXL)
28 #include "aarch64/disasm-aarch64.h"
29 #endif
30
31 #include <iomanip>
32
33 #include "lib_helpers.inl"
34
35 #ifndef PANDA_TARGET_MACOS
36 #include "elf.h"
37 #endif // PANDA_TARGET_MACOS
38
39 namespace ark::compiler::aarch64 {
40 using vixl::aarch64::CPURegister;
41 using vixl::aarch64::MemOperand;
42
43 /// Converters
Convert(const Condition cc)44 static vixl::aarch64::Condition Convert(const Condition cc)
45 {
46 switch (cc) {
47 case Condition::EQ:
48 return vixl::aarch64::Condition::eq;
49 case Condition::NE:
50 return vixl::aarch64::Condition::ne;
51 case Condition::LT:
52 return vixl::aarch64::Condition::lt;
53 case Condition::GT:
54 return vixl::aarch64::Condition::gt;
55 case Condition::LE:
56 return vixl::aarch64::Condition::le;
57 case Condition::GE:
58 return vixl::aarch64::Condition::ge;
59 case Condition::LO:
60 return vixl::aarch64::Condition::lo;
61 case Condition::LS:
62 return vixl::aarch64::Condition::ls;
63 case Condition::HI:
64 return vixl::aarch64::Condition::hi;
65 case Condition::HS:
66 return vixl::aarch64::Condition::hs;
67 // NOTE(igorban) : Remove them
68 case Condition::MI:
69 return vixl::aarch64::Condition::mi;
70 case Condition::PL:
71 return vixl::aarch64::Condition::pl;
72 case Condition::VS:
73 return vixl::aarch64::Condition::vs;
74 case Condition::VC:
75 return vixl::aarch64::Condition::vc;
76 case Condition::AL:
77 return vixl::aarch64::Condition::al;
78 case Condition::NV:
79 return vixl::aarch64::Condition::nv;
80 default:
81 UNREACHABLE();
82 return vixl::aarch64::Condition::eq;
83 }
84 }
85
ConvertTest(const Condition cc)86 static vixl::aarch64::Condition ConvertTest(const Condition cc)
87 {
88 ASSERT(cc == Condition::TST_EQ || cc == Condition::TST_NE);
89 return cc == Condition::TST_EQ ? vixl::aarch64::Condition::eq : vixl::aarch64::Condition::ne;
90 }
91
Convert(const ShiftType type)92 static vixl::aarch64::Shift Convert(const ShiftType type)
93 {
94 switch (type) {
95 case ShiftType::LSL:
96 return vixl::aarch64::Shift::LSL;
97 case ShiftType::LSR:
98 return vixl::aarch64::Shift::LSR;
99 case ShiftType::ASR:
100 return vixl::aarch64::Shift::ASR;
101 case ShiftType::ROR:
102 return vixl::aarch64::Shift::ROR;
103 default:
104 UNREACHABLE();
105 }
106 }
107
VixlVReg(Reg reg)108 static vixl::aarch64::VRegister VixlVReg(Reg reg)
109 {
110 ASSERT(reg.IsValid());
111 auto vixlVreg = vixl::aarch64::VRegister(reg.GetId(), reg.GetSize());
112 ASSERT(vixlVreg.IsValid());
113 return vixlVreg;
114 }
115
VixlShift(Shift shift)116 static vixl::aarch64::Operand VixlShift(Shift shift)
117 {
118 Reg reg = shift.GetBase();
119 ASSERT(reg.IsValid());
120 if (reg.IsScalar()) {
121 ASSERT(reg.IsScalar());
122 size_t regSize = reg.GetSize();
123 if (regSize < WORD_SIZE) {
124 regSize = WORD_SIZE;
125 }
126 auto vixlReg = vixl::aarch64::Register(reg.GetId(), regSize);
127 ASSERT(vixlReg.IsValid());
128
129 return vixl::aarch64::Operand(vixlReg, Convert(shift.GetType()), shift.GetScale());
130 }
131
132 // Invalid register type
133 UNREACHABLE();
134 }
135
ConvertMem(MemRef mem)136 static vixl::aarch64::MemOperand ConvertMem(MemRef mem)
137 {
138 bool base = mem.HasBase() && (mem.GetBase().GetId() != vixl::aarch64::xzr.GetCode());
139 bool hasIndex = mem.HasIndex();
140 bool shift = mem.HasScale();
141 bool offset = mem.HasDisp();
142 auto baseReg = Reg(mem.GetBase().GetId(), INT64_TYPE);
143 if (base && !hasIndex && !shift) {
144 // Memory address = x_reg(base) + imm(offset)
145 if (mem.GetDisp() != 0) {
146 auto disp = mem.GetDisp();
147 return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlImm(disp));
148 }
149 // Memory address = x_reg(base)
150 return vixl::aarch64::MemOperand(VixlReg(mem.GetBase(), DOUBLE_WORD_SIZE));
151 }
152 if (base && hasIndex && !offset) {
153 auto scale = mem.GetScale();
154 auto indexReg = mem.GetIndex();
155 // Memory address = x_reg(base) + (SXTW(w_reg(index)) << scale)
156 if (indexReg.GetSize() == WORD_SIZE) {
157 // Sign-extend and shift w-register in offset-position (signed because index always has signed type)
158 return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlReg(indexReg), vixl::aarch64::Extend::SXTW, scale);
159 }
160 // Memory address = x_reg(base) + (x_reg(index) << scale)
161 if (scale != 0) {
162 ASSERT(indexReg.GetSize() == DOUBLE_WORD_SIZE);
163 return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlReg(indexReg), vixl::aarch64::LSL, scale);
164 }
165 // Memory address = x_reg(base) + x_reg(index)
166 return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlReg(indexReg));
167 }
168 // Wrong memRef
169 // Return invalid memory operand
170 auto tmp = vixl::aarch64::MemOperand();
171 ASSERT(!tmp.IsValid());
172 return tmp;
173 }
174
Promote(Reg reg)175 static Reg Promote(Reg reg)
176 {
177 if (reg.GetType() == INT8_TYPE) {
178 return Reg(reg.GetId(), INT16_TYPE);
179 }
180 return reg;
181 }
182
CreateLabel()183 Aarch64LabelHolder::LabelId Aarch64LabelHolder::CreateLabel()
184 {
185 ++id_;
186 auto allocator = GetEncoder()->GetAllocator();
187 auto *label = allocator->New<LabelType>(allocator);
188 labels_.push_back(label);
189 ASSERT(labels_.size() == id_);
190 return id_ - 1;
191 }
192
CreateLabels(LabelId size)193 void Aarch64LabelHolder::CreateLabels(LabelId size)
194 {
195 for (LabelId i = 0; i <= size; ++i) {
196 CreateLabel();
197 }
198 }
199
BindLabel(LabelId id)200 void Aarch64LabelHolder::BindLabel(LabelId id)
201 {
202 static_cast<Aarch64Encoder *>(GetEncoder())->GetMasm()->Bind(labels_[id]);
203 }
204
GetLabel(LabelId id) const205 Aarch64LabelHolder::LabelType *Aarch64LabelHolder::GetLabel(LabelId id) const
206 {
207 ASSERT(labels_.size() > id);
208 return labels_[id];
209 }
210
Size()211 Aarch64LabelHolder::LabelId Aarch64LabelHolder::Size()
212 {
213 return labels_.size();
214 }
215
Aarch64Encoder(ArenaAllocator * allocator)216 Aarch64Encoder::Aarch64Encoder(ArenaAllocator *allocator) : Encoder(allocator, Arch::AARCH64)
217 {
218 labels_ = allocator->New<Aarch64LabelHolder>(this);
219 if (labels_ == nullptr) {
220 SetFalseResult();
221 }
222 // We enable LR tmp reg by default in Aarch64
223 EnableLrAsTempReg(true);
224 }
225
~Aarch64Encoder()226 Aarch64Encoder::~Aarch64Encoder()
227 {
228 auto labels = static_cast<Aarch64LabelHolder *>(GetLabels())->labels_;
229 for (auto label : labels) {
230 label->~Label();
231 }
232 if (masm_ != nullptr) {
233 masm_->~MacroAssembler();
234 masm_ = nullptr;
235 }
236 }
237
GetLabels() const238 LabelHolder *Aarch64Encoder::GetLabels() const
239 {
240 ASSERT(labels_ != nullptr);
241 return labels_;
242 }
243
IsValid() const244 bool Aarch64Encoder::IsValid() const
245 {
246 return true;
247 }
248
GetTarget()249 constexpr auto Aarch64Encoder::GetTarget()
250 {
251 return ark::compiler::Target(Arch::AARCH64);
252 }
253
SetMaxAllocatedBytes(size_t size)254 void Aarch64Encoder::SetMaxAllocatedBytes(size_t size)
255 {
256 GetMasm()->GetBuffer()->SetMmapMaxBytes(size);
257 }
258
InitMasm()259 bool Aarch64Encoder::InitMasm()
260 {
261 if (masm_ == nullptr) {
262 // Initialize Masm
263 masm_ = GetAllocator()->New<vixl::aarch64::MacroAssembler>(GetAllocator());
264 if (masm_ == nullptr || !masm_->IsValid()) {
265 SetFalseResult();
266 return false;
267 }
268 ASSERT(GetMasm());
269
270 // Make sure that the compiler uses the same scratch registers as the assembler
271 CHECK_EQ(RegMask(GetMasm()->GetScratchRegisterList()->GetList()), GetTarget().GetTempRegsMask());
272 CHECK_EQ(RegMask(GetMasm()->GetScratchVRegisterList()->GetList()), GetTarget().GetTempVRegsMask());
273 }
274 return true;
275 }
276
Finalize()277 void Aarch64Encoder::Finalize()
278 {
279 GetMasm()->FinalizeCode();
280 }
281
EncodeJump(LabelHolder::LabelId id)282 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id)
283 {
284 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
285 GetMasm()->B(label);
286 }
287
EncodeJump(LabelHolder::LabelId id,Reg src0,Reg src1,Condition cc)288 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
289 {
290 if (src1.GetId() == GetRegfile()->GetZeroReg().GetId()) {
291 EncodeJump(id, src0, cc);
292 return;
293 }
294
295 if (src0.IsScalar()) {
296 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
297 } else {
298 GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
299 }
300
301 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
302 GetMasm()->B(label, Convert(cc));
303 }
304
EncodeJump(LabelHolder::LabelId id,Reg src,Imm imm,Condition cc)305 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
306 {
307 auto value = imm.GetAsInt();
308 if (value == 0) {
309 EncodeJump(id, src, cc);
310 return;
311 }
312
313 if (value < 0) {
314 GetMasm()->Cmn(VixlReg(src), VixlImm(-value));
315 } else { // if (value > 0)
316 GetMasm()->Cmp(VixlReg(src), VixlImm(value));
317 }
318
319 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
320 GetMasm()->B(label, Convert(cc));
321 }
322
EncodeJumpTest(LabelHolder::LabelId id,Reg src0,Reg src1,Condition cc)323 void Aarch64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
324 {
325 ASSERT(src0.IsScalar() && src1.IsScalar());
326
327 GetMasm()->Tst(VixlReg(src0), VixlReg(src1));
328 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
329 GetMasm()->B(label, ConvertTest(cc));
330 }
331
EncodeJumpTest(LabelHolder::LabelId id,Reg src,Imm imm,Condition cc)332 void Aarch64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
333 {
334 ASSERT(src.IsScalar());
335
336 auto value = imm.GetAsInt();
337 if (CanEncodeImmLogical(value, src.GetSize() > WORD_SIZE ? DOUBLE_WORD_SIZE : WORD_SIZE)) {
338 GetMasm()->Tst(VixlReg(src), VixlImm(value));
339 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
340 GetMasm()->B(label, ConvertTest(cc));
341 } else {
342 ScopedTmpReg tmpReg(this, src.GetType());
343 EncodeMov(tmpReg, imm);
344 EncodeJumpTest(id, src, tmpReg, cc);
345 }
346 }
347
EncodeJump(LabelHolder::LabelId id,Reg src,Condition cc)348 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Condition cc)
349 {
350 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
351 ASSERT(src.IsScalar());
352 auto rzero = Reg(GetRegfile()->GetZeroReg().GetId(), src.GetType());
353
354 switch (cc) {
355 case Condition::LO:
356 // Always false
357 return;
358 case Condition::HS:
359 // Always true
360 GetMasm()->B(label);
361 return;
362 case Condition::EQ:
363 case Condition::LS:
364 if (src.GetId() == rzero.GetId()) {
365 GetMasm()->B(label);
366 return;
367 }
368 // True only when zero
369 GetMasm()->Cbz(VixlReg(src), label);
370 return;
371 case Condition::NE:
372 case Condition::HI:
373 if (src.GetId() == rzero.GetId()) {
374 // Do nothing
375 return;
376 }
377 // True only when non-zero
378 GetMasm()->Cbnz(VixlReg(src), label);
379 return;
380 default:
381 break;
382 }
383
384 ASSERT(rzero.IsValid());
385 GetMasm()->Cmp(VixlReg(src), VixlReg(rzero));
386 GetMasm()->B(label, Convert(cc));
387 }
388
EncodeJump(Reg dst)389 void Aarch64Encoder::EncodeJump(Reg dst)
390 {
391 GetMasm()->Br(VixlReg(dst));
392 }
393
EncodeJump(RelocationInfo * relocation)394 void Aarch64Encoder::EncodeJump([[maybe_unused]] RelocationInfo *relocation)
395 {
396 #ifdef PANDA_TARGET_MACOS
397 LOG(FATAL, COMPILER) << "Not supported in Macos build";
398 #else
399 auto buffer = GetMasm()->GetBuffer();
400 relocation->offset = GetCursorOffset();
401 relocation->addend = 0;
402 relocation->type = R_AARCH64_CALL26;
403 static constexpr uint32_t CALL_WITH_ZERO_OFFSET = 0x14000000;
404 buffer->Emit32(CALL_WITH_ZERO_OFFSET);
405 #endif
406 }
407
EncodeBitTestAndBranch(LabelHolder::LabelId id,compiler::Reg reg,uint32_t bitPos,bool bitValue)408 void Aarch64Encoder::EncodeBitTestAndBranch(LabelHolder::LabelId id, compiler::Reg reg, uint32_t bitPos, bool bitValue)
409 {
410 ASSERT(reg.IsScalar() && reg.GetSize() > bitPos);
411 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
412 if (bitValue) {
413 GetMasm()->Tbnz(VixlReg(reg), bitPos, label);
414 } else {
415 GetMasm()->Tbz(VixlReg(reg), bitPos, label);
416 }
417 }
418
EncodeNop()419 void Aarch64Encoder::EncodeNop()
420 {
421 GetMasm()->Nop();
422 }
423
MakeCall(compiler::RelocationInfo * relocation)424 void Aarch64Encoder::MakeCall([[maybe_unused]] compiler::RelocationInfo *relocation)
425 {
426 #ifdef PANDA_TARGET_MACOS
427 LOG(FATAL, COMPILER) << "Not supported in Macos build";
428 #else
429 auto buffer = GetMasm()->GetBuffer();
430 relocation->offset = GetCursorOffset();
431 relocation->addend = 0;
432 relocation->type = R_AARCH64_CALL26;
433 static constexpr uint32_t CALL_WITH_ZERO_OFFSET = 0x94000000;
434 buffer->Emit32(CALL_WITH_ZERO_OFFSET);
435 #endif
436 }
437
MakeCall(const void * entryPoint)438 void Aarch64Encoder::MakeCall(const void *entryPoint)
439 {
440 ScopedTmpReg tmp(this, true);
441 EncodeMov(tmp, Imm(reinterpret_cast<uintptr_t>(entryPoint)));
442 GetMasm()->Blr(VixlReg(tmp));
443 }
444
MakeCall(MemRef entryPoint)445 void Aarch64Encoder::MakeCall(MemRef entryPoint)
446 {
447 ScopedTmpReg tmp(this, true);
448 EncodeLdr(tmp, false, entryPoint);
449 GetMasm()->Blr(VixlReg(tmp));
450 }
451
MakeCall(Reg reg)452 void Aarch64Encoder::MakeCall(Reg reg)
453 {
454 GetMasm()->Blr(VixlReg(reg));
455 }
456
MakeCall(LabelHolder::LabelId id)457 void Aarch64Encoder::MakeCall(LabelHolder::LabelId id)
458 {
459 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
460 GetMasm()->Bl(label);
461 }
462
LoadPcRelative(Reg reg,intptr_t offset,Reg regAddr)463 void Aarch64Encoder::LoadPcRelative(Reg reg, intptr_t offset, Reg regAddr)
464 {
465 ASSERT(GetCodeOffset() != Encoder::INVALID_OFFSET);
466 ASSERT(reg.IsValid() || regAddr.IsValid());
467
468 if (!regAddr.IsValid()) {
469 regAddr = reg.As(INT64_TYPE);
470 }
471
472 if (vixl::IsInt21(offset)) {
473 GetMasm()->adr(VixlReg(regAddr), offset);
474 if (reg != INVALID_REGISTER) {
475 EncodeLdr(reg, false, MemRef(regAddr));
476 }
477 } else {
478 size_t pc = GetCodeOffset() + GetCursorOffset();
479 size_t addr;
480 if (intptr_t res = helpers::ToSigned(pc) + offset; res < 0) {
481 // Make both, pc and addr, positive
482 ssize_t extend = RoundUp(std::abs(res), vixl::aarch64::kPageSize);
483 addr = res + extend;
484 pc += extend;
485 } else {
486 addr = res;
487 }
488
489 ssize_t adrpImm = (addr >> vixl::aarch64::kPageSizeLog2) - (pc >> vixl::aarch64::kPageSizeLog2);
490
491 GetMasm()->adrp(VixlReg(regAddr), adrpImm);
492
493 offset = ark::helpers::ToUnsigned(addr) & (vixl::aarch64::kPageSize - 1);
494 if (reg.GetId() != regAddr.GetId()) {
495 EncodeAdd(regAddr, regAddr, Imm(offset));
496 if (reg != INVALID_REGISTER) {
497 EncodeLdr(reg, true, MemRef(regAddr));
498 }
499 } else {
500 EncodeLdr(reg, true, MemRef(regAddr, offset));
501 }
502 }
503 }
504
MakeCallAot(intptr_t offset)505 void Aarch64Encoder::MakeCallAot(intptr_t offset)
506 {
507 ScopedTmpReg tmp(this, true);
508 LoadPcRelative(tmp, offset);
509 GetMasm()->Blr(VixlReg(tmp));
510 }
511
CanMakeCallByOffset(intptr_t offset)512 bool Aarch64Encoder::CanMakeCallByOffset(intptr_t offset)
513 {
514 // NOLINTNEXTLINE(hicpp-signed-bitwise)
515 auto off = (offset >> vixl::aarch64::kInstructionSizeLog2);
516 return vixl::aarch64::Instruction::IsValidImmPCOffset(vixl::aarch64::ImmBranchType::UncondBranchType, off);
517 }
518
MakeCallByOffset(intptr_t offset)519 void Aarch64Encoder::MakeCallByOffset(intptr_t offset)
520 {
521 GetMasm()->Bl(offset);
522 }
523
MakeLoadAotTable(intptr_t offset,Reg reg)524 void Aarch64Encoder::MakeLoadAotTable(intptr_t offset, Reg reg)
525 {
526 LoadPcRelative(reg, offset);
527 }
528
MakeLoadAotTableAddr(intptr_t offset,Reg addr,Reg val)529 void Aarch64Encoder::MakeLoadAotTableAddr(intptr_t offset, Reg addr, Reg val)
530 {
531 LoadPcRelative(val, offset, addr);
532 }
533
EncodeAbort()534 void Aarch64Encoder::EncodeAbort()
535 {
536 GetMasm()->Brk();
537 }
538
EncodeReturn()539 void Aarch64Encoder::EncodeReturn()
540 {
541 GetMasm()->Ret();
542 }
543
EncodeMul(Reg unused1,Reg unused2,Imm unused3)544 void Aarch64Encoder::EncodeMul([[maybe_unused]] Reg unused1, [[maybe_unused]] Reg unused2, [[maybe_unused]] Imm unused3)
545 {
546 SetFalseResult();
547 }
548
EncodeMov(Reg dst,Reg src)549 void Aarch64Encoder::EncodeMov(Reg dst, Reg src)
550 {
551 if (dst == src) {
552 return;
553 }
554 if (src.IsFloat() && dst.IsFloat()) {
555 if (src.GetSize() != dst.GetSize()) {
556 GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
557 return;
558 }
559 GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
560 return;
561 }
562 if (src.IsFloat() && !dst.IsFloat()) {
563 GetMasm()->Fmov(VixlReg(dst, src.GetSize()), VixlVReg(src));
564 return;
565 }
566 if (dst.IsFloat()) {
567 ASSERT(src.IsScalar());
568 GetMasm()->Fmov(VixlVReg(dst), VixlReg(src));
569 return;
570 }
571 // DiscardForSameWReg below means we would drop "mov w0, w0", but it is guarded by "dst == src" above anyway.
572 // NOTE: "mov w0, w0" is not equal "nop", as it clears upper bits of x0.
573 // Keeping the option here helps to generate nothing when e.g. src is x0 and dst is w0.
574 // Probably, a better solution here is to system-wide checking register size on Encoder level.
575 if (src.GetSize() != dst.GetSize()) {
576 auto srcReg = Reg(src.GetId(), dst.GetType());
577 GetMasm()->Mov(VixlReg(dst), VixlReg(srcReg), vixl::aarch64::DiscardMoveMode::kDiscardForSameWReg);
578 return;
579 }
580 GetMasm()->Mov(VixlReg(dst), VixlReg(src), vixl::aarch64::DiscardMoveMode::kDiscardForSameWReg);
581 }
582
EncodeNeg(Reg dst,Reg src)583 void Aarch64Encoder::EncodeNeg(Reg dst, Reg src)
584 {
585 if (dst.IsFloat()) {
586 GetMasm()->Fneg(VixlVReg(dst), VixlVReg(src));
587 return;
588 }
589 GetMasm()->Neg(VixlReg(dst), VixlReg(src));
590 }
591
EncodeAbs(Reg dst,Reg src)592 void Aarch64Encoder::EncodeAbs(Reg dst, Reg src)
593 {
594 if (dst.IsFloat()) {
595 GetMasm()->Fabs(VixlVReg(dst), VixlVReg(src));
596 return;
597 }
598
599 ASSERT(!GetRegfile()->IsZeroReg(dst));
600 if (GetRegfile()->IsZeroReg(src)) {
601 EncodeMov(dst, src);
602 return;
603 }
604
605 if (src.GetSize() == DOUBLE_WORD_SIZE) {
606 GetMasm()->Cmp(VixlReg(src), vixl::aarch64::xzr);
607 } else {
608 GetMasm()->Cmp(VixlReg(src), vixl::aarch64::wzr);
609 }
610 GetMasm()->Cneg(VixlReg(Promote(dst)), VixlReg(Promote(src)), vixl::aarch64::Condition::lt);
611 }
612
EncodeSqrt(Reg dst,Reg src)613 void Aarch64Encoder::EncodeSqrt(Reg dst, Reg src)
614 {
615 ASSERT(dst.IsFloat());
616 GetMasm()->Fsqrt(VixlVReg(dst), VixlVReg(src));
617 }
618
EncodeIsInf(Reg dst,Reg src)619 void Aarch64Encoder::EncodeIsInf(Reg dst, Reg src)
620 {
621 ASSERT(dst.IsScalar() && src.IsFloat());
622
623 if (src.GetSize() == WORD_SIZE) {
624 constexpr uint32_t INF_MASK = 0xff000000;
625
626 ScopedTmpRegU32 tmpReg(this);
627 auto tmp = VixlReg(tmpReg);
628 GetMasm()->Fmov(tmp, VixlVReg(src));
629 GetMasm()->Mov(VixlReg(dst).W(), INF_MASK);
630 GetMasm()->Lsl(tmp, tmp, 1);
631 GetMasm()->Cmp(tmp, VixlReg(dst, WORD_SIZE));
632 } else {
633 constexpr uint64_t INF_MASK = 0xffe0000000000000;
634
635 ScopedTmpRegU64 tmpReg(this);
636 auto tmp = VixlReg(tmpReg);
637 GetMasm()->Fmov(tmp, VixlVReg(src));
638 GetMasm()->Mov(VixlReg(dst).X(), INF_MASK);
639 GetMasm()->Lsl(tmp, tmp, 1);
640 GetMasm()->Cmp(tmp, VixlReg(dst, DOUBLE_WORD_SIZE));
641 }
642
643 GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::eq);
644 }
645
EncodeCmpFracWithDelta(Reg src)646 void Aarch64Encoder::EncodeCmpFracWithDelta(Reg src)
647 {
648 ASSERT(src.IsFloat());
649 ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
650
651 // Encode (fabs(src - trunc(src)) <= DELTA)
652 if (src.GetSize() == WORD_SIZE) {
653 ScopedTmpRegF32 tmp(this);
654 GetMasm()->Frintz(VixlVReg(tmp), VixlVReg(src));
655 EncodeSub(tmp, src, tmp);
656 EncodeAbs(tmp, tmp);
657 GetMasm()->Fcmp(VixlVReg(tmp), std::numeric_limits<float>::epsilon());
658 } else {
659 ScopedTmpRegF64 tmp(this);
660 GetMasm()->Frintz(VixlVReg(tmp), VixlVReg(src));
661 EncodeSub(tmp, src, tmp);
662 EncodeAbs(tmp, tmp);
663 GetMasm()->Fcmp(VixlVReg(tmp), std::numeric_limits<double>::epsilon());
664 }
665 }
666
EncodeIsInteger(Reg dst,Reg src)667 void Aarch64Encoder::EncodeIsInteger(Reg dst, Reg src)
668 {
669 ASSERT(dst.IsScalar() && src.IsFloat());
670 ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
671
672 auto labelExit = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
673 auto labelInfOrNan = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
674
675 EncodeCmpFracWithDelta(src);
676 GetMasm()->B(labelInfOrNan, vixl::aarch64::Condition::vs); // Inf or NaN
677 GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::le);
678 GetMasm()->B(labelExit);
679
680 // IsInteger returns false if src is Inf or NaN
681 GetMasm()->Bind(labelInfOrNan);
682 EncodeMov(dst, Imm(false));
683
684 GetMasm()->Bind(labelExit);
685 }
686
EncodeIsSafeInteger(Reg dst,Reg src)687 void Aarch64Encoder::EncodeIsSafeInteger(Reg dst, Reg src)
688 {
689 ASSERT(dst.IsScalar() && src.IsFloat());
690 ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
691
692 auto labelExit = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
693 auto labelFalse = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
694
695 // Check if IsInteger
696 EncodeCmpFracWithDelta(src);
697 GetMasm()->B(labelFalse, vixl::aarch64::Condition::vs); // Inf or NaN
698 GetMasm()->B(labelFalse, vixl::aarch64::Condition::gt);
699
700 // Check if it is safe, i.e. src can be represented in float/double without losing precision
701 if (src.GetSize() == WORD_SIZE) {
702 ScopedTmpRegF32 tmp(this);
703 EncodeAbs(tmp, src);
704 GetMasm()->Fcmp(VixlVReg(tmp), MaxIntAsExactFloat());
705 } else {
706 ScopedTmpRegF64 tmp(this);
707 EncodeAbs(tmp, src);
708 GetMasm()->Fcmp(VixlVReg(tmp), MaxIntAsExactDouble());
709 }
710 GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::le);
711 GetMasm()->B(labelExit);
712
713 // Return false if src !IsInteger
714 GetMasm()->Bind(labelFalse);
715 EncodeMov(dst, Imm(false));
716
717 GetMasm()->Bind(labelExit);
718 }
719
720 /* NaN values are needed to be canonicalized */
EncodeFpToBits(Reg dst,Reg src)721 void Aarch64Encoder::EncodeFpToBits(Reg dst, Reg src)
722 {
723 ASSERT(dst.IsScalar() && src.IsFloat());
724 ASSERT(dst.GetSize() == WORD_SIZE || dst.GetSize() == DOUBLE_WORD_SIZE);
725
726 if (dst.GetSize() == WORD_SIZE) {
727 ASSERT(src.GetSize() == WORD_SIZE);
728
729 constexpr auto FNAN = 0x7fc00000;
730
731 ScopedTmpRegU32 tmp(this);
732
733 GetMasm()->Fcmp(VixlVReg(src), VixlVReg(src));
734 GetMasm()->Mov(VixlReg(tmp), FNAN);
735 GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
736 GetMasm()->Csel(VixlReg(dst), VixlReg(tmp), VixlReg(dst), vixl::aarch64::Condition::ne);
737 } else {
738 ASSERT(src.GetSize() == DOUBLE_WORD_SIZE);
739
740 constexpr auto DNAN = 0x7ff8000000000000;
741
742 ScopedTmpRegU64 tmpReg(this);
743 auto tmp = VixlReg(tmpReg);
744
745 GetMasm()->Fcmp(VixlVReg(src), VixlVReg(src));
746 GetMasm()->Mov(tmp, DNAN);
747 GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
748 GetMasm()->Csel(VixlReg(dst), tmp, VixlReg(dst), vixl::aarch64::Condition::ne);
749 }
750 }
751
EncodeMoveBitsRaw(Reg dst,Reg src)752 void Aarch64Encoder::EncodeMoveBitsRaw(Reg dst, Reg src)
753 {
754 ASSERT((dst.IsFloat() && src.IsScalar()) || (src.IsFloat() && dst.IsScalar()));
755 if (dst.IsScalar()) {
756 ASSERT(src.GetSize() == dst.GetSize());
757 if (dst.GetSize() == WORD_SIZE) {
758 GetMasm()->Umov(VixlReg(dst).W(), VixlVReg(src).S(), 0);
759 } else {
760 GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
761 }
762 } else {
763 ASSERT(dst.GetSize() == src.GetSize());
764 ScopedTmpReg tmpReg(this, src.GetType());
765 auto srcReg = src;
766 auto rzero = GetRegfile()->GetZeroReg();
767 if (src.GetId() == rzero.GetId()) {
768 EncodeMov(tmpReg, Imm(0));
769 srcReg = tmpReg;
770 }
771
772 if (srcReg.GetSize() == WORD_SIZE) {
773 GetMasm()->Fmov(VixlVReg(dst).S(), VixlReg(srcReg).W());
774 } else {
775 GetMasm()->Fmov(VixlVReg(dst), VixlReg(srcReg));
776 }
777 }
778 }
779
EncodeReverseBytes(Reg dst,Reg src)780 void Aarch64Encoder::EncodeReverseBytes(Reg dst, Reg src)
781 {
782 auto rzero = GetRegfile()->GetZeroReg();
783 if (src.GetId() == rzero.GetId()) {
784 EncodeMov(dst, Imm(0));
785 return;
786 }
787
788 ASSERT(src.GetSize() > BYTE_SIZE);
789 ASSERT(src.GetSize() == dst.GetSize());
790
791 if (src.GetSize() == HALF_SIZE) {
792 GetMasm()->Rev16(VixlReg(dst), VixlReg(src));
793 GetMasm()->Sxth(VixlReg(dst), VixlReg(dst));
794 } else {
795 GetMasm()->Rev(VixlReg(dst), VixlReg(src));
796 }
797 }
798
EncodeBitCount(Reg dst,Reg src)799 void Aarch64Encoder::EncodeBitCount(Reg dst, Reg src)
800 {
801 auto rzero = GetRegfile()->GetZeroReg();
802 if (src.GetId() == rzero.GetId()) {
803 EncodeMov(dst, Imm(0));
804 return;
805 }
806
807 ASSERT(dst.GetSize() == WORD_SIZE);
808
809 ScopedTmpRegF64 tmpReg0(this);
810 vixl::aarch64::VRegister tmpReg;
811 if (src.GetSize() == DOUBLE_WORD_SIZE) {
812 tmpReg = VixlVReg(tmpReg0).D();
813 } else {
814 tmpReg = VixlVReg(tmpReg0).S();
815 }
816
817 if (src.GetSize() < WORD_SIZE) {
818 int64_t cutValue = (1ULL << src.GetSize()) - 1;
819 EncodeAnd(src, src, Imm(cutValue));
820 }
821
822 GetMasm()->Fmov(tmpReg, VixlReg(src));
823 GetMasm()->Cnt(tmpReg.V8B(), tmpReg.V8B());
824 GetMasm()->Addv(tmpReg.B(), tmpReg.V8B());
825 EncodeMov(dst, tmpReg0);
826 }
827
828 /* Since only ROR is supported on AArch64 we do
829 * left rotaion as ROR(v, -count) */
EncodeRotate(Reg dst,Reg src1,Reg src2,bool isRor)830 void Aarch64Encoder::EncodeRotate(Reg dst, Reg src1, Reg src2, bool isRor)
831 {
832 ASSERT(src1.GetSize() == WORD_SIZE || src1.GetSize() == DOUBLE_WORD_SIZE);
833 ASSERT(src1.GetSize() == dst.GetSize());
834 auto rzero = GetRegfile()->GetZeroReg();
835 if (rzero.GetId() == src2.GetId() || rzero.GetId() == src1.GetId()) {
836 EncodeMov(dst, src1);
837 return;
838 }
839 /* as the second parameters is always 32-bits long we have to
840 * adjust the counter register for the 64-bits first operand case */
841 if (isRor) {
842 auto count = (dst.GetSize() == WORD_SIZE ? VixlReg(src2) : VixlReg(src2).X());
843 GetMasm()->Ror(VixlReg(dst), VixlReg(src1), count);
844 } else {
845 ScopedTmpReg tmp(this);
846 auto cnt = (dst.GetId() == src1.GetId() ? tmp : dst);
847 auto count = (dst.GetSize() == WORD_SIZE ? VixlReg(cnt).W() : VixlReg(cnt).X());
848 auto source2 = (dst.GetSize() == WORD_SIZE ? VixlReg(src2).W() : VixlReg(src2).X());
849 GetMasm()->Neg(count, source2);
850 GetMasm()->Ror(VixlReg(dst), VixlReg(src1), count);
851 }
852 }
853
EncodeSignum(Reg dst,Reg src)854 void Aarch64Encoder::EncodeSignum(Reg dst, Reg src)
855 {
856 ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
857
858 ScopedTmpRegU32 tmp(this);
859 auto sign = (dst.GetId() == src.GetId() ? tmp : dst);
860
861 GetMasm()->Cmp(VixlReg(src), VixlImm(0));
862 GetMasm()->Cset(VixlReg(sign), vixl::aarch64::Condition::gt);
863
864 constexpr auto SHIFT_WORD_BITS = 31;
865 constexpr auto SHIFT_DWORD_BITS = 63;
866
867 /* The operation below is "sub dst, dst, src, lsr #reg_size-1"
868 * however, we can only encode as many as 32 bits in lsr field, so
869 * for 64-bits cases we cannot avoid having a separate lsr instruction */
870 if (src.GetSize() == WORD_SIZE) {
871 auto shift = Shift(src, LSR, SHIFT_WORD_BITS);
872 EncodeSub(dst, sign, shift);
873 } else {
874 ScopedTmpRegU64 shift(this);
875 sign = Reg(sign.GetId(), INT64_TYPE);
876 EncodeShr(shift, src, Imm(SHIFT_DWORD_BITS));
877 EncodeSub(dst, sign, shift);
878 }
879 }
880
EncodeCountLeadingZeroBits(Reg dst,Reg src)881 void Aarch64Encoder::EncodeCountLeadingZeroBits(Reg dst, Reg src)
882 {
883 auto rzero = GetRegfile()->GetZeroReg();
884 if (rzero.GetId() == src.GetId()) {
885 EncodeMov(dst, Imm(src.GetSize()));
886 return;
887 }
888 GetMasm()->Clz(VixlReg(dst), VixlReg(src));
889 }
890
EncodeCountTrailingZeroBits(Reg dst,Reg src)891 void Aarch64Encoder::EncodeCountTrailingZeroBits(Reg dst, Reg src)
892 {
893 auto rzero = GetRegfile()->GetZeroReg();
894 if (rzero.GetId() == src.GetId()) {
895 EncodeMov(dst, Imm(src.GetSize()));
896 return;
897 }
898 GetMasm()->Rbit(VixlReg(dst), VixlReg(src));
899 GetMasm()->Clz(VixlReg(dst), VixlReg(dst));
900 }
901
EncodeCeil(Reg dst,Reg src)902 void Aarch64Encoder::EncodeCeil(Reg dst, Reg src)
903 {
904 GetMasm()->Frintp(VixlVReg(dst), VixlVReg(src));
905 }
906
EncodeFloor(Reg dst,Reg src)907 void Aarch64Encoder::EncodeFloor(Reg dst, Reg src)
908 {
909 GetMasm()->Frintm(VixlVReg(dst), VixlVReg(src));
910 }
911
EncodeRint(Reg dst,Reg src)912 void Aarch64Encoder::EncodeRint(Reg dst, Reg src)
913 {
914 GetMasm()->Frintn(VixlVReg(dst), VixlVReg(src));
915 }
916
EncodeTrunc(Reg dst,Reg src)917 void Aarch64Encoder::EncodeTrunc(Reg dst, Reg src)
918 {
919 GetMasm()->Frintz(VixlVReg(dst), VixlVReg(src));
920 }
921
EncodeRoundAway(Reg dst,Reg src)922 void Aarch64Encoder::EncodeRoundAway(Reg dst, Reg src)
923 {
924 GetMasm()->Frinta(VixlVReg(dst), VixlVReg(src));
925 }
926
EncodeRoundToPInf(Reg dst,Reg src)927 void Aarch64Encoder::EncodeRoundToPInf(Reg dst, Reg src)
928 {
929 auto done = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
930 ScopedTmpReg tmp(this, src.GetType());
931 // round to nearest integer, ties away from zero
932 GetMasm()->Fcvtas(VixlReg(dst), VixlVReg(src));
933 // for positive values, zero and NaN inputs rounding is done
934 GetMasm()->Tbz(VixlReg(dst), dst.GetSize() - 1, done);
935 // if input is negative but not a tie, round to nearest is valid
936 // if input is a negative tie, dst += 1
937 GetMasm()->Frinta(VixlVReg(tmp), VixlVReg(src));
938 GetMasm()->Fsub(VixlVReg(tmp), VixlVReg(src), VixlVReg(tmp));
939 // NOLINTNEXTLINE(readability-magic-numbers)
940 GetMasm()->Fcmp(VixlVReg(tmp), 0.5F);
941 GetMasm()->Cinc(VixlReg(dst), VixlReg(dst), vixl::aarch64::Condition::eq);
942 GetMasm()->Bind(done);
943 }
944
EncodeCrc32Update(Reg dst,Reg crcReg,Reg valReg)945 void Aarch64Encoder::EncodeCrc32Update(Reg dst, Reg crcReg, Reg valReg)
946 {
947 auto tmp = dst.GetId() != crcReg.GetId() && dst.GetId() != valReg.GetId() ? dst : ScopedTmpReg(this, dst.GetType());
948 GetMasm()->Mvn(VixlReg(tmp), VixlReg(crcReg));
949 GetMasm()->Crc32b(VixlReg(tmp), VixlReg(tmp), VixlReg(valReg));
950 GetMasm()->Mvn(VixlReg(dst), VixlReg(tmp));
951 }
952
EncodeCompressEightUtf16ToUtf8CharsUsingSimd(Reg srcAddr,Reg dstAddr)953 void Aarch64Encoder::EncodeCompressEightUtf16ToUtf8CharsUsingSimd(Reg srcAddr, Reg dstAddr)
954 {
955 ScopedTmpReg tmp1(this, FLOAT64_TYPE);
956 ScopedTmpReg tmp2(this, FLOAT64_TYPE);
957 auto vixlVreg1 = vixl::aarch64::VRegister(tmp1.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8B);
958 ASSERT(vixlVreg1.IsValid());
959 auto vixlVreg2 = vixl::aarch64::VRegister(tmp2.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8B);
960 ASSERT(vixlVreg2.IsValid());
961 auto src = vixl::aarch64::MemOperand(VixlReg(srcAddr));
962 auto dst = vixl::aarch64::MemOperand(VixlReg(dstAddr));
963 GetMasm()->Ld2(vixlVreg1, vixlVreg2, src);
964 GetMasm()->St1(vixlVreg1, dst);
965 }
966
EncodeCompressSixteenUtf16ToUtf8CharsUsingSimd(Reg srcAddr,Reg dstAddr)967 void Aarch64Encoder::EncodeCompressSixteenUtf16ToUtf8CharsUsingSimd(Reg srcAddr, Reg dstAddr)
968 {
969 ScopedTmpReg tmp1(this, FLOAT64_TYPE);
970 ScopedTmpReg tmp2(this, FLOAT64_TYPE);
971 auto vixlVreg1 = vixl::aarch64::VRegister(tmp1.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
972 ASSERT(vixlVreg1.IsValid());
973 auto vixlVreg2 = vixl::aarch64::VRegister(tmp2.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
974 ASSERT(vixlVreg2.IsValid());
975 auto src = vixl::aarch64::MemOperand(VixlReg(srcAddr));
976 auto dst = vixl::aarch64::MemOperand(VixlReg(dstAddr));
977 GetMasm()->Ld2(vixlVreg1, vixlVreg2, src);
978 GetMasm()->St1(vixlVreg1, dst);
979 }
980
EncodeMemCharU8X32UsingSimd(Reg dst,Reg ch,Reg srcAddr,Reg tmp)981 void Aarch64Encoder::EncodeMemCharU8X32UsingSimd(Reg dst, Reg ch, Reg srcAddr, Reg tmp)
982 {
983 ScopedTmpReg vTmp0(this, FLOAT64_TYPE);
984 ScopedTmpReg vTmp1(this, FLOAT64_TYPE);
985 auto vReg0 = vixl::aarch64::VRegister(vTmp0.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
986 auto vReg1 = vixl::aarch64::VRegister(vTmp1.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
987 auto vReg2 = vixl::aarch64::VRegister(tmp.GetId(), vixl::aarch64::VectorFormat::kFormat16B);
988 auto xReg0 = vixl::aarch64::Register(dst.GetId(), vixl::aarch64::kXRegSize);
989 auto labelReturn = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
990 auto labelFound = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
991 auto labelSecond16B = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
992 auto labelCheckV0D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
993 auto labelCheckV1D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
994
995 GetMasm()->Ld1(vReg0, vReg1, vixl::aarch64::MemOperand(VixlReg(srcAddr)));
996 GetMasm()->Dup(vReg2, VixlReg(ch));
997 GetMasm()->Cmeq(vReg0, vReg0, vReg2);
998 GetMasm()->Cmeq(vReg1, vReg1, vReg2);
999 // Give up if char is not there
1000 GetMasm()->Addp(vReg2, vReg0, vReg1);
1001 GetMasm()->Addp(vReg2.V2D(), vReg2.V2D(), vReg2.V2D());
1002 GetMasm()->Mov(xReg0, vReg2.D(), 0);
1003 GetMasm()->Cbz(xReg0, labelReturn);
1004 // Inspect the first 16-byte block
1005 GetMasm()->Mov(xReg0, vReg0.D(), 0);
1006 GetMasm()->Cbz(xReg0, labelCheckV0D1);
1007 GetMasm()->Rev(xReg0, xReg0);
1008 GetMasm()->Clz(xReg0, xReg0);
1009 GetMasm()->B(labelFound);
1010 GetMasm()->Bind(labelCheckV0D1);
1011 GetMasm()->Mov(xReg0, vReg0.D(), 1U);
1012 GetMasm()->Cbz(xReg0, labelSecond16B);
1013 GetMasm()->Rev(xReg0, xReg0);
1014 GetMasm()->Clz(xReg0, xReg0);
1015 GetMasm()->Add(xReg0, xReg0, VixlImm(BITS_PER_UINT64));
1016 GetMasm()->B(labelFound);
1017 // Inspect the second 16-byte block
1018 GetMasm()->Bind(labelSecond16B);
1019 GetMasm()->Mov(xReg0, vReg1.D(), 0);
1020 GetMasm()->Cbz(xReg0, labelCheckV1D1);
1021 GetMasm()->Rev(xReg0, xReg0);
1022 GetMasm()->Clz(xReg0, xReg0);
1023 GetMasm()->Add(xReg0, xReg0, VixlImm(2U * BITS_PER_UINT64));
1024 GetMasm()->B(labelFound);
1025 GetMasm()->Bind(labelCheckV1D1);
1026 GetMasm()->Mov(xReg0, vReg1.D(), 1U);
1027 GetMasm()->Rev(xReg0, xReg0);
1028 GetMasm()->Clz(xReg0, xReg0);
1029 GetMasm()->Add(xReg0, xReg0, VixlImm(3U * BITS_PER_UINT64));
1030
1031 GetMasm()->Bind(labelFound);
1032 GetMasm()->Lsr(xReg0, xReg0, 3U);
1033 GetMasm()->Add(xReg0, xReg0, VixlReg(srcAddr));
1034 GetMasm()->Bind(labelReturn);
1035 }
1036
EncodeMemCharU16X16UsingSimd(Reg dst,Reg ch,Reg srcAddr,Reg tmp)1037 void Aarch64Encoder::EncodeMemCharU16X16UsingSimd(Reg dst, Reg ch, Reg srcAddr, Reg tmp)
1038 {
1039 ScopedTmpReg vTmp0(this, FLOAT64_TYPE);
1040 ScopedTmpReg vTmp1(this, FLOAT64_TYPE);
1041 auto vReg0 = vixl::aarch64::VRegister(vTmp0.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1042 auto vReg1 = vixl::aarch64::VRegister(vTmp1.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1043 auto vReg2 = vixl::aarch64::VRegister(tmp.GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1044 auto xReg0 = vixl::aarch64::Register(dst.GetId(), vixl::aarch64::kXRegSize);
1045 auto labelReturn = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1046 auto labelFound = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1047 auto labelSecond16B = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1048 auto labelCheckV0D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1049 auto labelCheckV1D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1050
1051 GetMasm()->Ld1(vReg0, vReg1, vixl::aarch64::MemOperand(VixlReg(srcAddr)));
1052 GetMasm()->Dup(vReg2, VixlReg(ch));
1053 GetMasm()->Cmeq(vReg0, vReg0, vReg2);
1054 GetMasm()->Cmeq(vReg1, vReg1, vReg2);
1055 // Give up if char is not there
1056 GetMasm()->Addp(vReg2, vReg0, vReg1);
1057 GetMasm()->Addp(vReg2.V2D(), vReg2.V2D(), vReg2.V2D());
1058 GetMasm()->Mov(xReg0, vReg2.D(), 0);
1059 GetMasm()->Cbz(xReg0, labelReturn);
1060 // Inspect the first 16-byte block
1061 GetMasm()->Mov(xReg0, vReg0.D(), 0);
1062 GetMasm()->Cbz(xReg0, labelCheckV0D1);
1063 GetMasm()->Rev(xReg0, xReg0);
1064 GetMasm()->Clz(xReg0, xReg0);
1065 GetMasm()->B(labelFound);
1066 GetMasm()->Bind(labelCheckV0D1);
1067 GetMasm()->Mov(xReg0, vReg0.D(), 1U);
1068 GetMasm()->Cbz(xReg0, labelSecond16B);
1069 GetMasm()->Rev(xReg0, xReg0);
1070 GetMasm()->Clz(xReg0, xReg0);
1071 GetMasm()->Add(xReg0, xReg0, VixlImm(BITS_PER_UINT64));
1072 GetMasm()->B(labelFound);
1073 // Inspect the second 16-byte block
1074 GetMasm()->Bind(labelSecond16B);
1075 GetMasm()->Mov(xReg0, vReg1.D(), 0);
1076 GetMasm()->Cbz(xReg0, labelCheckV1D1);
1077 GetMasm()->Rev(xReg0, xReg0);
1078 GetMasm()->Clz(xReg0, xReg0);
1079 GetMasm()->Add(xReg0, xReg0, VixlImm(2U * BITS_PER_UINT64));
1080 GetMasm()->B(labelFound);
1081 GetMasm()->Bind(labelCheckV1D1);
1082 GetMasm()->Mov(xReg0, vReg1.D(), 1U);
1083 GetMasm()->Rev(xReg0, xReg0);
1084 GetMasm()->Clz(xReg0, xReg0);
1085 GetMasm()->Add(xReg0, xReg0, VixlImm(3U * BITS_PER_UINT64));
1086
1087 GetMasm()->Bind(labelFound);
1088 GetMasm()->Lsr(xReg0, xReg0, 4U);
1089 GetMasm()->Lsl(xReg0, xReg0, 1U);
1090 GetMasm()->Add(xReg0, xReg0, VixlReg(srcAddr));
1091 GetMasm()->Bind(labelReturn);
1092 }
1093
EncodeMemCharU8X16UsingSimd(Reg dst,Reg ch,Reg srcAddr,Reg tmp)1094 void Aarch64Encoder::EncodeMemCharU8X16UsingSimd(Reg dst, Reg ch, Reg srcAddr, Reg tmp)
1095 {
1096 ScopedTmpReg vTmp0(this, FLOAT64_TYPE);
1097 ScopedTmpReg vTmp1(this, FLOAT64_TYPE);
1098 auto vReg0 = vixl::aarch64::VRegister(vTmp0.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
1099 auto vReg1 = vixl::aarch64::VRegister(tmp.GetId(), vixl::aarch64::VectorFormat::kFormat16B);
1100 auto xReg0 = vixl::aarch64::Register(dst.GetId(), vixl::aarch64::kXRegSize);
1101 auto labelReturn = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1102 auto labelFound = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1103 auto labelCheckV0D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1104
1105 GetMasm()->Ld1(vReg0, vixl::aarch64::MemOperand(VixlReg(srcAddr)));
1106 GetMasm()->Dup(vReg1, VixlReg(ch));
1107 GetMasm()->Cmeq(vReg0, vReg0, vReg1);
1108 // Give up if char is not there
1109 GetMasm()->Addp(vReg1.V2D(), vReg0.V2D(), vReg0.V2D());
1110 GetMasm()->Mov(xReg0, vReg1.D(), 0);
1111 GetMasm()->Cbz(xReg0, labelReturn);
1112 // Compute a pointer to the char
1113 GetMasm()->Mov(xReg0, vReg0.D(), 0);
1114 GetMasm()->Cbz(xReg0, labelCheckV0D1);
1115 GetMasm()->Rev(xReg0, xReg0);
1116 GetMasm()->Clz(xReg0, xReg0);
1117 GetMasm()->B(labelFound);
1118 GetMasm()->Bind(labelCheckV0D1);
1119 GetMasm()->Mov(xReg0, vReg0.D(), 1U);
1120 GetMasm()->Rev(xReg0, xReg0);
1121 GetMasm()->Clz(xReg0, xReg0);
1122 GetMasm()->Add(xReg0, xReg0, VixlImm(BITS_PER_UINT64));
1123 GetMasm()->Bind(labelFound);
1124 GetMasm()->Lsr(xReg0, xReg0, 3U); // number of 8-bit chars
1125 GetMasm()->Add(xReg0, xReg0, VixlReg(srcAddr));
1126 GetMasm()->Bind(labelReturn);
1127 }
1128
EncodeMemCharU16X8UsingSimd(Reg dst,Reg ch,Reg srcAddr,Reg tmp)1129 void Aarch64Encoder::EncodeMemCharU16X8UsingSimd(Reg dst, Reg ch, Reg srcAddr, Reg tmp)
1130 {
1131 ScopedTmpReg vTmp0(this, FLOAT64_TYPE);
1132 ScopedTmpReg vTmp1(this, FLOAT64_TYPE);
1133 auto vReg0 = vixl::aarch64::VRegister(vTmp0.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1134 auto vReg1 = vixl::aarch64::VRegister(tmp.GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1135 auto xReg0 = vixl::aarch64::Register(dst.GetId(), vixl::aarch64::kXRegSize);
1136 auto labelReturn = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1137 auto labelFound = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1138 auto labelCheckV0D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1139
1140 GetMasm()->Ld1(vReg0, vixl::aarch64::MemOperand(VixlReg(srcAddr)));
1141 GetMasm()->Dup(vReg1, VixlReg(ch));
1142 GetMasm()->Cmeq(vReg0, vReg0, vReg1);
1143 // Give up if char is not there
1144 GetMasm()->Addp(vReg1.V2D(), vReg0.V2D(), vReg0.V2D());
1145 GetMasm()->Mov(xReg0, vReg1.D(), 0);
1146 GetMasm()->Cbz(xReg0, labelReturn);
1147 // Compute a pointer to the char
1148 GetMasm()->Mov(xReg0, vReg0.D(), 0);
1149 GetMasm()->Cbz(xReg0, labelCheckV0D1);
1150 GetMasm()->Rev(xReg0, xReg0);
1151 GetMasm()->Clz(xReg0, xReg0);
1152 GetMasm()->B(labelFound);
1153 GetMasm()->Bind(labelCheckV0D1);
1154 GetMasm()->Mov(xReg0, vReg0.D(), 1U);
1155 GetMasm()->Rev(xReg0, xReg0);
1156 GetMasm()->Clz(xReg0, xReg0);
1157 GetMasm()->Add(xReg0, xReg0, VixlImm(BITS_PER_UINT64));
1158 GetMasm()->Bind(labelFound);
1159 GetMasm()->Lsr(xReg0, xReg0, 4U); // number of 16-bit chars
1160 GetMasm()->Lsl(xReg0, xReg0, 1U); // number of bytes
1161 GetMasm()->Add(xReg0, xReg0, VixlReg(srcAddr));
1162 GetMasm()->Bind(labelReturn);
1163 }
1164
EncodeUnsignedExtendBytesToShorts(Reg dst,Reg src)1165 void Aarch64Encoder::EncodeUnsignedExtendBytesToShorts(Reg dst, Reg src)
1166 {
1167 GetMasm()->Uxtl(VixlVReg(dst).V8H(), VixlVReg(src).V8B());
1168 }
1169
EncodeReverseHalfWords(Reg dst,Reg src)1170 void Aarch64Encoder::EncodeReverseHalfWords(Reg dst, Reg src)
1171 {
1172 ASSERT(src.GetSize() == dst.GetSize());
1173
1174 GetMasm()->rev64(VixlVReg(dst).V4H(), VixlVReg(src).V4H());
1175 }
1176
CanEncodeBitCount()1177 bool Aarch64Encoder::CanEncodeBitCount()
1178 {
1179 return true;
1180 }
1181
CanEncodeCompressedStringCharAt()1182 bool Aarch64Encoder::CanEncodeCompressedStringCharAt()
1183 {
1184 return true;
1185 }
1186
CanEncodeCompressedStringCharAtI()1187 bool Aarch64Encoder::CanEncodeCompressedStringCharAtI()
1188 {
1189 return true;
1190 }
1191
CanEncodeMAdd()1192 bool Aarch64Encoder::CanEncodeMAdd()
1193 {
1194 return true;
1195 }
1196
CanEncodeMSub()1197 bool Aarch64Encoder::CanEncodeMSub()
1198 {
1199 return true;
1200 }
1201
CanEncodeMNeg()1202 bool Aarch64Encoder::CanEncodeMNeg()
1203 {
1204 return true;
1205 }
1206
CanEncodeOrNot()1207 bool Aarch64Encoder::CanEncodeOrNot()
1208 {
1209 return true;
1210 }
1211
CanEncodeAndNot()1212 bool Aarch64Encoder::CanEncodeAndNot()
1213 {
1214 return true;
1215 }
1216
CanEncodeXorNot()1217 bool Aarch64Encoder::CanEncodeXorNot()
1218 {
1219 return true;
1220 }
1221
GetCursorOffset() const1222 size_t Aarch64Encoder::GetCursorOffset() const
1223 {
1224 return GetMasm()->GetBuffer()->GetCursorOffset();
1225 }
1226
SetCursorOffset(size_t offset)1227 void Aarch64Encoder::SetCursorOffset(size_t offset)
1228 {
1229 GetMasm()->GetBuffer()->Rewind(offset);
1230 }
1231
1232 /* return the power of 2 for the size of the type */
EncodeGetTypeSize(Reg size,Reg type)1233 void Aarch64Encoder::EncodeGetTypeSize(Reg size, Reg type)
1234 {
1235 auto sreg = VixlReg(type);
1236 auto dreg = VixlReg(size);
1237 constexpr uint8_t I16 = 0x5;
1238 constexpr uint8_t I32 = 0x7;
1239 constexpr uint8_t F64 = 0xa;
1240 constexpr uint8_t REF = 0xd;
1241 constexpr uint8_t SMALLREF = ark::OBJECT_POINTER_SIZE < sizeof(uint64_t) ? 1 : 0;
1242 auto end = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1243
1244 GetMasm()->Mov(dreg, VixlImm(0));
1245 GetMasm()->Cmp(sreg, VixlImm(I16));
1246 GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1247 GetMasm()->Cmp(sreg, VixlImm(I32));
1248 GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1249 GetMasm()->Cmp(sreg, VixlImm(F64));
1250 GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1251 GetMasm()->Cmp(sreg, VixlImm(REF));
1252 GetMasm()->B(end, vixl::aarch64::Condition::ne);
1253 GetMasm()->Sub(dreg, dreg, VixlImm(SMALLREF));
1254 GetMasm()->Bind(end);
1255 }
1256
EncodeReverseBits(Reg dst,Reg src)1257 void Aarch64Encoder::EncodeReverseBits(Reg dst, Reg src)
1258 {
1259 auto rzero = GetRegfile()->GetZeroReg();
1260 if (rzero.GetId() == src.GetId()) {
1261 EncodeMov(dst, Imm(0));
1262 return;
1263 }
1264 ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
1265 ASSERT(src.GetSize() == dst.GetSize());
1266
1267 GetMasm()->Rbit(VixlReg(dst), VixlReg(src));
1268 }
1269
EncodeCompressedStringCharAt(ArgsCompressedStringCharAt && args)1270 void Aarch64Encoder::EncodeCompressedStringCharAt(ArgsCompressedStringCharAt &&args)
1271 {
1272 auto [dst, str, idx, length, tmp, dataOffset, shift] = args;
1273 ASSERT(dst.GetSize() == HALF_SIZE);
1274
1275 auto labelNotCompressed = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1276 auto labelCharLoaded = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1277 auto vixlTmp = VixlReg(tmp, DOUBLE_WORD_SIZE);
1278 auto vixlDst = VixlReg(dst);
1279
1280 GetMasm()->Tbnz(VixlReg(length), 0, labelNotCompressed);
1281 EncodeAdd(tmp, str, idx);
1282 GetMasm()->ldrb(vixlDst, MemOperand(vixlTmp, dataOffset));
1283 GetMasm()->B(labelCharLoaded);
1284 GetMasm()->Bind(labelNotCompressed);
1285 EncodeAdd(tmp, str, Shift(idx, shift));
1286 GetMasm()->ldrh(vixlDst, MemOperand(vixlTmp, dataOffset));
1287 GetMasm()->Bind(labelCharLoaded);
1288 }
1289
EncodeCompressedStringCharAtI(ArgsCompressedStringCharAtI && args)1290 void Aarch64Encoder::EncodeCompressedStringCharAtI(ArgsCompressedStringCharAtI &&args)
1291 {
1292 auto [dst, str, length, dataOffset, index, shift] = args;
1293 ASSERT(dst.GetSize() == HALF_SIZE);
1294
1295 auto labelNotCompressed = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1296 auto labelCharLoaded = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1297 auto vixlStr = VixlReg(str);
1298 auto vixlDst = VixlReg(dst);
1299
1300 auto rzero = GetRegfile()->GetZeroReg().GetId();
1301 if (str.GetId() == rzero) {
1302 return;
1303 }
1304 GetMasm()->Tbnz(VixlReg(length), 0, labelNotCompressed);
1305 GetMasm()->Ldrb(vixlDst, MemOperand(vixlStr, dataOffset + index));
1306 GetMasm()->B(labelCharLoaded);
1307 GetMasm()->Bind(labelNotCompressed);
1308 GetMasm()->Ldrh(vixlDst, MemOperand(vixlStr, dataOffset + (index << shift)));
1309 GetMasm()->Bind(labelCharLoaded);
1310 }
1311
1312 /* Unsafe builtins implementation */
EncodeCompareAndSwap(Reg dst,Reg obj,Reg offset,Reg val,Reg newval)1313 void Aarch64Encoder::EncodeCompareAndSwap(Reg dst, Reg obj, Reg offset, Reg val, Reg newval)
1314 {
1315 /* Modeled according to the following logic:
1316 .L2:
1317 ldaxr cur, [addr]
1318 cmp cur, old
1319 bne .L3
1320 stlxr res, new, [addr]
1321 cbnz res, .L2
1322 .L3:
1323 cset w0, eq
1324 */
1325 ScopedTmpReg addr(this, true); /* LR is used */
1326 ScopedTmpReg cur(this, val.GetType());
1327 ScopedTmpReg res(this, val.GetType());
1328 auto loop = CreateLabel();
1329 auto exit = CreateLabel();
1330
1331 /* ldaxr wants [reg]-form of memref (no offset or disp) */
1332 EncodeAdd(addr, obj, offset);
1333
1334 BindLabel(loop);
1335 EncodeLdrExclusive(cur, addr, true);
1336 EncodeJump(exit, cur, val, Condition::NE);
1337 cur.Release();
1338 EncodeStrExclusive(res, newval, addr, true);
1339 EncodeJump(loop, res, Imm(0), Condition::NE);
1340 BindLabel(exit);
1341
1342 GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::eq);
1343 }
1344
EncodeUnsafeGetAndSet(Reg dst,Reg obj,Reg offset,Reg val)1345 void Aarch64Encoder::EncodeUnsafeGetAndSet(Reg dst, Reg obj, Reg offset, Reg val)
1346 {
1347 auto cur = ScopedTmpReg(this, val.GetType());
1348 auto last = ScopedTmpReg(this, val.GetType());
1349 auto addr = ScopedTmpReg(this, true); /* LR is used */
1350 auto mem = MemRef(addr);
1351 auto restart = CreateLabel();
1352 auto retryLdaxr = CreateLabel();
1353
1354 /* ldaxr wants [reg]-form of memref (no offset or disp) */
1355 EncodeAdd(addr, obj, offset);
1356
1357 /* Since GetAndSet is defined as a non-faulting operation we
1358 * have to cover two possible faulty cases:
1359 * 1. stlxr failed, we have to retry ldxar
1360 * 2. the value we got via ldxar was not the value we initially
1361 * loaded, we have to start from the very beginning */
1362 BindLabel(restart);
1363 EncodeLdrAcquire(last, false, mem);
1364
1365 BindLabel(retryLdaxr);
1366 EncodeLdrExclusive(cur, addr, true);
1367 EncodeJump(restart, cur, last, Condition::NE);
1368 last.Release();
1369 EncodeStrExclusive(dst, val, addr, true);
1370 EncodeJump(retryLdaxr, dst, Imm(0), Condition::NE);
1371
1372 EncodeMov(dst, cur);
1373 }
1374
EncodeUnsafeGetAndAdd(Reg dst,Reg obj,Reg offset,Reg val,Reg tmp)1375 void Aarch64Encoder::EncodeUnsafeGetAndAdd(Reg dst, Reg obj, Reg offset, Reg val, Reg tmp)
1376 {
1377 ScopedTmpReg cur(this, val.GetType());
1378 ScopedTmpReg last(this, val.GetType());
1379 auto newval = Reg(tmp.GetId(), val.GetType());
1380
1381 auto restart = CreateLabel();
1382 auto retryLdaxr = CreateLabel();
1383
1384 /* addr_reg aliases obj, obj reg will be restored bedore exit */
1385 auto addr = Reg(obj.GetId(), INT64_TYPE);
1386
1387 /* ldaxr wants [reg]-form of memref (no offset or disp) */
1388 auto mem = MemRef(addr);
1389 EncodeAdd(addr, obj, offset);
1390
1391 /* Since GetAndAdd is defined as a non-faulting operation we
1392 * have to cover two possible faulty cases:
1393 * 1. stlxr failed, we have to retry ldxar
1394 * 2. the value we got via ldxar was not the value we initially
1395 * loaded, we have to start from the very beginning */
1396 BindLabel(restart);
1397 EncodeLdrAcquire(last, false, mem);
1398 EncodeAdd(newval, last, val);
1399
1400 BindLabel(retryLdaxr);
1401 EncodeLdrExclusive(cur, addr, true);
1402 EncodeJump(restart, cur, last, Condition::NE);
1403 last.Release();
1404 EncodeStrExclusive(dst, newval, addr, true);
1405 EncodeJump(retryLdaxr, dst, Imm(0), Condition::NE);
1406
1407 EncodeSub(obj, addr, offset); /* restore the original value */
1408 EncodeMov(dst, cur);
1409 }
1410
EncodeMemoryBarrier(memory_order::Order order)1411 void Aarch64Encoder::EncodeMemoryBarrier(memory_order::Order order)
1412 {
1413 switch (order) {
1414 case memory_order::ACQUIRE: {
1415 GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierReads);
1416 break;
1417 }
1418 case memory_order::RELEASE: {
1419 GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierWrites);
1420 break;
1421 }
1422 case memory_order::FULL: {
1423 GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierAll);
1424 break;
1425 }
1426 default:
1427 break;
1428 }
1429 }
1430
EncodeNot(Reg dst,Reg src)1431 void Aarch64Encoder::EncodeNot(Reg dst, Reg src)
1432 {
1433 GetMasm()->Mvn(VixlReg(dst), VixlReg(src));
1434 }
1435
EncodeCastFloat(Reg dst,bool dstSigned,Reg src,bool srcSigned)1436 void Aarch64Encoder::EncodeCastFloat(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1437 {
1438 // We DON'T support casts from float32/64 to int8/16 and bool, because this caste is not declared anywhere
1439 // in other languages and architecture, we do not know what the behavior should be.
1440 // But there is one implementation in other function: "EncodeCastFloatWithSmallDst". Call it in the "EncodeCast"
1441 // function instead of "EncodeCastFloat". It works as follows: cast from float32/64 to int32, moving sign bit from
1442 // int32 to dst type, then extend number from dst type to int32 (a necessary condition for an isa). All work in dst
1443 // register.
1444 ASSERT(dst.GetSize() >= WORD_SIZE);
1445
1446 if (src.IsFloat() && dst.IsScalar()) {
1447 if (dstSigned) {
1448 GetMasm()->Fcvtzs(VixlReg(dst), VixlVReg(src));
1449 } else {
1450 GetMasm()->Fcvtzu(VixlReg(dst), VixlVReg(src));
1451 }
1452 return;
1453 }
1454 if (src.IsScalar() && dst.IsFloat()) {
1455 auto rzero = GetRegfile()->GetZeroReg().GetId();
1456 if (src.GetId() == rzero) {
1457 if (dst.GetSize() == WORD_SIZE) {
1458 GetMasm()->Fmov(VixlVReg(dst), 0.0F);
1459 } else {
1460 GetMasm()->Fmov(VixlVReg(dst), 0.0);
1461 }
1462 } else if (srcSigned) {
1463 GetMasm()->Scvtf(VixlVReg(dst), VixlReg(src));
1464 } else {
1465 GetMasm()->Ucvtf(VixlVReg(dst), VixlReg(src));
1466 }
1467 return;
1468 }
1469 if (src.IsFloat() && dst.IsFloat()) {
1470 if (src.GetSize() != dst.GetSize()) {
1471 GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
1472 return;
1473 }
1474 GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
1475 return;
1476 }
1477 UNREACHABLE();
1478 }
1479
EncodeCastFloatWithSmallDst(Reg dst,bool dstSigned,Reg src,bool srcSigned)1480 void Aarch64Encoder::EncodeCastFloatWithSmallDst(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1481 {
1482 // Dst bool type don't supported!
1483
1484 if (src.IsFloat() && dst.IsScalar()) {
1485 if (dstSigned) {
1486 GetMasm()->Fcvtzs(VixlReg(dst), VixlVReg(src));
1487 if (dst.GetSize() < WORD_SIZE) {
1488 constexpr uint32_t TEST_BIT = (1U << (static_cast<uint32_t>(WORD_SIZE) - 1));
1489 ScopedTmpReg tmpReg1(this, dst.GetType());
1490 auto tmp1 = VixlReg(tmpReg1);
1491 ScopedTmpReg tmpReg2(this, dst.GetType());
1492 auto tmp2 = VixlReg(tmpReg2);
1493
1494 // NOLINTNEXTLINE(hicpp-signed-bitwise)
1495 int32_t setBit = (dst.GetSize() == BYTE_SIZE) ? (1UL << (BYTE_SIZE - 1)) : (1UL << (HALF_SIZE - 1));
1496 int32_t remBit = setBit - 1;
1497 GetMasm()->Ands(tmp1, VixlReg(dst), TEST_BIT);
1498
1499 GetMasm()->Orr(tmp1, VixlReg(dst), setBit);
1500 GetMasm()->And(tmp2, VixlReg(dst), remBit);
1501 // Select result - if zero set - tmp2, else tmp1
1502 GetMasm()->Csel(VixlReg(dst), tmp2, tmp1, vixl::aarch64::eq);
1503 EncodeCastScalar(Reg(dst.GetId(), INT32_TYPE), dstSigned, dst, dstSigned);
1504 }
1505 return;
1506 }
1507 GetMasm()->Fcvtzu(VixlReg(dst), VixlVReg(src));
1508 if (dst.GetSize() < WORD_SIZE) {
1509 EncodeCastScalar(Reg(dst.GetId(), INT32_TYPE), dstSigned, dst, dstSigned);
1510 }
1511 return;
1512 }
1513 if (src.IsScalar() && dst.IsFloat()) {
1514 if (srcSigned) {
1515 GetMasm()->Scvtf(VixlVReg(dst), VixlReg(src));
1516 } else {
1517 GetMasm()->Ucvtf(VixlVReg(dst), VixlReg(src));
1518 }
1519 return;
1520 }
1521 if (src.IsFloat() && dst.IsFloat()) {
1522 if (src.GetSize() != dst.GetSize()) {
1523 GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
1524 return;
1525 }
1526 GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
1527 return;
1528 }
1529 UNREACHABLE();
1530 }
1531
EncodeCastSigned(Reg dst,Reg src)1532 void Aarch64Encoder::EncodeCastSigned(Reg dst, Reg src)
1533 {
1534 size_t srcSize = src.GetSize();
1535 size_t dstSize = dst.GetSize();
1536 auto srcR = Reg(src.GetId(), dst.GetType());
1537 // Else signed extend
1538 if (srcSize > dstSize) {
1539 srcSize = dstSize;
1540 }
1541 switch (srcSize) {
1542 case BYTE_SIZE:
1543 GetMasm()->Sxtb(VixlReg(dst), VixlReg(srcR));
1544 break;
1545 case HALF_SIZE:
1546 GetMasm()->Sxth(VixlReg(dst), VixlReg(srcR));
1547 break;
1548 case WORD_SIZE:
1549 GetMasm()->Sxtw(VixlReg(dst), VixlReg(srcR));
1550 break;
1551 case DOUBLE_WORD_SIZE:
1552 GetMasm()->Mov(VixlReg(dst), VixlReg(srcR));
1553 break;
1554 default:
1555 SetFalseResult();
1556 break;
1557 }
1558 }
1559
EncodeCastUnsigned(Reg dst,Reg src)1560 void Aarch64Encoder::EncodeCastUnsigned(Reg dst, Reg src)
1561 {
1562 size_t srcSize = src.GetSize();
1563 size_t dstSize = dst.GetSize();
1564 auto srcR = Reg(src.GetId(), dst.GetType());
1565 if (srcSize > dstSize && dstSize < WORD_SIZE) {
1566 // We need to cut the number, if it is less, than 32-bit. It is by ISA agreement.
1567 int64_t cutValue = (1ULL << dstSize) - 1;
1568 GetMasm()->And(VixlReg(dst), VixlReg(src), VixlImm(cutValue));
1569 return;
1570 }
1571 // Otherwise zero-extend from the source size
1572 switch (srcSize) {
1573 case BYTE_SIZE:
1574 GetMasm()->Uxtb(VixlReg(dst), VixlReg(srcR));
1575 return;
1576 case HALF_SIZE:
1577 GetMasm()->Uxth(VixlReg(dst), VixlReg(srcR));
1578 return;
1579 case WORD_SIZE:
1580 GetMasm()->Uxtw(VixlReg(dst), VixlReg(srcR));
1581 return;
1582 case DOUBLE_WORD_SIZE:
1583 GetMasm()->Mov(VixlReg(dst), VixlReg(srcR));
1584 return;
1585 default:
1586 SetFalseResult();
1587 return;
1588 }
1589 }
1590
EncodeCastScalar(Reg dst,bool dstSigned,Reg src,bool srcSigned)1591 void Aarch64Encoder::EncodeCastScalar(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1592 {
1593 size_t srcSize = src.GetSize();
1594 size_t dstSize = dst.GetSize();
1595 // In our ISA the minimal type is 32-bit, so any type narrower than 32 bits
1596 // must be extended to 32 bits. Therefore one cast may be encoded as two casts
1597 // (for example, i8->u16 is performed as i8->u16 followed by u16->u32)
1598 if (dstSize < WORD_SIZE) {
1599 if (srcSize > dstSize) {
1600 if (dstSigned) {
1601 EncodeCastSigned(dst, src);
1602 } else {
1603 EncodeCastUnsigned(dst, src);
1604 }
1605 return;
1606 }
1607 if (srcSize == dstSize) {
1608 GetMasm()->Mov(VixlReg(dst), VixlReg(src));
1609 if (!(srcSigned || dstSigned) || (srcSigned && dstSigned)) {
1610 return;
1611 }
1612 if (dstSigned) {
1613 EncodeCastSigned(Reg(dst.GetId(), INT32_TYPE), dst);
1614 } else {
1615 EncodeCastUnsigned(Reg(dst.GetId(), INT32_TYPE), dst);
1616 }
1617 return;
1618 }
1619 if (srcSigned) {
1620 EncodeCastSigned(dst, src);
1621 if (!dstSigned) {
1622 EncodeCastUnsigned(Reg(dst.GetId(), INT32_TYPE), dst);
1623 }
1624 } else {
1625 EncodeCastUnsigned(dst, src);
1626 if (dstSigned) {
1627 EncodeCastSigned(Reg(dst.GetId(), INT32_TYPE), dst);
1628 }
1629 }
1630 } else {
1631 if (srcSize == dstSize) {
1632 GetMasm()->Mov(VixlReg(dst), VixlReg(src));
1633 return;
1634 }
1635 if (srcSigned) {
1636 EncodeCastSigned(dst, src);
1637 } else {
1638 EncodeCastUnsigned(dst, src);
1639 }
1640 }
1641 }
1642
EncodeFastPathDynamicCast(Reg dst,Reg src,LabelHolder::LabelId slow)1643 void Aarch64Encoder::EncodeFastPathDynamicCast(Reg dst, Reg src, LabelHolder::LabelId slow)
1644 {
1645 ASSERT(IsJsNumberCast());
1646 ASSERT(src.IsFloat() && dst.IsScalar());
1647
1648 CHECK_EQ(src.GetSize(), BITS_PER_UINT64);
1649 CHECK_EQ(dst.GetSize(), BITS_PER_UINT32);
1650
1651 // We use a slow path because the general JS double -> int32 cast is complex: only a few common cases are
1652 // checked here, and the remaining checks are moved to the slow path. If the CPU supports the dedicated
1653 // JS double -> int32 instruction, no slow path is needed.
1654 if (!IsLabelValid(slow)) {
1655 // use special JS aarch64 instruction
1656 #ifndef NDEBUG
1657 vixl::CPUFeaturesScope scope(GetMasm(), vixl::CPUFeatures::kFP, vixl::CPUFeatures::kJSCVT);
1658 #endif
1659 GetMasm()->Fjcvtzs(VixlReg(dst), VixlVReg(src));
1660 return;
1661 }
1662
1663 // Infinities and out-of-range values saturate here to INT64_MIN or INT64_MAX, while NaN converts to 0
1664 GetMasm()->Fcvtzs(VixlReg(dst, DOUBLE_WORD_SIZE), VixlVReg(src));
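// The Cmp/Ccmp pair below detects saturation without extra registers: Cmp(dst, 1) sets the V flag only for
// dst == INT64_MIN (INT64_MIN - 1 overflows); if that did not overflow, Ccmp compares dst with -1, setting V
// only for dst == INT64_MAX (INT64_MAX + 1 overflows); otherwise Ccmp forces V to 1. The final B.vs therefore
// branches to the slow path exactly when the conversion saturated.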
1665 // check INT64_MIN
1666 GetMasm()->Cmp(VixlReg(dst, DOUBLE_WORD_SIZE), VixlImm(1));
1667 // check INT64_MAX
1668 GetMasm()->Ccmp(VixlReg(dst, DOUBLE_WORD_SIZE), VixlImm(-1), vixl::aarch64::StatusFlags::VFlag,
1669 vixl::aarch64::Condition::vc);
1670 auto slowLabel {static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(slow)};
1671 // jump to slow path in case of overflow
1672 GetMasm()->B(slowLabel, vixl::aarch64::Condition::vs);
1673 }
1674
EncodeCast(Reg dst,bool dstSigned,Reg src,bool srcSigned)1675 void Aarch64Encoder::EncodeCast(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1676 {
1677 if (src.IsFloat() || dst.IsFloat()) {
1678 EncodeCastFloat(dst, dstSigned, src, srcSigned);
1679 return;
1680 }
1681
1682 ASSERT(src.IsScalar() && dst.IsScalar());
1683 auto rzero = GetRegfile()->GetZeroReg().GetId();
1684 if (src.GetId() == rzero) {
1685 ASSERT(dst.GetId() != rzero);
1686 EncodeMov(dst, Imm(0));
1687 return;
1688 }
1689 // Scalar part
1690 EncodeCastScalar(dst, dstSigned, src, srcSigned);
1691 }
1692
EncodeCastToBool(Reg dst,Reg src)1693 void Aarch64Encoder::EncodeCastToBool(Reg dst, Reg src)
1694 {
1695 // The ISA specifies that only the following casts are supported:
1696 // i32tou1, i64tou1, u32tou1, u64tou1
1697 ASSERT(src.IsScalar());
1698 ASSERT(dst.IsScalar());
1699
1700 GetMasm()->Cmp(VixlReg(src), VixlImm(0));
1701 // In our ISA the minimal type is 32-bit, so bool is materialized in a 32-bit register
1702 GetMasm()->Cset(VixlReg(Reg(dst.GetId(), INT32_TYPE)), vixl::aarch64::Condition::ne);
1703 }
1704
EncodeAdd(Reg dst,Reg src0,Shift src1)1705 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src0, Shift src1)
1706 {
1707 if (dst.IsFloat()) {
1708 UNREACHABLE();
1709 }
1710 ASSERT(src0.GetSize() <= dst.GetSize());
1711 if (src0.GetSize() < dst.GetSize()) {
1712 auto src0Reg = Reg(src0.GetId(), dst.GetType());
1713 auto src1Reg = Reg(src1.GetBase().GetId(), dst.GetType());
1714 GetMasm()->Add(VixlReg(dst), VixlReg(src0Reg), VixlShift(Shift(src1Reg, src1.GetType(), src1.GetScale())));
1715 return;
1716 }
1717 GetMasm()->Add(VixlReg(dst), VixlReg(src0), VixlShift(src1));
1718 }
1719
EncodeAdd(Reg dst,Reg src0,Reg src1)1720 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src0, Reg src1)
1721 {
1722 if (dst.IsFloat()) {
1723 GetMasm()->Fadd(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1724 return;
1725 }
1726
1727 /* If any of the operands is 64 bits wide,
1728 * forcibly do the 64-bit wide operation */
1729 if ((src0.GetSize() | src1.GetSize() | dst.GetSize()) >= DOUBLE_WORD_SIZE) {
1730 GetMasm()->Add(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1731 } else {
1732 /* Otherwise do a 32-bit operation, as any smaller
1733 * sizes have to be upcast to 32 bits anyway */
1734 GetMasm()->Add(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1735 }
1736 }
1737
EncodeSub(Reg dst,Reg src0,Shift src1)1738 void Aarch64Encoder::EncodeSub(Reg dst, Reg src0, Shift src1)
1739 {
1740 ASSERT(dst.IsScalar());
1741 GetMasm()->Sub(VixlReg(dst), VixlReg(src0), VixlShift(src1));
1742 }
1743
EncodeSub(Reg dst,Reg src0,Reg src1)1744 void Aarch64Encoder::EncodeSub(Reg dst, Reg src0, Reg src1)
1745 {
1746 if (dst.IsFloat()) {
1747 GetMasm()->Fsub(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1748 return;
1749 }
1750
1751 /* If any of the operands is 64 bits wide,
1752 * forcibly do the 64-bit wide operation */
1753 if ((src0.GetSize() | src1.GetSize() | dst.GetSize()) >= DOUBLE_WORD_SIZE) {
1754 GetMasm()->Sub(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1755 } else {
1756 /* Otherwise do a 32-bit operation, as any smaller
1757 * sizes have to be upcast to 32 bits anyway */
1758 GetMasm()->Sub(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1759 }
1760 }
1761
EncodeMul(Reg dst,Reg src0,Reg src1)1762 void Aarch64Encoder::EncodeMul(Reg dst, Reg src0, Reg src1)
1763 {
1764 if (dst.IsFloat()) {
1765 GetMasm()->Fmul(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1766 return;
1767 }
1768 auto rzero = GetRegfile()->GetZeroReg().GetId();
1769 if (src0.GetId() == rzero || src1.GetId() == rzero) {
1770 EncodeMov(dst, Imm(0));
1771 return;
1772 }
1773 GetMasm()->Mul(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1774 }
1775
EncodeAddOverflow(compiler::LabelHolder::LabelId id,Reg dst,Reg src0,Reg src1,Condition cc)1776 void Aarch64Encoder::EncodeAddOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1777 {
1778 ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1779 ASSERT(cc == Condition::VS || cc == Condition::VC);
1780 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1781 GetMasm()->Adds(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1782 } else {
1783 /* Otherwise do a 32-bit operation, as any smaller
1784 * sizes have to be upcast to 32 bits anyway */
1785 GetMasm()->Adds(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1786 }
1787 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
1788 GetMasm()->B(label, Convert(cc));
1789 }
1790
EncodeSubOverflow(compiler::LabelHolder::LabelId id,Reg dst,Reg src0,Reg src1,Condition cc)1791 void Aarch64Encoder::EncodeSubOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1792 {
1793 ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1794 ASSERT(cc == Condition::VS || cc == Condition::VC);
1795 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1796 GetMasm()->Subs(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1797 } else {
1798 /* Otherwise do a 32-bit operation, as any smaller
1799 * sizes have to be upcast to 32 bits anyway */
1800 GetMasm()->Subs(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1801 }
1802 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
1803 GetMasm()->B(label, Convert(cc));
1804 }
1805
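// Negating a 32-bit value overflows only for INT32_MIN and yields zero only for zero; both of those inputs
// have all bits of 0x7fffffff clear, so the jump below goes to `id` exactly for src == 0 or src == INT32_MIN
// before the Neg is encoded.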
EncodeNegOverflowAndZero(compiler::LabelHolder::LabelId id,Reg dst,Reg src)1806 void Aarch64Encoder::EncodeNegOverflowAndZero(compiler::LabelHolder::LabelId id, Reg dst, Reg src)
1807 {
1808 ASSERT(!dst.IsFloat() && !src.IsFloat());
1809 // NOLINTNEXTLINE(readability-magic-numbers)
1810 EncodeJumpTest(id, src, Imm(0x7fffffff), Condition::TST_EQ);
1811 GetMasm()->Neg(VixlReg(dst).W(), VixlReg(src).W());
1812 }
1813
EncodeDiv(Reg dst,bool dstSigned,Reg src0,Reg src1)1814 void Aarch64Encoder::EncodeDiv(Reg dst, bool dstSigned, Reg src0, Reg src1)
1815 {
1816 if (dst.IsFloat()) {
1817 GetMasm()->Fdiv(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1818 return;
1819 }
1820
1821 auto rzero = GetRegfile()->GetZeroReg().GetId();
1822 if (src1.GetId() == rzero || src0.GetId() == rzero) {
1823 ScopedTmpReg tmpReg(this, src1.GetType());
1824 EncodeMov(tmpReg, Imm(0));
1825 // Denominator is zero-reg
1826 if (src1.GetId() == rzero) {
1827 // Encode Abort
1828 GetMasm()->Udiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(tmpReg));
1829 return;
1830 }
1831
1832 // src1 may still be zero at runtime, so the division itself is still encoded
1833 if (src1.GetId() != src0.GetId()) {
1834 if (dstSigned) {
1835 GetMasm()->Sdiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(src1));
1836 } else {
1837 GetMasm()->Udiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(src1));
1838 }
1839 return;
1840 }
1841 UNREACHABLE();
1842 }
1843 if (dstSigned) {
1844 GetMasm()->Sdiv(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1845 } else {
1846 GetMasm()->Udiv(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1847 }
1848 }
1849
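// AArch64 has no integer remainder instruction, so the remainder is computed as
//   dst = src0 - (src0 / src1) * src1
// using Sdiv/Udiv followed by Msub.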
EncodeMod(Reg dst,bool dstSigned,Reg src0,Reg src1)1850 void Aarch64Encoder::EncodeMod(Reg dst, bool dstSigned, Reg src0, Reg src1)
1851 {
1852 if (dst.IsScalar()) {
1853 auto rzero = GetRegfile()->GetZeroReg().GetId();
1854 if (src1.GetId() == rzero || src0.GetId() == rzero) {
1855 ScopedTmpReg tmpReg(this, src1.GetType());
1856 EncodeMov(tmpReg, Imm(0));
1857 // Denominator is zero-reg
1858 if (src1.GetId() == rzero) {
1859 // Encode Abort
1860 GetMasm()->Udiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(tmpReg));
1861 return;
1862 }
1863
1864 if (src1.GetId() == src0.GetId()) {
1865 SetFalseResult();
1866 return;
1867 }
1868 // src1 may still be zero at runtime, so the division itself is still encoded
1869 ScopedTmpRegU64 tmpRegUd(this);
1870 if (dst.GetSize() < DOUBLE_WORD_SIZE) {
1871 tmpRegUd.ChangeType(INT32_TYPE);
1872 }
1873 auto tmp = VixlReg(tmpRegUd);
1874 if (!dstSigned) {
1875 GetMasm()->Udiv(tmp, VixlReg(tmpReg), VixlReg(src1));
1876 GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(tmpReg));
1877 return;
1878 }
1879 GetMasm()->Sdiv(tmp, VixlReg(tmpReg), VixlReg(src1));
1880 GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(tmpReg));
1881 return;
1882 }
1883
1884 ScopedTmpRegU64 tmpReg(this);
1885 if (dst.GetSize() < DOUBLE_WORD_SIZE) {
1886 tmpReg.ChangeType(INT32_TYPE);
1887 }
1888 auto tmp = VixlReg(tmpReg);
1889
1890 if (!dstSigned) {
1891 GetMasm()->Udiv(tmp, VixlReg(src0), VixlReg(src1));
1892 GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(src0));
1893 return;
1894 }
1895 GetMasm()->Sdiv(tmp, VixlReg(src0), VixlReg(src1));
1896 GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(src0));
1897 return;
1898 }
1899
1900 EncodeFMod(dst, src0, src1);
1901 }
1902
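// There is no AArch64 instruction for floating-point remainder, so the operation falls back to the libc
// fmodf/fmod routines via MakeLibCall.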
EncodeFMod(Reg dst,Reg src0,Reg src1)1903 void Aarch64Encoder::EncodeFMod(Reg dst, Reg src0, Reg src1)
1904 {
1905 ASSERT(dst.IsFloat());
1906
1907 if (dst.GetType() == FLOAT32_TYPE) {
1908 using Fp = float (*)(float, float);
1909 MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<Fp>(fmodf)));
1910 } else {
1911 using Fp = double (*)(double, double);
1912 MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<Fp>(fmod)));
1913 }
1914 }
1915
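// Signed division by a constant using the magic-number method: the high half of src0 * magic is taken
// (Smulh for 64-bit operands, Smull plus an extra 32-bit shift otherwise), shifted right arithmetically by
// the divisor-specific `shift`, and corrected by +1 when the intermediate is negative. When the signs of the
// divisor and the magic constant differ, the dividend is first added to (or subtracted from) the product.
// Illustrative sketch (not generated code), assuming 32-bit operands and the classic constants for division
// by 7 (magic = 0x92492493, shift = 2):
//   int32_t magic = static_cast<int32_t>(0x92492493U);
//   int64_t t = static_cast<int64_t>(magic) * n + (static_cast<int64_t>(n) << 32);  // Smull + Adds
//   int32_t q = static_cast<int32_t>(t >> 34);                                      // AShr by shift + 32
//   q += (t < 0) ? 1 : 0;                                                           // Cinc on the sign flag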
EncodeSignedDiv(Reg dst,Reg src0,Imm imm)1916 void Aarch64Encoder::EncodeSignedDiv(Reg dst, Reg src0, Imm imm)
1917 {
1918 int64_t divisor = imm.GetAsInt();
1919
1920 FastConstSignedDivisor fastDivisor(divisor, dst.GetSize());
1921 int64_t magic = fastDivisor.GetMagic();
1922
1923 ScopedTmpReg tmp(this, dst.GetType());
1924 Reg tmp64 = tmp.GetReg().As(INT64_TYPE);
1925 EncodeMov(tmp, Imm(magic));
1926
1927 int64_t extraShift = 0;
1928 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1929 GetMasm()->Smulh(VixlReg(tmp), VixlReg(src0), VixlReg(tmp));
1930 } else {
1931 GetMasm()->Smull(VixlReg(tmp64), VixlReg(src0), VixlReg(tmp));
1932 extraShift = WORD_SIZE;
1933 }
1934
1935 bool useSignFlag = false;
1936 if (divisor > 0 && magic < 0) {
1937 GetMasm()->Adds(VixlReg(tmp64), VixlReg(tmp64), VixlShift(Shift(src0.As(INT64_TYPE), extraShift)));
1938 useSignFlag = true;
1939 } else if (divisor < 0 && magic > 0) {
1940 GetMasm()->Subs(VixlReg(tmp64), VixlReg(tmp64), VixlShift(Shift(src0.As(INT64_TYPE), extraShift)));
1941 useSignFlag = true;
1942 }
1943
1944 int64_t shift = fastDivisor.GetShift();
1945 EncodeAShr(dst.As(INT64_TYPE), tmp64, Imm(shift + extraShift));
1946
1947 // result = (result < 0 ? result + 1 : result)
1948 if (useSignFlag) {
1949 GetMasm()->Cinc(VixlReg(dst), VixlReg(dst), vixl::aarch64::Condition::mi);
1950 } else {
1951 GetMasm()->Add(VixlReg(dst), VixlReg(dst), VixlShift(Shift(dst, ShiftType::LSR, dst.GetSize() - 1U)));
1952 }
1953 }
1954
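// Unsigned division by a constant using the magic-number method: the high half of src0 * magic is taken
// (Umulh for 64-bit operands, Umull plus an extra 32-bit shift otherwise) and shifted right by the
// divisor-specific `shift`. When the magic constant needs one extra bit (fastDivisor.GetAdd() is true),
// the standard fixup is applied instead:
//   t = high_half(src0 * magic);  dst = (t + ((src0 - t) >> 1)) >> (shift - 1)
// which yields the same quotient without overflowing the intermediate.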
EncodeUnsignedDiv(Reg dst,Reg src0,Imm imm)1955 void Aarch64Encoder::EncodeUnsignedDiv(Reg dst, Reg src0, Imm imm)
1956 {
1957 auto divisor = bit_cast<uint64_t>(imm.GetAsInt());
1958
1959 FastConstUnsignedDivisor fastDivisor(divisor, dst.GetSize());
1960 uint64_t magic = fastDivisor.GetMagic();
1961
1962 ScopedTmpReg tmp(this, dst.GetType());
1963 Reg tmp64 = tmp.GetReg().As(INT64_TYPE);
1964 EncodeMov(tmp, Imm(magic));
1965
1966 uint64_t extraShift = 0;
1967 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1968 GetMasm()->Umulh(VixlReg(tmp), VixlReg(src0), VixlReg(tmp));
1969 } else {
1970 GetMasm()->Umull(VixlReg(tmp64), VixlReg(src0), VixlReg(tmp));
1971 extraShift = WORD_SIZE;
1972 }
1973
1974 uint64_t shift = fastDivisor.GetShift();
1975 if (!fastDivisor.GetAdd()) {
1976 EncodeShr(dst.As(INT64_TYPE), tmp64, Imm(shift + extraShift));
1977 } else {
1978 ASSERT(shift >= 1U);
1979 if (extraShift > 0U) {
1980 EncodeShr(tmp64, tmp64, Imm(extraShift));
1981 }
1982 EncodeSub(dst, src0, tmp);
1983 GetMasm()->Add(VixlReg(dst), VixlReg(tmp), VixlShift(Shift(dst, ShiftType::LSR, 1U)));
1984 EncodeShr(dst, dst, Imm(shift - 1U));
1985 }
1986 }
1987
EncodeDiv(Reg dst,Reg src0,Imm imm,bool isSigned)1988 void Aarch64Encoder::EncodeDiv(Reg dst, Reg src0, Imm imm, bool isSigned)
1989 {
1990 ASSERT(dst.IsScalar() && dst.GetSize() >= WORD_SIZE);
1991 ASSERT(CanOptimizeImmDivMod(bit_cast<uint64_t>(imm.GetAsInt()), isSigned));
1992 if (isSigned) {
1993 EncodeSignedDiv(dst, src0, imm);
1994 } else {
1995 EncodeUnsignedDiv(dst, src0, imm);
1996 }
1997 }
1998
EncodeMod(Reg dst,Reg src0,Imm imm,bool isSigned)1999 void Aarch64Encoder::EncodeMod(Reg dst, Reg src0, Imm imm, bool isSigned)
2000 {
2001 ASSERT(dst.IsScalar() && dst.GetSize() >= WORD_SIZE);
2002 ASSERT(CanOptimizeImmDivMod(bit_cast<uint64_t>(imm.GetAsInt()), isSigned));
2003 // dst = src0 - imm * (src0 / imm)
2004 ScopedTmpReg tmp(this, dst.GetType());
2005 EncodeDiv(tmp, src0, imm, isSigned);
2006
2007 ScopedTmpReg immReg(this, dst.GetType());
2008 EncodeMov(immReg, imm);
2009
2010 GetMasm()->Msub(VixlReg(dst), VixlReg(immReg), VixlReg(tmp), VixlReg(src0));
2011 }
2012
EncodeMin(Reg dst,bool dstSigned,Reg src0,Reg src1)2013 void Aarch64Encoder::EncodeMin(Reg dst, bool dstSigned, Reg src0, Reg src1)
2014 {
2015 if (dst.IsFloat()) {
2016 GetMasm()->Fmin(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
2017 return;
2018 }
2019 if (dstSigned) {
2020 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2021 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::lt);
2022 return;
2023 }
2024 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2025 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::ls);
2026 }
2027
EncodeMax(Reg dst,bool dstSigned,Reg src0,Reg src1)2028 void Aarch64Encoder::EncodeMax(Reg dst, bool dstSigned, Reg src0, Reg src1)
2029 {
2030 if (dst.IsFloat()) {
2031 GetMasm()->Fmax(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
2032 return;
2033 }
2034 if (dstSigned) {
2035 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2036 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::gt);
2037 return;
2038 }
2039 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2040 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::hi);
2041 }
2042
EncodeShl(Reg dst,Reg src0,Reg src1)2043 void Aarch64Encoder::EncodeShl(Reg dst, Reg src0, Reg src1)
2044 {
2045 auto rzero = GetRegfile()->GetZeroReg().GetId();
2046 ASSERT(dst.GetId() != rzero);
2047 if (src0.GetId() == rzero) {
2048 EncodeMov(dst, Imm(0));
2049 return;
2050 }
2051 if (src1.GetId() == rzero) {
2052 EncodeMov(dst, src0);
2053 }
2054 if (dst.GetSize() < WORD_SIZE) {
2055 GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
2056 }
2057 GetMasm()->Lsl(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2058 }
2059
EncodeShr(Reg dst,Reg src0,Reg src1)2060 void Aarch64Encoder::EncodeShr(Reg dst, Reg src0, Reg src1)
2061 {
2062 auto rzero = GetRegfile()->GetZeroReg().GetId();
2063 ASSERT(dst.GetId() != rzero);
2064 if (src0.GetId() == rzero) {
2065 EncodeMov(dst, Imm(0));
2066 return;
2067 }
2068 if (src1.GetId() == rzero) {
2069 EncodeMov(dst, src0);
2070 }
2071
2072 if (dst.GetSize() < WORD_SIZE) {
2073 GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
2074 }
2075
2076 GetMasm()->Lsr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2077 }
2078
EncodeAShr(Reg dst,Reg src0,Reg src1)2079 void Aarch64Encoder::EncodeAShr(Reg dst, Reg src0, Reg src1)
2080 {
2081 auto rzero = GetRegfile()->GetZeroReg().GetId();
2082 ASSERT(dst.GetId() != rzero);
2083 if (src0.GetId() == rzero) {
2084 EncodeMov(dst, Imm(0));
2085 return;
2086 }
2087 if (src1.GetId() == rzero) {
2088 EncodeMov(dst, src0);
2089 }
2090
2091 if (dst.GetSize() < WORD_SIZE) {
2092 GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
2093 }
2094 GetMasm()->Asr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2095 }
2096
EncodeAnd(Reg dst,Reg src0,Reg src1)2097 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src0, Reg src1)
2098 {
2099 GetMasm()->And(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2100 }
2101
EncodeAnd(Reg dst,Reg src0,Shift src1)2102 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src0, Shift src1)
2103 {
2104 GetMasm()->And(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2105 }
2106
EncodeOr(Reg dst,Reg src0,Reg src1)2107 void Aarch64Encoder::EncodeOr(Reg dst, Reg src0, Reg src1)
2108 {
2109 GetMasm()->Orr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2110 }
2111
EncodeOr(Reg dst,Reg src0,Shift src1)2112 void Aarch64Encoder::EncodeOr(Reg dst, Reg src0, Shift src1)
2113 {
2114 GetMasm()->Orr(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2115 }
2116
EncodeXor(Reg dst,Reg src0,Reg src1)2117 void Aarch64Encoder::EncodeXor(Reg dst, Reg src0, Reg src1)
2118 {
2119 GetMasm()->Eor(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2120 }
2121
EncodeXor(Reg dst,Reg src0,Shift src1)2122 void Aarch64Encoder::EncodeXor(Reg dst, Reg src0, Shift src1)
2123 {
2124 GetMasm()->Eor(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2125 }
2126
EncodeAdd(Reg dst,Reg src,Imm imm)2127 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src, Imm imm)
2128 {
2129 ASSERT(dst.IsScalar() && "UNIMPLEMENTED");
2130 ASSERT(dst.GetSize() >= src.GetSize());
2131 if (dst.GetSize() != src.GetSize()) {
2132 auto srcReg = Reg(src.GetId(), dst.GetType());
2133 GetMasm()->Add(VixlReg(dst), VixlReg(srcReg), VixlImm(imm));
2134 return;
2135 }
2136 GetMasm()->Add(VixlReg(dst), VixlReg(src), VixlImm(imm));
2137 }
2138
EncodeSub(Reg dst,Reg src,Imm imm)2139 void Aarch64Encoder::EncodeSub(Reg dst, Reg src, Imm imm)
2140 {
2141 ASSERT(dst.IsScalar() && "UNIMPLEMENTED");
2142 GetMasm()->Sub(VixlReg(dst), VixlReg(src), VixlImm(imm));
2143 }
2144
EncodeShl(Reg dst,Reg src,Imm imm)2145 void Aarch64Encoder::EncodeShl(Reg dst, Reg src, Imm imm)
2146 {
2147 ASSERT(dst.IsScalar() && "Invalid operand type");
2148 auto rzero = GetRegfile()->GetZeroReg().GetId();
2149 ASSERT(dst.GetId() != rzero);
2150 if (src.GetId() == rzero) {
2151 EncodeMov(dst, Imm(0));
2152 return;
2153 }
2154
2155 GetMasm()->Lsl(VixlReg(dst), VixlReg(src), imm.GetAsInt());
2156 }
2157
EncodeShr(Reg dst,Reg src,Imm imm)2158 void Aarch64Encoder::EncodeShr(Reg dst, Reg src, Imm imm)
2159 {
2160 int64_t immValue = static_cast<uint64_t>(imm.GetAsInt()) & (dst.GetSize() - 1);
2161
2162 ASSERT(dst.IsScalar() && "Invalid operand type");
2163 auto rzero = GetRegfile()->GetZeroReg().GetId();
2164 ASSERT(dst.GetId() != rzero);
2165 if (src.GetId() == rzero) {
2166 EncodeMov(dst, Imm(0));
2167 return;
2168 }
2169
2170 GetMasm()->Lsr(VixlReg(dst), VixlReg(src), immValue);
2171 }
2172
EncodeAShr(Reg dst,Reg src,Imm imm)2173 void Aarch64Encoder::EncodeAShr(Reg dst, Reg src, Imm imm)
2174 {
2175 ASSERT(dst.IsScalar() && "Invalid operand type");
2176 GetMasm()->Asr(VixlReg(dst), VixlReg(src), imm.GetAsInt());
2177 }
2178
EncodeAnd(Reg dst,Reg src,Imm imm)2179 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src, Imm imm)
2180 {
2181 ASSERT(dst.IsScalar() && "Invalid operand type");
2182 GetMasm()->And(VixlReg(dst), VixlReg(src), VixlImm(imm));
2183 }
2184
EncodeOr(Reg dst,Reg src,Imm imm)2185 void Aarch64Encoder::EncodeOr(Reg dst, Reg src, Imm imm)
2186 {
2187 ASSERT(dst.IsScalar() && "Invalid operand type");
2188 GetMasm()->Orr(VixlReg(dst), VixlReg(src), VixlImm(imm));
2189 }
2190
EncodeXor(Reg dst,Reg src,Imm imm)2191 void Aarch64Encoder::EncodeXor(Reg dst, Reg src, Imm imm)
2192 {
2193 ASSERT(dst.IsScalar() && "Invalid operand type");
2194 GetMasm()->Eor(VixlReg(dst), VixlReg(src), VixlImm(imm));
2195 }
2196
EncodeMov(Reg dst,Imm src)2197 void Aarch64Encoder::EncodeMov(Reg dst, Imm src)
2198 {
2199 if (dst.IsFloat()) {
2200 if (dst.GetSize() == WORD_SIZE) {
2201 GetMasm()->Fmov(VixlVReg(dst), src.GetAsFloat());
2202 } else {
2203 GetMasm()->Fmov(VixlVReg(dst), src.GetAsDouble());
2204 }
2205 return;
2206 }
2207 if (dst.GetSize() > WORD_SIZE) {
2208 GetMasm()->Mov(VixlReg(dst), VixlImm(src));
2209 } else {
2210 GetMasm()->Mov(VixlReg(dst), VixlImm(static_cast<int32_t>(src.GetAsInt())));
2211 }
2212 }
2213
EncodeLdr(Reg dst,bool dstSigned,MemRef mem)2214 void Aarch64Encoder::EncodeLdr(Reg dst, bool dstSigned, MemRef mem)
2215 {
2216 auto rzero = GetRegfile()->GetZeroReg().GetId();
2217 if (!ConvertMem(mem).IsValid() || (dst.GetId() == rzero && dst.IsScalar())) {
2218 // Try to reuse dst as the zeroed base register (to avoid creating a temp register)
2219 // Check: dst is not a vector register, not the index register, and not rzero
2220 [[maybe_unused]] auto baseReg = mem.GetBase();
2221 auto indexReg = mem.GetIndex();
2222
2223 // Invalid == base is rzero or invalid
2224 ASSERT(baseReg.GetId() == rzero || !baseReg.IsValid());
2225 // Check whether dst can be reused as the base register
2226 if (dst.IsScalar() && dst.IsValid() && // not float
2227 (indexReg.GetId() != dst.GetId()) && // not index
2228 (dst.GetId() != rzero)) { // not rzero
2229 // dst can play the role of rzero: zero it and use it as the base
2230 EncodeMov(dst, Imm(0));
2231
2232 auto fixMem = MemRef(dst, indexReg, mem.GetScale(), mem.GetDisp());
2233 ASSERT(ConvertMem(fixMem).IsValid());
2234 EncodeLdr(dst, dstSigned, fixMem);
2235 } else {
2236 // Use tmp-reg
2237 ScopedTmpReg tmpReg(this);
2238 EncodeMov(tmpReg, Imm(0));
2239
2240 auto fixMem = MemRef(tmpReg, indexReg, mem.GetScale(), mem.GetDisp());
2241 ASSERT(ConvertMem(fixMem).IsValid());
2242 // dst cannot be reused as the base (it is rzero, the index, or a float register), so load through a temp
2243 EncodeLdr(tmpReg, dstSigned, fixMem);
2244 }
2245 return;
2246 }
2247 ASSERT(ConvertMem(mem).IsValid());
2248 if (dst.IsFloat()) {
2249 GetMasm()->Ldr(VixlVReg(dst), ConvertMem(mem));
2250 return;
2251 }
2252 if (dstSigned) {
2253 if (dst.GetSize() == BYTE_SIZE) {
2254 GetMasm()->Ldrsb(VixlReg(dst, DOUBLE_WORD_SIZE), ConvertMem(mem));
2255 return;
2256 }
2257 if (dst.GetSize() == HALF_SIZE) {
2258 GetMasm()->Ldrsh(VixlReg(dst), ConvertMem(mem));
2259 return;
2260 }
2261 } else {
2262 if (dst.GetSize() == BYTE_SIZE) {
2263 GetMasm()->Ldrb(VixlReg(dst, WORD_SIZE), ConvertMem(mem));
2264 return;
2265 }
2266 if (dst.GetSize() == HALF_SIZE) {
2267 GetMasm()->Ldrh(VixlReg(dst), ConvertMem(mem));
2268 return;
2269 }
2270 }
2271 GetMasm()->Ldr(VixlReg(dst), ConvertMem(mem));
2272 }
2273
EncodeLdrAcquireInvalid(Reg dst,bool dstSigned,MemRef mem)2274 void Aarch64Encoder::EncodeLdrAcquireInvalid(Reg dst, bool dstSigned, MemRef mem)
2275 {
2276 // Try to reuse dst as the zeroed base register (to avoid creating a temp register)
2277 // Check: dst is not a vector register, not the index register, and not rzero
2278 [[maybe_unused]] auto baseReg = mem.GetBase();
2279 auto rzero = GetRegfile()->GetZeroReg().GetId();
2280
2281 auto indexReg = mem.GetIndex();
2282
2283 // Invalid == base is rzero or invalid
2284 ASSERT(baseReg.GetId() == rzero || !baseReg.IsValid());
2285 // Check whether dst can be reused as the base register
2286 if (dst.IsScalar() && dst.IsValid() && // not float
2287 (indexReg.GetId() != dst.GetId()) && // not index
2288 (dst.GetId() != rzero)) { // not rzero
2289 // dst can play the role of rzero: zero it and use it as the base
2290 EncodeMov(dst, Imm(0));
2291
2292 auto fixMem = MemRef(dst, indexReg, mem.GetScale(), mem.GetDisp());
2293 ASSERT(ConvertMem(fixMem).IsValid());
2294 EncodeLdrAcquire(dst, dstSigned, fixMem);
2295 } else {
2296 // Use tmp-reg
2297 ScopedTmpReg tmpReg(this);
2298 EncodeMov(tmpReg, Imm(0));
2299
2300 auto fixMem = MemRef(tmpReg, indexReg, mem.GetScale(), mem.GetDisp());
2301 ASSERT(ConvertMem(fixMem).IsValid());
2302 // dst cannot be reused as the base (it is rzero, the index, or a float register), so load through a temp
2303 EncodeLdrAcquire(tmpReg, dstSigned, fixMem);
2304 }
2305 }
2306
EncodeLdrAcquireScalar(Reg dst,bool dstSigned,MemRef mem)2307 void Aarch64Encoder::EncodeLdrAcquireScalar(Reg dst, bool dstSigned, MemRef mem)
2308 {
2309 #ifndef NDEBUG
2310 CheckAlignment(mem, dst.GetSize());
2311 #endif // NDEBUG
2312 if (dstSigned) {
2313 if (dst.GetSize() == BYTE_SIZE) {
2314 GetMasm()->Ldarb(VixlReg(dst), ConvertMem(mem));
2315 GetMasm()->Sxtb(VixlReg(dst), VixlReg(dst));
2316 return;
2317 }
2318 if (dst.GetSize() == HALF_SIZE) {
2319 GetMasm()->Ldarh(VixlReg(dst), ConvertMem(mem));
2320 GetMasm()->Sxth(VixlReg(dst), VixlReg(dst));
2321 return;
2322 }
2323 if (dst.GetSize() == WORD_SIZE) {
2324 GetMasm()->Ldar(VixlReg(dst), ConvertMem(mem));
2325 GetMasm()->Sxtw(VixlReg(dst), VixlReg(dst));
2326 return;
2327 }
2328 } else {
2329 if (dst.GetSize() == BYTE_SIZE) {
2330 GetMasm()->Ldarb(VixlReg(dst, WORD_SIZE), ConvertMem(mem));
2331 return;
2332 }
2333 if (dst.GetSize() == HALF_SIZE) {
2334 GetMasm()->Ldarh(VixlReg(dst), ConvertMem(mem));
2335 return;
2336 }
2337 }
2338 GetMasm()->Ldar(VixlReg(dst), ConvertMem(mem));
2339 }
2340
CheckAlignment(MemRef mem,size_t size)2341 void Aarch64Encoder::CheckAlignment(MemRef mem, size_t size)
2342 {
2343 ASSERT(size == WORD_SIZE || size == BYTE_SIZE || size == HALF_SIZE || size == DOUBLE_WORD_SIZE);
2344 if (size == BYTE_SIZE) {
2345 return;
2346 }
2347 size_t alignmentMask = (size >> 3U) - 1;
2348 ASSERT(!mem.HasIndex() && !mem.HasScale());
2349 if (mem.HasDisp()) {
2350 // Checking base + offset together would require an additional temporary register.
2351 // The case where the base and the offset are individually misaligned but their sum is aligned is very rare,
2352 // so the base and the offset are checked for alignment separately.
2353 [[maybe_unused]] size_t offset = mem.GetDisp();
2354 ASSERT((offset & alignmentMask) == 0);
2355 }
2356 auto baseReg = mem.GetBase();
2357 auto end = CreateLabel();
2358 EncodeJumpTest(end, baseReg, Imm(alignmentMask), Condition::TST_EQ);
2359 EncodeAbort();
2360 BindLabel(end);
2361 }
2362
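// LDAR/LDARB/LDARH accept only a plain base-register address ([Xn] with no offset), so any index or
// displacement in `mem` is folded into a temporary (or into dst itself for scalar loads) and the acquire
// load is issued from the computed address.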
EncodeLdrAcquire(Reg dst,bool dstSigned,MemRef mem)2363 void Aarch64Encoder::EncodeLdrAcquire(Reg dst, bool dstSigned, MemRef mem)
2364 {
2365 if (mem.HasIndex()) {
2366 ScopedTmpRegU64 tmpReg(this);
2367 if (mem.HasScale()) {
2368 EncodeAdd(tmpReg, mem.GetBase(), Shift(mem.GetIndex(), mem.GetScale()));
2369 } else {
2370 EncodeAdd(tmpReg, mem.GetBase(), mem.GetIndex());
2371 }
2372 mem = MemRef(tmpReg, mem.GetDisp());
2373 }
2374
2375 auto rzero = GetRegfile()->GetZeroReg().GetId();
2376 if (!ConvertMem(mem).IsValid() || (dst.GetId() == rzero && dst.IsScalar())) {
2377 EncodeLdrAcquireInvalid(dst, dstSigned, mem);
2378 return;
2379 }
2380
2381 ASSERT(!mem.HasIndex() && !mem.HasScale());
2382 if (dst.IsFloat()) {
2383 ScopedTmpRegU64 tmpReg(this);
2384 auto memLdar = mem;
2385 if (mem.HasDisp()) {
2386 if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2387 EncodeAdd(tmpReg, mem.GetBase(), Imm(mem.GetDisp()));
2388 } else {
2389 EncodeMov(tmpReg, Imm(mem.GetDisp()));
2390 EncodeAdd(tmpReg, mem.GetBase(), tmpReg);
2391 }
2392 memLdar = MemRef(tmpReg);
2393 }
2394 #ifndef NDEBUG
2395 CheckAlignment(memLdar, dst.GetSize());
2396 #endif // NDEBUG
2397 auto tmp = VixlReg(tmpReg, dst.GetSize());
2398 GetMasm()->Ldar(tmp, ConvertMem(memLdar));
2399 GetMasm()->Fmov(VixlVReg(dst), tmp);
2400 return;
2401 }
2402
2403 if (!mem.HasDisp()) {
2404 EncodeLdrAcquireScalar(dst, dstSigned, mem);
2405 return;
2406 }
2407
2408 Reg dst64(dst.GetId(), INT64_TYPE);
2409 if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2410 EncodeAdd(dst64, mem.GetBase(), Imm(mem.GetDisp()));
2411 } else {
2412 EncodeMov(dst64, Imm(mem.GetDisp()));
2413 EncodeAdd(dst64, mem.GetBase(), dst64);
2414 }
2415 EncodeLdrAcquireScalar(dst, dstSigned, MemRef(dst64));
2416 }
2417
EncodeStr(Reg src,MemRef mem)2418 void Aarch64Encoder::EncodeStr(Reg src, MemRef mem)
2419 {
2420 if (!ConvertMem(mem).IsValid()) {
2421 auto indexReg = mem.GetIndex();
2422 auto rzero = GetRegfile()->GetZeroReg().GetId();
2423 // Invalid == base is rzero or invalid
2424 ASSERT(mem.GetBase().GetId() == rzero || !mem.GetBase().IsValid());
2425 // Use tmp-reg
2426 ScopedTmpReg tmpReg(this);
2427 EncodeMov(tmpReg, Imm(0));
2428
2429 auto fixMem = MemRef(tmpReg, indexReg, mem.GetScale(), mem.GetDisp());
2430 ASSERT(ConvertMem(fixMem).IsValid());
2431 if (src.GetId() != rzero) {
2432 EncodeStr(src, fixMem);
2433 } else {
2434 EncodeStr(tmpReg, fixMem);
2435 }
2436 return;
2437 }
2438 ASSERT(ConvertMem(mem).IsValid());
2439 if (src.IsFloat()) {
2440 GetMasm()->Str(VixlVReg(src), ConvertMem(mem));
2441 return;
2442 }
2443 if (src.GetSize() == BYTE_SIZE) {
2444 GetMasm()->Strb(VixlReg(src), ConvertMem(mem));
2445 return;
2446 }
2447 if (src.GetSize() == HALF_SIZE) {
2448 GetMasm()->Strh(VixlReg(src), ConvertMem(mem));
2449 return;
2450 }
2451 GetMasm()->Str(VixlReg(src), ConvertMem(mem));
2452 }
2453
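// STLR/STLRB/STLRH accept only a plain base-register address, so a displacement or index is first folded
// into a temporary base register before the release store is issued.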
EncodeStrRelease(Reg src,MemRef mem)2454 void Aarch64Encoder::EncodeStrRelease(Reg src, MemRef mem)
2455 {
2456 ScopedTmpRegLazy base(this);
2457 MemRef fixedMem;
2458 bool memWasFixed = false;
2459 if (mem.HasDisp()) {
2460 if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2461 base.AcquireIfInvalid();
2462 EncodeAdd(base, mem.GetBase(), Imm(mem.GetDisp()));
2463 } else {
2464 base.AcquireIfInvalid();
2465 EncodeMov(base, Imm(mem.GetDisp()));
2466 EncodeAdd(base, mem.GetBase(), base);
2467 }
2468 memWasFixed = true;
2469 }
2470 if (mem.HasIndex()) {
2471 base.AcquireIfInvalid();
2472 if (mem.HasScale()) {
2473 EncodeAdd(base, memWasFixed ? base : mem.GetBase(), Shift(mem.GetIndex(), mem.GetScale()));
2474 } else {
2475 EncodeAdd(base, memWasFixed ? base : mem.GetBase(), mem.GetIndex());
2476 }
2477 memWasFixed = true;
2478 }
2479
2480 if (memWasFixed) {
2481 fixedMem = MemRef(base);
2482 } else {
2483 fixedMem = mem;
2484 }
2485
2486 #ifndef NDEBUG
2487 CheckAlignment(fixedMem, src.GetSize());
2488 #endif // NDEBUG
2489 if (src.IsFloat()) {
2490 ScopedTmpRegU64 tmpReg(this);
2491 auto tmp = VixlReg(tmpReg, src.GetSize());
2492 GetMasm()->Fmov(tmp, VixlVReg(src));
2493 GetMasm()->Stlr(tmp, ConvertMem(fixedMem));
2494 return;
2495 }
2496 if (src.GetSize() == BYTE_SIZE) {
2497 GetMasm()->Stlrb(VixlReg(src), ConvertMem(fixedMem));
2498 return;
2499 }
2500 if (src.GetSize() == HALF_SIZE) {
2501 GetMasm()->Stlrh(VixlReg(src), ConvertMem(fixedMem));
2502 return;
2503 }
2504 GetMasm()->Stlr(VixlReg(src), ConvertMem(fixedMem));
2505 }
2506
EncodeLdrExclusive(Reg dst,Reg addr,bool acquire)2507 void Aarch64Encoder::EncodeLdrExclusive(Reg dst, Reg addr, bool acquire)
2508 {
2509 ASSERT(dst.IsScalar());
2510 auto dstReg = VixlReg(dst);
2511 auto memCvt = ConvertMem(MemRef(addr));
2512 #ifndef NDEBUG
2513 CheckAlignment(MemRef(addr), dst.GetSize());
2514 #endif // NDEBUG
2515 if (dst.GetSize() == BYTE_SIZE) {
2516 if (acquire) {
2517 GetMasm()->Ldaxrb(dstReg, memCvt);
2518 return;
2519 }
2520 GetMasm()->Ldxrb(dstReg, memCvt);
2521 return;
2522 }
2523 if (dst.GetSize() == HALF_SIZE) {
2524 if (acquire) {
2525 GetMasm()->Ldaxrh(dstReg, memCvt);
2526 return;
2527 }
2528 GetMasm()->Ldxrh(dstReg, memCvt);
2529 return;
2530 }
2531 if (acquire) {
2532 GetMasm()->Ldaxr(dstReg, memCvt);
2533 return;
2534 }
2535 GetMasm()->Ldxr(dstReg, memCvt);
2536 }
2537
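// The status register of STXR/STLXR must be distinct from both the data register and the address register,
// so when dst aliases src or addr the status is written to a temporary and then moved into dst.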
EncodeStrExclusive(Reg dst,Reg src,Reg addr,bool release)2538 void Aarch64Encoder::EncodeStrExclusive(Reg dst, Reg src, Reg addr, bool release)
2539 {
2540 ASSERT(dst.IsScalar() && src.IsScalar());
2541
2542 bool copyDst = dst.GetId() == src.GetId() || dst.GetId() == addr.GetId();
2543 ScopedTmpReg tmp(this);
2544 auto srcReg = VixlReg(src);
2545 auto memCvt = ConvertMem(MemRef(addr));
2546 auto dstReg = copyDst ? VixlReg(tmp) : VixlReg(dst);
2547 #ifndef NDEBUG
2548 CheckAlignment(MemRef(addr), src.GetSize());
2549 #endif // NDEBUG
2550
2551 if (src.GetSize() == BYTE_SIZE) {
2552 if (release) {
2553 GetMasm()->Stlxrb(dstReg, srcReg, memCvt);
2554 } else {
2555 GetMasm()->Stxrb(dstReg, srcReg, memCvt);
2556 }
2557 } else if (src.GetSize() == HALF_SIZE) {
2558 if (release) {
2559 GetMasm()->Stlxrh(dstReg, srcReg, memCvt);
2560 } else {
2561 GetMasm()->Stxrh(dstReg, srcReg, memCvt);
2562 }
2563 } else {
2564 if (release) {
2565 GetMasm()->Stlxr(dstReg, srcReg, memCvt);
2566 } else {
2567 GetMasm()->Stxr(dstReg, srcReg, memCvt);
2568 }
2569 }
2570 if (copyDst) {
2571 EncodeMov(dst, tmp);
2572 }
2573 }
2574
EncodeStrz(Reg src,MemRef mem)2575 void Aarch64Encoder::EncodeStrz(Reg src, MemRef mem)
2576 {
2577 if (!ConvertMem(mem).IsValid()) {
2578 EncodeStr(src, mem);
2579 return;
2580 }
2581 ASSERT(ConvertMem(mem).IsValid());
2582 // The upper half of the register must be zeroed by default
2583 if (src.IsFloat()) {
2584 EncodeStr(src.As(FLOAT64_TYPE), mem);
2585 return;
2586 }
2587 if (src.GetSize() < WORD_SIZE) {
2588 EncodeCast(src, false, src.As(INT64_TYPE), false);
2589 }
2590 GetMasm()->Str(VixlReg(src.As(INT64_TYPE)), ConvertMem(mem));
2591 }
2592
EncodeSti(int64_t src,uint8_t srcSizeBytes,MemRef mem)2593 void Aarch64Encoder::EncodeSti(int64_t src, uint8_t srcSizeBytes, MemRef mem)
2594 {
2595 if (mem.IsValid() && mem.IsOffsetMem() && src == 0 && srcSizeBytes == 1) {
2596 auto rzero = GetRegfile()->GetZeroReg();
2597 GetMasm()->Strb(VixlReg(rzero), ConvertMem(mem));
2598 return;
2599 }
2600 if (!ConvertMem(mem).IsValid()) {
2601 auto rzero = GetRegfile()->GetZeroReg();
2602 EncodeStr(rzero, mem);
2603 return;
2604 }
2605
2606 ScopedTmpRegU64 tmpReg(this);
2607 auto tmp = VixlReg(tmpReg);
2608 GetMasm()->Mov(tmp, VixlImm(src));
2609 if (srcSizeBytes == 1U) {
2610 GetMasm()->Strb(tmp, ConvertMem(mem));
2611 return;
2612 }
2613 if (srcSizeBytes == HALF_WORD_SIZE_BYTES) {
2614 GetMasm()->Strh(tmp, ConvertMem(mem));
2615 return;
2616 }
2617 ASSERT((srcSizeBytes == WORD_SIZE_BYTES) || (srcSizeBytes == DOUBLE_WORD_SIZE_BYTES));
2618 GetMasm()->Str(tmp, ConvertMem(mem));
2619 }
2620
EncodeSti(float src,MemRef mem)2621 void Aarch64Encoder::EncodeSti(float src, MemRef mem)
2622 {
2623 if (!ConvertMem(mem).IsValid()) {
2624 auto rzero = GetRegfile()->GetZeroReg();
2625 EncodeStr(rzero, mem);
2626 return;
2627 }
2628 ScopedTmpRegF32 tmpReg(this);
2629 GetMasm()->Fmov(VixlVReg(tmpReg).S(), src);
2630 EncodeStr(tmpReg, mem);
2631 }
2632
EncodeSti(double src,MemRef mem)2633 void Aarch64Encoder::EncodeSti(double src, MemRef mem)
2634 {
2635 if (!ConvertMem(mem).IsValid()) {
2636 auto rzero = GetRegfile()->GetZeroReg();
2637 EncodeStr(rzero, mem);
2638 return;
2639 }
2640 ScopedTmpRegF64 tmpReg(this);
2641 GetMasm()->Fmov(VixlVReg(tmpReg).D(), src);
2642 EncodeStr(tmpReg, mem);
2643 }
2644
EncodeMemCopy(MemRef memFrom,MemRef memTo,size_t size)2645 void Aarch64Encoder::EncodeMemCopy(MemRef memFrom, MemRef memTo, size_t size)
2646 {
2647 if (!ConvertMem(memFrom).IsValid() || !ConvertMem(memTo).IsValid()) {
2648 auto rzero = GetRegfile()->GetZeroReg();
2649 if (!ConvertMem(memFrom).IsValid()) {
2650 // Encode one load - will fix inside
2651 EncodeLdr(rzero, false, memFrom);
2652 } else {
2653 ASSERT(!ConvertMem(memTo).IsValid());
2654 // Encode one store - will fix inside
2655 EncodeStr(rzero, memTo);
2656 }
2657 return;
2658 }
2659 ASSERT(ConvertMem(memFrom).IsValid());
2660 ASSERT(ConvertMem(memTo).IsValid());
2661 ScopedTmpRegU64 tmpReg(this);
2662 auto tmp = VixlReg(tmpReg, std::min(size, static_cast<size_t>(DOUBLE_WORD_SIZE)));
2663 if (size == BYTE_SIZE) {
2664 GetMasm()->Ldrb(tmp, ConvertMem(memFrom));
2665 GetMasm()->Strb(tmp, ConvertMem(memTo));
2666 } else if (size == HALF_SIZE) {
2667 GetMasm()->Ldrh(tmp, ConvertMem(memFrom));
2668 GetMasm()->Strh(tmp, ConvertMem(memTo));
2669 } else {
2670 ASSERT(size == WORD_SIZE || size == DOUBLE_WORD_SIZE);
2671 GetMasm()->Ldr(tmp, ConvertMem(memFrom));
2672 GetMasm()->Str(tmp, ConvertMem(memTo));
2673 }
2674 }
2675
EncodeMemCopyz(MemRef memFrom,MemRef memTo,size_t size)2676 void Aarch64Encoder::EncodeMemCopyz(MemRef memFrom, MemRef memTo, size_t size)
2677 {
2678 if (!ConvertMem(memFrom).IsValid() || !ConvertMem(memTo).IsValid()) {
2679 auto rzero = GetRegfile()->GetZeroReg();
2680 if (!ConvertMem(memFrom).IsValid()) {
2681 // Encode one load - will fix inside
2682 EncodeLdr(rzero, false, memFrom);
2683 } else {
2684 ASSERT(!ConvertMem(memTo).IsValid());
2685 // Encode one store - will fix inside
2686 EncodeStr(rzero, memTo);
2687 }
2688 return;
2689 }
2690 ASSERT(ConvertMem(memFrom).IsValid());
2691 ASSERT(ConvertMem(memTo).IsValid());
2692 ScopedTmpRegU64 tmpReg(this);
2693 auto tmp = VixlReg(tmpReg, std::min(size, static_cast<size_t>(DOUBLE_WORD_SIZE)));
2694 auto zero = VixlReg(GetRegfile()->GetZeroReg(), WORD_SIZE);
2695 if (size == BYTE_SIZE) {
2696 GetMasm()->Ldrb(tmp, ConvertMem(memFrom));
2697 GetMasm()->Stp(tmp, zero, ConvertMem(memTo));
2698 } else if (size == HALF_SIZE) {
2699 GetMasm()->Ldrh(tmp, ConvertMem(memFrom));
2700 GetMasm()->Stp(tmp, zero, ConvertMem(memTo));
2701 } else {
2702 ASSERT(size == WORD_SIZE || size == DOUBLE_WORD_SIZE);
2703 GetMasm()->Ldr(tmp, ConvertMem(memFrom));
2704 if (size == WORD_SIZE) {
2705 GetMasm()->Stp(tmp, zero, ConvertMem(memTo));
2706 } else {
2707 GetMasm()->Str(tmp, ConvertMem(memTo));
2708 }
2709 }
2710 }
2711
EncodeCompare(Reg dst,Reg src0,Reg src1,Condition cc)2712 void Aarch64Encoder::EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc)
2713 {
2714 ASSERT(src0.IsFloat() == src1.IsFloat());
2715 if (src0.IsFloat()) {
2716 GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
2717 } else {
2718 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2719 }
2720 GetMasm()->Cset(VixlReg(dst), Convert(cc));
2721 }
2722
EncodeCompareTest(Reg dst,Reg src0,Reg src1,Condition cc)2723 void Aarch64Encoder::EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc)
2724 {
2725 ASSERT(src0.IsScalar() && src1.IsScalar());
2726
2727 GetMasm()->Tst(VixlReg(src0), VixlReg(src1));
2728 GetMasm()->Cset(VixlReg(dst), ConvertTest(cc));
2729 }
2730
EncodeAtomicByteOr(Reg addr,Reg value,bool fastEncoding)2731 void Aarch64Encoder::EncodeAtomicByteOr(Reg addr, Reg value, bool fastEncoding)
2732 {
2733 if (fastEncoding) {
2734 #ifndef NDEBUG
2735 vixl::CPUFeaturesScope scope(GetMasm(), vixl::CPUFeatures::kAtomics);
2736 #endif
2737 GetMasm()->Stsetb(VixlReg(value, BYTE_SIZE), MemOperand(VixlReg(addr)));
2738 return;
2739 }
2740
2741 // Slow encoding, should not be used in production code!!!
2742 auto linkReg = GetTarget().GetLinkReg();
2743 auto frameReg = GetTarget().GetFrameReg();
2744 static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTES;
2745
2746 ScopedTmpRegLazy tmp1(this);
2747 ScopedTmpRegLazy tmp2(this);
2748 Reg orValue;
2749 Reg storeResult;
2750 bool hasTemps = GetScratchRegistersWithLrCount() >= 2U;
2751 if (hasTemps) {
2752 tmp1.AcquireWithLr();
2753 tmp2.AcquireWithLr();
2754 orValue = tmp1.GetReg().As(INT32_TYPE);
2755 storeResult = tmp2.GetReg().As(INT32_TYPE);
2756 } else {
2757 GetMasm()->stp(VixlReg(frameReg), VixlReg(linkReg),
2758 MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
2759 orValue = frameReg.As(INT32_TYPE);
2760 storeResult = linkReg.As(INT32_TYPE);
2761 }
2762
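// Classic load-exclusive / modify / store-exclusive retry loop:
//   Ldxrb  orValue, [addr]
//   Orr    orValue, orValue, value
//   Stxrb  storeResult, orValue, [addr]   // storeResult == 0 on success
//   Cbnz   storeResult, loop              // retry if the exclusive store failed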
2763 auto *loop = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
2764 GetMasm()->Bind(loop);
2765 GetMasm()->Ldxrb(VixlReg(orValue), MemOperand(VixlReg(addr)));
2766 GetMasm()->Orr(VixlReg(orValue), VixlReg(orValue), VixlReg(value, WORD_SIZE));
2767 GetMasm()->Stxrb(VixlReg(storeResult), VixlReg(orValue), MemOperand(VixlReg(addr)));
2768 GetMasm()->Cbnz(VixlReg(storeResult), loop);
2769 if (!hasTemps) {
2770 GetMasm()->ldp(VixlReg(frameReg), VixlReg(linkReg),
2771 MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
2772 }
2773 }
2774
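// Encodes a three-way compare: dst = 0 when the operands are equal, otherwise +1 or -1, with -1 selected by
// `cc` (LO/LT for the unsigned/signed "less" ordering of scalars; for floats the choice of MI vs LT decides
// on which side an unordered (NaN) comparison falls).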
EncodeCmp(Reg dst,Reg src0,Reg src1,Condition cc)2775 void Aarch64Encoder::EncodeCmp(Reg dst, Reg src0, Reg src1, Condition cc)
2776 {
2777 if (src0.IsFloat()) {
2778 ASSERT(src1.IsFloat());
2779 ASSERT(cc == Condition::MI || cc == Condition::LT);
2780 GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
2781 } else {
2782 ASSERT(src0.IsScalar() && src1.IsScalar());
2783 ASSERT(cc == Condition::LO || cc == Condition::LT);
2784 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2785 }
2786 GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::ne);
2787 GetMasm()->Cneg(VixlReg(Promote(dst)), VixlReg(Promote(dst)), Convert(cc));
2788 }
2789
EncodeSelect(ArgsSelect && args)2790 void Aarch64Encoder::EncodeSelect(ArgsSelect &&args)
2791 {
2792 auto [dst, src0, src1, src2, src3, cc] = args;
2793 if (src2.IsScalar()) {
2794 GetMasm()->Cmp(VixlReg(src2), VixlReg(src3));
2795 } else {
2796 GetMasm()->Fcmp(VixlVReg(src2), VixlVReg(src3));
2797 }
2798 if (dst.IsFloat()) {
2799 GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), Convert(cc));
2800 } else {
2801 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), Convert(cc));
2802 }
2803 }
2804
EncodeSelect(ArgsSelectImm && args)2805 void Aarch64Encoder::EncodeSelect(ArgsSelectImm &&args)
2806 {
2807 auto [dst, src0, src1, src2, imm, cc] = args;
2808 if (src2.IsScalar()) {
2809 GetMasm()->Cmp(VixlReg(src2), VixlImm(imm));
2810 } else {
2811 GetMasm()->Fcmp(VixlVReg(src2), imm.GetAsDouble());
2812 }
2813 if (dst.IsFloat()) {
2814 GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), Convert(cc));
2815 } else {
2816 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), Convert(cc));
2817 }
2818 }
2819
EncodeSelectTest(ArgsSelect && args)2820 void Aarch64Encoder::EncodeSelectTest(ArgsSelect &&args)
2821 {
2822 auto [dst, src0, src1, src2, src3, cc] = args;
2823 ASSERT(!src2.IsFloat() && !src3.IsFloat());
2824 GetMasm()->Tst(VixlReg(src2), VixlReg(src3));
2825 if (dst.IsFloat()) {
2826 GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), ConvertTest(cc));
2827 } else {
2828 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), ConvertTest(cc));
2829 }
2830 }
2831
EncodeSelectTest(ArgsSelectImm && args)2832 void Aarch64Encoder::EncodeSelectTest(ArgsSelectImm &&args)
2833 {
2834 auto [dst, src0, src1, src2, imm, cc] = args;
2835 ASSERT(!src2.IsFloat());
2836 ASSERT(CanEncodeImmLogical(imm.GetAsInt(), src2.GetSize() > WORD_SIZE ? DOUBLE_WORD_SIZE : WORD_SIZE));
2837 GetMasm()->Tst(VixlReg(src2), VixlImm(imm));
2838 if (dst.IsFloat()) {
2839 GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), ConvertTest(cc));
2840 } else {
2841 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), ConvertTest(cc));
2842 }
2843 }
2844
EncodeLdp(Reg dst0,Reg dst1,bool dstSigned,MemRef mem)2845 void Aarch64Encoder::EncodeLdp(Reg dst0, Reg dst1, bool dstSigned, MemRef mem)
2846 {
2847 ASSERT(dst0.IsFloat() == dst1.IsFloat());
2848 ASSERT(dst0.GetSize() == dst1.GetSize());
2849 if (!ConvertMem(mem).IsValid()) {
2850 // Encode one Ldr - will fix inside
2851 EncodeLdr(dst0, dstSigned, mem);
2852 return;
2853 }
2854
2855 if (dst0.IsFloat()) {
2856 GetMasm()->Ldp(VixlVReg(dst0), VixlVReg(dst1), ConvertMem(mem));
2857 return;
2858 }
2859 if (dstSigned && dst0.GetSize() == WORD_SIZE) {
2860 GetMasm()->Ldpsw(VixlReg(dst0, DOUBLE_WORD_SIZE), VixlReg(dst1, DOUBLE_WORD_SIZE), ConvertMem(mem));
2861 return;
2862 }
2863 GetMasm()->Ldp(VixlReg(dst0), VixlReg(dst1), ConvertMem(mem));
2864 }
2865
EncodeStp(Reg src0,Reg src1,MemRef mem)2866 void Aarch64Encoder::EncodeStp(Reg src0, Reg src1, MemRef mem)
2867 {
2868 ASSERT(src0.IsFloat() == src1.IsFloat());
2869 ASSERT(src0.GetSize() == src1.GetSize());
2870 if (!ConvertMem(mem).IsValid()) {
2871 // Encode one Str - will fix inside
2872 EncodeStr(src0, mem);
2873 return;
2874 }
2875
2876 if (src0.IsFloat()) {
2877 GetMasm()->Stp(VixlVReg(src0), VixlVReg(src1), ConvertMem(mem));
2878 return;
2879 }
2880 GetMasm()->Stp(VixlReg(src0), VixlReg(src1), ConvertMem(mem));
2881 }
2882
EncodeMAdd(Reg dst,Reg src0,Reg src1,Reg src2)2883 void Aarch64Encoder::EncodeMAdd(Reg dst, Reg src0, Reg src1, Reg src2)
2884 {
2885 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize() && dst.GetSize() == src2.GetSize());
2886 ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar() && dst.IsScalar() == src2.IsScalar());
2887
2888 ASSERT(!GetRegfile()->IsZeroReg(dst));
2889
2890 if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
2891 EncodeMov(dst, src2);
2892 return;
2893 }
2894
2895 if (GetRegfile()->IsZeroReg(src2)) {
2896 EncodeMul(dst, src0, src1);
2897 return;
2898 }
2899
2900 if (dst.IsScalar()) {
2901 GetMasm()->Madd(VixlReg(dst), VixlReg(src0), VixlReg(src1), VixlReg(src2));
2902 } else {
2903 GetMasm()->Fmadd(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), VixlVReg(src2));
2904 }
2905 }
2906
EncodeMSub(Reg dst,Reg src0,Reg src1,Reg src2)2907 void Aarch64Encoder::EncodeMSub(Reg dst, Reg src0, Reg src1, Reg src2)
2908 {
2909 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize() && dst.GetSize() == src2.GetSize());
2910 ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar() && dst.IsScalar() == src2.IsScalar());
2911
2912 ASSERT(!GetRegfile()->IsZeroReg(dst));
2913
2914 if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
2915 EncodeMov(dst, src2);
2916 return;
2917 }
2918
2919 if (GetRegfile()->IsZeroReg(src2)) {
2920 EncodeMNeg(dst, src0, src1);
2921 return;
2922 }
2923
2924 if (dst.IsScalar()) {
2925 GetMasm()->Msub(VixlReg(dst), VixlReg(src0), VixlReg(src1), VixlReg(src2));
2926 } else {
2927 GetMasm()->Fmsub(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), VixlVReg(src2));
2928 }
2929 }
2930
EncodeMNeg(Reg dst,Reg src0,Reg src1)2931 void Aarch64Encoder::EncodeMNeg(Reg dst, Reg src0, Reg src1)
2932 {
2933 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
2934 ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar());
2935
2936 ASSERT(!GetRegfile()->IsZeroReg(dst));
2937
2938 if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
2939 EncodeMov(dst, Imm(0U));
2940 return;
2941 }
2942
2943 if (dst.IsScalar()) {
2944 GetMasm()->Mneg(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2945 } else {
2946 GetMasm()->Fnmul(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
2947 }
2948 }
2949
EncodeOrNot(Reg dst,Reg src0,Reg src1)2950 void Aarch64Encoder::EncodeOrNot(Reg dst, Reg src0, Reg src1)
2951 {
2952 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
2953 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
2954 GetMasm()->Orn(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2955 }
2956
EncodeOrNot(Reg dst,Reg src0,Shift src1)2957 void Aarch64Encoder::EncodeOrNot(Reg dst, Reg src0, Shift src1)
2958 {
2959 ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
2960 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
2961 GetMasm()->Orn(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2962 }
2963
EncodeExtractBits(Reg dst,Reg src0,Imm imm1,Imm imm2)2964 void Aarch64Encoder::EncodeExtractBits(Reg dst, Reg src0, Imm imm1, Imm imm2)
2965 {
2966 GetMasm()->Ubfx(VixlReg(dst), VixlReg(src0), imm1.GetAsInt(), imm2.GetAsInt());
2967 }
2968
EncodeAndNot(Reg dst,Reg src0,Reg src1)2969 void Aarch64Encoder::EncodeAndNot(Reg dst, Reg src0, Reg src1)
2970 {
2971 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
2972 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
2973 GetMasm()->Bic(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2974 }
2975
EncodeAndNot(Reg dst,Reg src0,Shift src1)2976 void Aarch64Encoder::EncodeAndNot(Reg dst, Reg src0, Shift src1)
2977 {
2978 ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
2979 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
2980 GetMasm()->Bic(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2981 }
2982
EncodeXorNot(Reg dst,Reg src0,Reg src1)2983 void Aarch64Encoder::EncodeXorNot(Reg dst, Reg src0, Reg src1)
2984 {
2985 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
2986 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
2987 GetMasm()->Eon(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2988 }
2989
EncodeXorNot(Reg dst,Reg src0,Shift src1)2990 void Aarch64Encoder::EncodeXorNot(Reg dst, Reg src0, Shift src1)
2991 {
2992 ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
2993 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
2994 GetMasm()->Eon(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2995 }
2996
EncodeNeg(Reg dst,Shift src)2997 void Aarch64Encoder::EncodeNeg(Reg dst, Shift src)
2998 {
2999 ASSERT(dst.GetSize() == src.GetBase().GetSize());
3000 ASSERT(dst.IsScalar() && src.GetBase().IsScalar());
3001 GetMasm()->Neg(VixlReg(dst), VixlShift(src));
3002 }
3003
EncodeStackOverflowCheck(ssize_t offset)3004 void Aarch64Encoder::EncodeStackOverflowCheck(ssize_t offset)
3005 {
3006 ScopedTmpReg tmp(this);
3007 EncodeAdd(tmp, GetTarget().GetStackReg(), Imm(offset));
3008 EncodeLdr(tmp, false, MemRef(tmp));
3009 }
3010
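// Add/Sub/Cmp immediates on AArch64 are 12-bit unsigned values, optionally shifted left by 12 bits
// (e.g. 0xFFF and 0xFFF000 are encodable, 0x1001 is not). A negative immediate can be handled by switching
// Add <-> Sub, hence only the absolute value is checked; INT64_MIN is rejected because it cannot be negated.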
CanEncodeImmAddSubCmp(int64_t imm,uint32_t size,bool signedCompare)3011 bool Aarch64Encoder::CanEncodeImmAddSubCmp(int64_t imm, [[maybe_unused]] uint32_t size,
3012 [[maybe_unused]] bool signedCompare)
3013 {
3014 if (imm == INT64_MIN) {
3015 return false;
3016 }
3017 if (imm < 0) {
3018 imm = -imm;
3019 }
3020 return vixl::aarch64::Assembler::IsImmAddSub(imm);
3021 }
3022
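// Logical immediates (And/Orr/Eor/Tst) are "bitmask immediates": a contiguous run of set bits, rotated and
// replicated across the register (e.g. 0x00FF, 0x0F0F0F0F or 0xFFFFFFF0 are encodable; 0 and all-ones are not).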
CanEncodeImmLogical(uint64_t imm,uint32_t size)3023 bool Aarch64Encoder::CanEncodeImmLogical(uint64_t imm, uint32_t size)
3024 {
3025 #ifndef NDEBUG
3026 if (size < DOUBLE_WORD_SIZE) {
3027 // Test if the highest part is consistent:
3028 ASSERT(((imm >> size) == 0) || (((~imm) >> size) == 0));
3029 }
3030 #endif // NDEBUG
3031 return vixl::aarch64::Assembler::IsImmLogical(imm, size);
3032 }
3033
CanOptimizeImmDivMod(uint64_t imm,bool isSigned) const3034 bool Aarch64Encoder::CanOptimizeImmDivMod(uint64_t imm, bool isSigned) const
3035 {
3036 return CanOptimizeImmDivModCommon(imm, isSigned);
3037 }
3038
3039 /*
3040 * From aarch64 instruction set
3041 *
3042 * ========================================================
3043 * Syntax
3044 *
3045 * LDR Wt, [Xn|SP, Rm{, extend {amount}}] ; 32-bit general registers
3046 *
3047 * LDR Xt, [Xn|SP, Rm{, extend {amount}}] ; 64-bit general registers
3048 *
3049 * amount
3050 * Is the index shift amount, optional and defaulting to #0 when extend is not LSL:
3051 *
3052 * 32-bit general registers
3053 * Can be one of #0 or #2.
3054 *
3055 * 64-bit general registers
3056 * Can be one of #0 or #3.
3057 * ========================================================
3058 * Syntax
3059 *
3060 * LDRH Wt, [Xn|SP, Rm{, extend {amount}}]
3061 *
3062 * amount
3063 * Is the index shift amount, optional and defaulting to #0 when extend is not LSL, and can be either #0 or #1.
3064 * ========================================================
3065 *
3066 * Scale can be 0 or 1 for half load, 2 for word load, 3 for double word load
3067 */
CanEncodeScale(uint64_t imm,uint32_t size)3068 bool Aarch64Encoder::CanEncodeScale(uint64_t imm, uint32_t size)
3069 {
3070 return (imm == 0) || ((1U << imm) == (size >> 3U));
3071 }
3072
CanEncodeShiftedOperand(ShiftOpcode opcode,ShiftType shiftType)3073 bool Aarch64Encoder::CanEncodeShiftedOperand(ShiftOpcode opcode, ShiftType shiftType)
3074 {
3075 switch (opcode) {
3076 case ShiftOpcode::NEG_SR:
3077 case ShiftOpcode::ADD_SR:
3078 case ShiftOpcode::SUB_SR:
3079 return shiftType == ShiftType::LSL || shiftType == ShiftType::LSR || shiftType == ShiftType::ASR;
3080 case ShiftOpcode::AND_SR:
3081 case ShiftOpcode::OR_SR:
3082 case ShiftOpcode::XOR_SR:
3083 case ShiftOpcode::AND_NOT_SR:
3084 case ShiftOpcode::OR_NOT_SR:
3085 case ShiftOpcode::XOR_NOT_SR:
3086 return shiftType != ShiftType::INVALID_SHIFT;
3087 default:
3088 return false;
3089 }
3090 }
3091
CanEncodeFloatSelect()3092 bool Aarch64Encoder::CanEncodeFloatSelect()
3093 {
3094 return true;
3095 }
3096
AcquireScratchRegister(TypeInfo type)3097 Reg Aarch64Encoder::AcquireScratchRegister(TypeInfo type)
3098 {
3099 ASSERT(GetMasm()->GetCurrentScratchRegisterScope() == nullptr);
3100 auto reg = type.IsFloat() ? GetMasm()->GetScratchVRegisterList()->PopLowestIndex()
3101 : GetMasm()->GetScratchRegisterList()->PopLowestIndex();
3102 ASSERT(reg.IsValid());
3103 return Reg(reg.GetCode(), type);
3104 }
3105
AcquireScratchRegister(Reg reg)3106 void Aarch64Encoder::AcquireScratchRegister(Reg reg)
3107 {
3108 ASSERT(GetMasm()->GetCurrentScratchRegisterScope() == nullptr);
3109 if (reg == GetTarget().GetLinkReg()) {
3110 ASSERT_PRINT(!lrAcquired_, "Trying to acquire LR, which hasn't been released before");
3111 lrAcquired_ = true;
3112 return;
3113 }
3114 auto type = reg.GetType();
3115 auto regId = reg.GetId();
3116
3117 if (type.IsFloat()) {
3118 ASSERT(GetMasm()->GetScratchVRegisterList()->IncludesAliasOf(VixlVReg(reg)));
3119 GetMasm()->GetScratchVRegisterList()->Remove(regId);
3120 } else {
3121 ASSERT(GetMasm()->GetScratchRegisterList()->IncludesAliasOf(VixlReg(reg)));
3122 GetMasm()->GetScratchRegisterList()->Remove(regId);
3123 }
3124 }
3125
ReleaseScratchRegister(Reg reg)3126 void Aarch64Encoder::ReleaseScratchRegister(Reg reg)
3127 {
3128 if (reg == GetTarget().GetLinkReg()) {
3129 ASSERT_PRINT(lrAcquired_, "Trying to release LR, which hasn't been acquired before");
3130 lrAcquired_ = false;
3131 } else if (reg.IsFloat()) {
3132 GetMasm()->GetScratchVRegisterList()->Combine(reg.GetId());
3133 } else if (reg.GetId() != GetTarget().GetLinkReg().GetId()) {
3134 GetMasm()->GetScratchRegisterList()->Combine(reg.GetId());
3135 }
3136 }
3137
bool Aarch64Encoder::IsScratchRegisterReleased(Reg reg) const
{
    if (reg == GetTarget().GetLinkReg()) {
        return !lrAcquired_;
    }
    if (reg.IsFloat()) {
        return GetMasm()->GetScratchVRegisterList()->IncludesAliasOf(VixlVReg(reg));
    }
    return GetMasm()->GetScratchRegisterList()->IncludesAliasOf(VixlReg(reg));
}

RegMask Aarch64Encoder::GetScratchRegistersMask() const
{
    return RegMask(GetMasm()->GetScratchRegisterList()->GetList());
}

RegMask Aarch64Encoder::GetScratchFpRegistersMask() const
{
    return RegMask(GetMasm()->GetScratchVRegisterList()->GetList());
}

RegMask Aarch64Encoder::GetAvailableScratchRegisters() const
{
    return RegMask(GetMasm()->GetScratchRegisterList()->GetList());
}

VRegMask Aarch64Encoder::GetAvailableScratchFpRegisters() const
{
    return VRegMask(GetMasm()->GetScratchVRegisterList()->GetList());
}

TypeInfo Aarch64Encoder::GetRefType()
{
    return INT64_TYPE;
}

void *Aarch64Encoder::BufferData() const
{
    return GetMasm()->GetBuffer()->GetStartAddress<void *>();
}

size_t Aarch64Encoder::BufferSize() const
{
    return GetMasm()->GetBuffer()->GetSizeInBytes();
}

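/**
 * Marshals a two-argument floating-point helper call: the sources are moved into s0/s1 (or d0/d1)
 * if they are not already there, the entry point is called, and the result is copied back from
 * s0/d0 into dst. Only FLOAT32/FLOAT64 destinations are supported.
 */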
void Aarch64Encoder::MakeLibCall(Reg dst, Reg src0, Reg src1, const void *entryPoint)
{
    if (!dst.IsFloat()) {
        SetFalseResult();
        return;
    }
    if (dst.GetType() == FLOAT32_TYPE) {
        if (!src0.IsFloat() || !src1.IsFloat()) {
            SetFalseResult();
            return;
        }

        if (src0.GetId() != vixl::aarch64::s0.GetCode() || src1.GetId() != vixl::aarch64::s1.GetCode()) {
            ScopedTmpRegF32 tmp(this);
            GetMasm()->Fmov(VixlVReg(tmp), VixlVReg(src1));
            GetMasm()->Fmov(vixl::aarch64::s0, VixlVReg(src0));
            GetMasm()->Fmov(vixl::aarch64::s1, VixlVReg(tmp));
        }

        MakeCall(entryPoint);

        if (dst.GetId() != vixl::aarch64::s0.GetCode()) {
            GetMasm()->Fmov(VixlVReg(dst), vixl::aarch64::s0);
        }
    } else if (dst.GetType() == FLOAT64_TYPE) {
        if (!src0.IsFloat() || !src1.IsFloat()) {
            SetFalseResult();
            return;
        }

        if (src0.GetId() != vixl::aarch64::d0.GetCode() || src1.GetId() != vixl::aarch64::d1.GetCode()) {
            ScopedTmpRegF64 tmp(this);
            GetMasm()->Fmov(VixlVReg(tmp), VixlVReg(src1));

            GetMasm()->Fmov(vixl::aarch64::d0, VixlVReg(src0));
            GetMasm()->Fmov(vixl::aarch64::d1, VixlVReg(tmp));
        }

        MakeCall(entryPoint);

        if (dst.GetId() != vixl::aarch64::d0.GetCode()) {
            GetMasm()->Fmov(VixlVReg(dst), vixl::aarch64::d0);
        }
    } else {
        UNREACHABLE();
    }
}

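/**
 * Stores or loads the registers selected by the mask to/from stack slots addressed relative to SP
 * (slot indices are counted from startReg). If the largest offset does not fit the LDP/STP
 * immediate form, the base address is first materialized into a temporary register.
 */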
template <bool IS_STORE>
void Aarch64Encoder::LoadStoreRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
{
    if (registers.none()) {
        return;
    }
    int32_t lastReg = registers.size() - 1;
    for (; lastReg >= 0; --lastReg) {
        if (registers.test(lastReg)) {
            break;
        }
    }
    // Construct single add for big offset
    size_t spOffset;
    auto lastOffset = (slot + lastReg - startReg) * DOUBLE_WORD_SIZE_BYTES;

    if (!vixl::aarch64::Assembler::IsImmLSPair(lastOffset, vixl::aarch64::kXRegSizeInBytesLog2)) {
        ScopedTmpReg lrReg(this, true);
        auto tmp = VixlReg(lrReg);
        spOffset = slot * DOUBLE_WORD_SIZE_BYTES;
        slot = 0;
        if (vixl::aarch64::Assembler::IsImmAddSub(spOffset)) {
            GetMasm()->Add(tmp, vixl::aarch64::sp, VixlImm(spOffset));
        } else {
            GetMasm()->Mov(tmp, VixlImm(spOffset));
            GetMasm()->Add(tmp, vixl::aarch64::sp, tmp);
        }
        LoadStoreRegistersLoop<IS_STORE>(registers, slot, startReg, isFp, tmp);
    } else {
        LoadStoreRegistersLoop<IS_STORE>(registers, slot, startReg, isFp, vixl::aarch64::sp);
    }
}

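/*
 * Helpers emitting one STP/LDP pair or a single STR/LDR; `idx` points one slot past the last
 * register being handled, hence the -2/-1 adjustments.
 */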
template <bool IS_STORE>
static void LoadStorePair(vixl::aarch64::MacroAssembler *masm, CPURegister lastReg, CPURegister reg, Reg base,
                          int32_t idx)
{
    auto baseReg = VixlReg(base);
    static constexpr int32_t OFFSET = 2;
    if constexpr (IS_STORE) {  // NOLINT
        masm->Stp(lastReg, reg, MemOperand(baseReg, (idx - OFFSET) * DOUBLE_WORD_SIZE_BYTES));
    } else {  // NOLINT
        masm->Ldp(lastReg, reg, MemOperand(baseReg, (idx - OFFSET) * DOUBLE_WORD_SIZE_BYTES));
    }
}

template <bool IS_STORE>
static void LoadStoreReg(vixl::aarch64::MacroAssembler *masm, CPURegister lastReg, Reg base, int32_t idx)
{
    auto baseReg = VixlReg(base);
    if constexpr (IS_STORE) {  // NOLINT
        masm->Str(lastReg, MemOperand(baseReg, (idx - 1) * DOUBLE_WORD_SIZE_BYTES));
    } else {  // NOLINT
        masm->Ldr(lastReg, MemOperand(baseReg, (idx - 1) * DOUBLE_WORD_SIZE_BYTES));
    }
}

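/**
 * Walks the register mask and emits STP/LDP pairs where possible, falling back to single STR/LDR.
 * Without a mask every two saved registers share a pair, since they occupy consecutive slots; with
 * a non-empty mask the slot layout is defined by the mask (each register in `mask` owns a slot),
 * so only registers with adjacent ids can be paired.
 */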
template <bool IS_STORE>
void Aarch64Encoder::LoadStoreRegistersMainLoop(RegMask registers, bool isFp, int32_t slot, Reg base, RegMask mask)
{
    bool hasMask = mask.any();
    int32_t index = hasMask ? static_cast<int32_t>(mask.GetMinRegister()) : 0;
    int32_t lastIndex = -1;
    ssize_t lastId = -1;

    slot -= index;
    for (ssize_t id = index; id < helpers::ToSigned(registers.size()); id++) {
        if (hasMask) {
            if (!mask.test(id)) {
                continue;
            }
            index++;
        }
        if (!registers.test(id)) {
            continue;
        }
        if (!hasMask) {
            index++;
        }
        if (lastId == -1) {
            lastId = id;
            lastIndex = index;
            continue;
        }

        auto lastReg =
            CPURegister(lastId, vixl::aarch64::kXRegSize, isFp ? CPURegister::kVRegister : CPURegister::kRegister);
        if (!hasMask || lastId + 1 == id) {
            auto reg =
                CPURegister(id, vixl::aarch64::kXRegSize, isFp ? CPURegister::kVRegister : CPURegister::kRegister);
            LoadStorePair<IS_STORE>(GetMasm(), lastReg, reg, base, slot + index);
            lastId = -1;
        } else {
            LoadStoreReg<IS_STORE>(GetMasm(), lastReg, base, slot + lastIndex);
            lastId = id;
            lastIndex = index;
        }
    }
    if (lastId != -1) {
        auto lastReg =
            CPURegister(lastId, vixl::aarch64::kXRegSize, isFp ? CPURegister::kVRegister : CPURegister::kRegister);
        LoadStoreReg<IS_STORE>(GetMasm(), lastReg, base, slot + lastIndex);
    }
}

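/**
 * Save/restore variant that works relative to an arbitrary base register. Offsets that do not fit
 * the LDP/STP immediate form are handled by pre-computing base + slot * 8 into a temporary
 * (possibly LR) and rebasing the slot index to zero.
 */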
template <bool IS_STORE>
void Aarch64Encoder::LoadStoreRegisters(RegMask registers, bool isFp, int32_t slot, Reg base, RegMask mask)
{
    if (registers.none()) {
        return;
    }

    int32_t maxOffset = (slot + helpers::ToSigned(registers.GetMaxRegister())) * DOUBLE_WORD_SIZE_BYTES;
    int32_t minOffset = (slot + helpers::ToSigned(registers.GetMinRegister())) * DOUBLE_WORD_SIZE_BYTES;

    ScopedTmpRegLazy tmpReg(this, true);
    // Construct single add for big offset
    if (!vixl::aarch64::Assembler::IsImmLSPair(minOffset, vixl::aarch64::kXRegSizeInBytesLog2) ||
        !vixl::aarch64::Assembler::IsImmLSPair(maxOffset, vixl::aarch64::kXRegSizeInBytesLog2)) {
        tmpReg.AcquireWithLr();
        auto lrReg = VixlReg(tmpReg);
        ssize_t spOffset = slot * DOUBLE_WORD_SIZE_BYTES;
        if (vixl::aarch64::Assembler::IsImmAddSub(spOffset)) {
            GetMasm()->Add(lrReg, VixlReg(base), VixlImm(spOffset));
        } else {
            GetMasm()->Mov(lrReg, VixlImm(spOffset));
            GetMasm()->Add(lrReg, VixlReg(base), lrReg);
        }
        // Adjust new values for slot and base register
        slot = 0;
        base = tmpReg;
    }

    LoadStoreRegistersMainLoop<IS_STORE>(registers, isFp, slot, base, mask);
}

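/**
 * Linear scan over the mask that pairs registers with consecutive codes into one STP/LDP and emits
 * a single STR/LDR otherwise; each register uses slot (slot + code - startReg) double-words from
 * baseReg.
 */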
template <bool IS_STORE>
void Aarch64Encoder::LoadStoreRegistersLoop(RegMask registers, ssize_t slot, size_t startReg, bool isFp,
                                            const vixl::aarch64::Register &baseReg)
{
    size_t i = 0;
    const auto getNextReg = [&registers, &i, isFp]() {
        for (; i < registers.size(); i++) {
            if (registers.test(i)) {
                return CPURegister(i++, vixl::aarch64::kXRegSize,
                                   isFp ? CPURegister::kVRegister : CPURegister::kRegister);
            }
        }
        return CPURegister();
    };

    for (CPURegister nextReg = getNextReg(); nextReg.IsValid();) {
        const CPURegister currReg = nextReg;
        nextReg = getNextReg();
        if (nextReg.IsValid() && (nextReg.GetCode() - 1 == currReg.GetCode())) {
            if constexpr (IS_STORE) {  // NOLINT
                GetMasm()->Stp(currReg, nextReg,
                               MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
            } else {  // NOLINT
                GetMasm()->Ldp(currReg, nextReg,
                               MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
            }
            nextReg = getNextReg();
        } else {
            if constexpr (IS_STORE) {  // NOLINT
                GetMasm()->Str(currReg,
                               MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
            } else {  // NOLINT
                GetMasm()->Ldr(currReg,
                               MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
            }
        }
    }
}

void Aarch64Encoder::SaveRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
{
    LoadStoreRegisters<true>(registers, slot, startReg, isFp);
}

void Aarch64Encoder::LoadRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
{
    LoadStoreRegisters<false>(registers, slot, startReg, isFp);
}

void Aarch64Encoder::SaveRegisters(RegMask registers, bool isFp, ssize_t slot, Reg base, RegMask mask)
{
    LoadStoreRegisters<true>(registers, isFp, slot, base, mask);
}

void Aarch64Encoder::LoadRegisters(RegMask registers, bool isFp, ssize_t slot, Reg base, RegMask mask)
{
    LoadStoreRegisters<false>(registers, isFp, slot, base, mask);
}

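/**
 * Pushes the selected registers pairwise with pre-indexed STP; a lone trailing register is stored
 * with STR but still moves SP by a full pair (16 bytes), so the stack pointer stays 16-byte
 * aligned.
 */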
void Aarch64Encoder::PushRegisters(RegMask registers, bool isFp)
{
    static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTES;
    Register lastReg = INVALID_REG;
    for (size_t i = 0; i < registers.size(); i++) {
        if (registers[i]) {
            if (lastReg == INVALID_REG) {
                lastReg = i;
                continue;
            }
            if (isFp) {
                GetMasm()->stp(vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
                               vixl::aarch64::VRegister(i, DOUBLE_WORD_SIZE),
                               MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
            } else {
                GetMasm()->stp(vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
                               vixl::aarch64::Register(i, DOUBLE_WORD_SIZE),
                               MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
            }
            lastReg = INVALID_REG;
        }
    }
    if (lastReg != INVALID_REG) {
        if (isFp) {
            GetMasm()->str(vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
                           MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
        } else {
            GetMasm()->str(vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
                           MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
        }
    }
}

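/**
 * Mirror of PushRegisters: with an odd register count the highest-numbered register was pushed
 * last, so it is popped first; the rest are reloaded pairwise with post-indexed LDP in reverse
 * order.
 */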
void Aarch64Encoder::PopRegisters(RegMask registers, bool isFp)
{
    static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTES;
    Register lastReg;
    if ((registers.count() & 1U) != 0) {
        lastReg = registers.GetMaxRegister();
        if (isFp) {
            GetMasm()->ldr(vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
                           MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
        } else {
            GetMasm()->ldr(vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
                           MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
        }
        registers.reset(lastReg);
    }
    lastReg = INVALID_REG;
    for (ssize_t i = registers.size() - 1; i >= 0; i--) {
        if (registers[i]) {
            if (lastReg == INVALID_REG) {
                lastReg = i;
                continue;
            }
            if (isFp) {
                GetMasm()->ldp(vixl::aarch64::VRegister(i, DOUBLE_WORD_SIZE),
                               vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
                               MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
            } else {
                GetMasm()->ldp(vixl::aarch64::Register(i, DOUBLE_WORD_SIZE),
                               vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
                               MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
            }
            lastReg = INVALID_REG;
        }
    }
}

vixl::aarch64::MacroAssembler *Aarch64Encoder::GetMasm() const
{
    ASSERT(masm_ != nullptr);
    return masm_;
}

size_t Aarch64Encoder::GetLabelAddress(LabelHolder::LabelId label)
{
    auto plabel = labels_->GetLabel(label);
    ASSERT(plabel->IsBound());
    return GetMasm()->GetLabelAddress<size_t>(plabel);
}

bool Aarch64Encoder::LabelHasLinks(LabelHolder::LabelId label)
{
    auto plabel = labels_->GetLabel(label);
    return plabel->IsLinked();
}

#ifndef PANDA_MINIMAL_VIXL
vixl::aarch64::Decoder &Aarch64Encoder::GetDecoder() const
{
    if (!decoder_) {
        decoder_.emplace(GetAllocator());
        decoder_->visitors()->push_back(&GetDisasm());
    }
    return *decoder_;
}

vixl::aarch64::Disassembler &Aarch64Encoder::GetDisasm() const
{
    if (!disasm_) {
        disasm_.emplace(GetAllocator());
    }
    return *disasm_;
}
#endif

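/**
 * Decodes the single instruction at buffer offset `pc`, prints its disassembly (prefixed with the
 * code offset when codeOffset >= 0) and returns the offset of the next instruction. With
 * PANDA_MINIMAL_VIXL the body is compiled out and only the advanced pc is returned.
 */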
size_t Aarch64Encoder::DisasmInstr([[maybe_unused]] std::ostream &stream, size_t pc,
                                   [[maybe_unused]] ssize_t codeOffset) const
{
#ifndef PANDA_MINIMAL_VIXL
    auto bufferStart = GetMasm()->GetBuffer()->GetOffsetAddress<uintptr_t>(0);
    auto instr = GetMasm()->GetBuffer()->GetOffsetAddress<vixl::aarch64::Instruction *>(pc);
    GetDecoder().Decode(instr);
    if (codeOffset < 0) {
        stream << GetDisasm().GetOutput();
    } else {
        stream << std::setw(0x4) << std::right << std::setfill('0') << std::hex
               << reinterpret_cast<uintptr_t>(instr) - bufferStart + codeOffset << ": " << GetDisasm().GetOutput()
               << std::setfill(' ') << std::dec;
    }

#endif
    return pc + vixl::aarch64::kInstructionSize;
}
}  // namespace ark::compiler::aarch64