1 /*
2 * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 /*
16 Encoder (implementation of math and mem Low-level emitters)
17 */
18
19 #include <aarch64/macro-assembler-aarch64.h>
20 #include <cstddef>
21 #include "compiler/optimizer/code_generator/target/aarch64/target.h"
22 #include "compiler/optimizer/code_generator/encode.h"
23 #include "compiler/optimizer/code_generator/fast_divisor.h"
24 #include "scoped_tmp_reg.h"
25 #include "compiler/optimizer/code_generator/relocations.h"
26
27 #if defined(USE_VIXL_ARM64) && !defined(PANDA_MINIMAL_VIXL)
28 #include "aarch64/disasm-aarch64.h"
29 #endif
30
31 #include <iomanip>
32
33 #include "lib_helpers.inl"
34
35 #ifndef PANDA_TARGET_MACOS
36 #include "elf.h"
37 #endif // PANDA_TARGET_MACOS
38
39 namespace ark::compiler::aarch64 {
40 using vixl::aarch64::CPURegister;
41 using vixl::aarch64::MemOperand;
42
43 /// Converters
Convert(const Condition cc)44 static vixl::aarch64::Condition Convert(const Condition cc)
45 {
46 switch (cc) {
47 case Condition::EQ:
48 return vixl::aarch64::Condition::eq;
49 case Condition::NE:
50 return vixl::aarch64::Condition::ne;
51 case Condition::LT:
52 return vixl::aarch64::Condition::lt;
53 case Condition::GT:
54 return vixl::aarch64::Condition::gt;
55 case Condition::LE:
56 return vixl::aarch64::Condition::le;
57 case Condition::GE:
58 return vixl::aarch64::Condition::ge;
59 case Condition::LO:
60 return vixl::aarch64::Condition::lo;
61 case Condition::LS:
62 return vixl::aarch64::Condition::ls;
63 case Condition::HI:
64 return vixl::aarch64::Condition::hi;
65 case Condition::HS:
66 return vixl::aarch64::Condition::hs;
67 // NOTE(igorban) : Remove them
68 case Condition::MI:
69 return vixl::aarch64::Condition::mi;
70 case Condition::PL:
71 return vixl::aarch64::Condition::pl;
72 case Condition::VS:
73 return vixl::aarch64::Condition::vs;
74 case Condition::VC:
75 return vixl::aarch64::Condition::vc;
76 case Condition::AL:
77 return vixl::aarch64::Condition::al;
78 case Condition::NV:
79 return vixl::aarch64::Condition::nv;
80 default:
81 UNREACHABLE();
82 return vixl::aarch64::Condition::eq;
83 }
84 }
85
ConvertTest(const Condition cc)86 static vixl::aarch64::Condition ConvertTest(const Condition cc)
87 {
88 ASSERT(cc == Condition::TST_EQ || cc == Condition::TST_NE);
89 return cc == Condition::TST_EQ ? vixl::aarch64::Condition::eq : vixl::aarch64::Condition::ne;
90 }
91
Convert(const ShiftType type)92 static vixl::aarch64::Shift Convert(const ShiftType type)
93 {
94 switch (type) {
95 case ShiftType::LSL:
96 return vixl::aarch64::Shift::LSL;
97 case ShiftType::LSR:
98 return vixl::aarch64::Shift::LSR;
99 case ShiftType::ASR:
100 return vixl::aarch64::Shift::ASR;
101 case ShiftType::ROR:
102 return vixl::aarch64::Shift::ROR;
103 default:
104 UNREACHABLE();
105 }
106 }
107
VixlVReg(Reg reg)108 static vixl::aarch64::VRegister VixlVReg(Reg reg)
109 {
110 ASSERT(reg.IsValid());
111 auto vixlVreg = vixl::aarch64::VRegister(reg.GetId(), reg.GetSize());
112 ASSERT(vixlVreg.IsValid());
113 return vixlVreg;
114 }
115
VixlShift(Shift shift)116 static vixl::aarch64::Operand VixlShift(Shift shift)
117 {
118 Reg reg = shift.GetBase();
119 ASSERT(reg.IsValid());
120 if (reg.IsScalar()) {
121 ASSERT(reg.IsScalar());
122 size_t regSize = reg.GetSize();
123 if (regSize < WORD_SIZE) {
124 regSize = WORD_SIZE;
125 }
126 auto vixlReg = vixl::aarch64::Register(reg.GetId(), regSize);
127 ASSERT(vixlReg.IsValid());
128
129 return vixl::aarch64::Operand(vixlReg, Convert(shift.GetType()), shift.GetScale());
130 }
131
132 // Invalid register type
133 UNREACHABLE();
134 }
135
ConvertMem(MemRef mem)136 static vixl::aarch64::MemOperand ConvertMem(MemRef mem)
137 {
138 bool base = mem.HasBase() && (mem.GetBase().GetId() != vixl::aarch64::xzr.GetCode());
139 bool hasIndex = mem.HasIndex();
140 bool shift = mem.HasScale();
141 bool offset = mem.HasDisp();
142 auto baseReg = Reg(mem.GetBase().GetId(), INT64_TYPE);
143 if (base && !hasIndex && !shift) {
144 // Memory address = x_reg(base) + imm(offset)
145 if (mem.GetDisp() != 0) {
146 auto disp = mem.GetDisp();
147 return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlImm(disp));
148 }
149 // Memory address = x_reg(base)
150 return vixl::aarch64::MemOperand(VixlReg(mem.GetBase(), DOUBLE_WORD_SIZE));
151 }
152 if (base && hasIndex && !offset) {
153 auto scale = mem.GetScale();
154 auto indexReg = mem.GetIndex();
155 // Memory address = x_reg(base) + (SXTW(w_reg(index)) << scale)
156 if (indexReg.GetSize() == WORD_SIZE) {
157 // Sign-extend and shift w-register in offset-position (signed because index always has signed type)
158 return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlReg(indexReg), vixl::aarch64::Extend::SXTW, scale);
159 }
160 // Memory address = x_reg(base) + (x_reg(index) << scale)
161 if (scale != 0) {
162 ASSERT(indexReg.GetSize() == DOUBLE_WORD_SIZE);
163 return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlReg(indexReg), vixl::aarch64::LSL, scale);
164 }
165 // Memory address = x_reg(base) + x_reg(index)
166 return vixl::aarch64::MemOperand(VixlReg(baseReg), VixlReg(indexReg));
167 }
168 // Wrong memRef
169 // Return invalid memory operand
170 auto tmp = vixl::aarch64::MemOperand();
171 ASSERT(!tmp.IsValid());
172 return tmp;
173 }
174
Promote(Reg reg)175 static Reg Promote(Reg reg)
176 {
177 if (reg.GetType() == INT8_TYPE) {
178 return Reg(reg.GetId(), INT16_TYPE);
179 }
180 return reg;
181 }
182
CreateLabel()183 Aarch64LabelHolder::LabelId Aarch64LabelHolder::CreateLabel()
184 {
185 ++id_;
186 auto allocator = GetEncoder()->GetAllocator();
187 auto *label = allocator->New<LabelType>(allocator);
188 labels_.push_back(label);
189 ASSERT(labels_.size() == id_);
190 return id_ - 1;
191 }
192
CreateLabels(LabelId size)193 void Aarch64LabelHolder::CreateLabels(LabelId size)
194 {
195 for (LabelId i = 0; i <= size; ++i) {
196 CreateLabel();
197 }
198 }
199
BindLabel(LabelId id)200 void Aarch64LabelHolder::BindLabel(LabelId id)
201 {
202 static_cast<Aarch64Encoder *>(GetEncoder())->GetMasm()->Bind(labels_[id]);
203 }
204
GetLabel(LabelId id) const205 Aarch64LabelHolder::LabelType *Aarch64LabelHolder::GetLabel(LabelId id) const
206 {
207 ASSERT(labels_.size() > id);
208 return labels_[id];
209 }
210
Size()211 Aarch64LabelHolder::LabelId Aarch64LabelHolder::Size()
212 {
213 return labels_.size();
214 }
215
Aarch64Encoder(ArenaAllocator * allocator)216 Aarch64Encoder::Aarch64Encoder(ArenaAllocator *allocator) : Encoder(allocator, Arch::AARCH64)
217 {
218 labels_ = allocator->New<Aarch64LabelHolder>(this);
219 if (labels_ == nullptr) {
220 SetFalseResult();
221 }
222 // We enable LR tmp reg by default in Aarch64
223 EnableLrAsTempReg(true);
224 }
225
~Aarch64Encoder()226 Aarch64Encoder::~Aarch64Encoder()
227 {
228 auto labels = static_cast<Aarch64LabelHolder *>(GetLabels())->labels_;
229 for (auto label : labels) {
230 label->~Label();
231 }
232 if (masm_ != nullptr) {
233 masm_->~MacroAssembler();
234 masm_ = nullptr;
235 }
236 }
237
GetLabels() const238 LabelHolder *Aarch64Encoder::GetLabels() const
239 {
240 ASSERT(labels_ != nullptr);
241 return labels_;
242 }
243
IsValid() const244 bool Aarch64Encoder::IsValid() const
245 {
246 return true;
247 }
248
GetTarget()249 constexpr auto Aarch64Encoder::GetTarget()
250 {
251 return ark::compiler::Target(Arch::AARCH64);
252 }
253
SetMaxAllocatedBytes(size_t size)254 void Aarch64Encoder::SetMaxAllocatedBytes(size_t size)
255 {
256 GetMasm()->GetBuffer()->SetMmapMaxBytes(size);
257 }
258
InitMasm()259 bool Aarch64Encoder::InitMasm()
260 {
261 if (masm_ == nullptr) {
262 // Initialize Masm
263 masm_ = GetAllocator()->New<vixl::aarch64::MacroAssembler>(GetAllocator());
264 if (masm_ == nullptr || !masm_->IsValid()) {
265 SetFalseResult();
266 return false;
267 }
268 ASSERT(GetMasm());
269
270 // Make sure that the compiler uses the same scratch registers as the assembler
271 CHECK_EQ(RegMask(GetMasm()->GetScratchRegisterList()->GetList()), GetTarget().GetTempRegsMask());
272 CHECK_EQ(RegMask(GetMasm()->GetScratchVRegisterList()->GetList()), GetTarget().GetTempVRegsMask());
273 }
274 return true;
275 }
276
Finalize()277 void Aarch64Encoder::Finalize()
278 {
279 GetMasm()->FinalizeCode();
280 }
281
EncodeJump(LabelHolder::LabelId id)282 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id)
283 {
284 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
285 GetMasm()->B(label);
286 }
287
EncodeJump(LabelHolder::LabelId id,Reg src0,Reg src1,Condition cc)288 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
289 {
290 if (src1.GetId() == GetRegfile()->GetZeroReg().GetId()) {
291 EncodeJump(id, src0, cc);
292 return;
293 }
294
295 if (src0.IsScalar()) {
296 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
297 } else {
298 GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
299 }
300
301 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
302 GetMasm()->B(label, Convert(cc));
303 }
304
EncodeJump(LabelHolder::LabelId id,Reg src,Imm imm,Condition cc)305 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
306 {
307 auto value = imm.GetAsInt();
308 if (value == 0) {
309 EncodeJump(id, src, cc);
310 return;
311 }
312
313 if (value < 0) {
314 GetMasm()->Cmn(VixlReg(src), VixlImm(-value));
315 } else { // if (value > 0)
316 GetMasm()->Cmp(VixlReg(src), VixlImm(value));
317 }
318
319 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
320 GetMasm()->B(label, Convert(cc));
321 }
322
EncodeJumpTest(LabelHolder::LabelId id,Reg src0,Reg src1,Condition cc)323 void Aarch64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
324 {
325 ASSERT(src0.IsScalar() && src1.IsScalar());
326
327 GetMasm()->Tst(VixlReg(src0), VixlReg(src1));
328 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
329 GetMasm()->B(label, ConvertTest(cc));
330 }
331
EncodeJumpTest(LabelHolder::LabelId id,Reg src,Imm imm,Condition cc)332 void Aarch64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
333 {
334 ASSERT(src.IsScalar());
335
336 auto value = imm.GetAsInt();
337 if (CanEncodeImmLogical(value, src.GetSize() > WORD_SIZE ? DOUBLE_WORD_SIZE : WORD_SIZE)) {
338 GetMasm()->Tst(VixlReg(src), VixlImm(value));
339 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
340 GetMasm()->B(label, ConvertTest(cc));
341 } else {
342 ScopedTmpReg tmpReg(this, src.GetType());
343 EncodeMov(tmpReg, imm);
344 EncodeJumpTest(id, src, tmpReg, cc);
345 }
346 }
347
EncodeJump(LabelHolder::LabelId id,Reg src,Condition cc)348 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Condition cc)
349 {
350 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
351 ASSERT(src.IsScalar());
352 auto rzero = Reg(GetRegfile()->GetZeroReg().GetId(), src.GetType());
353
354 switch (cc) {
355 case Condition::LO:
356 // Always false
357 return;
358 case Condition::HS:
359 // Always true
360 GetMasm()->B(label);
361 return;
362 case Condition::EQ:
363 case Condition::LS:
364 if (src.GetId() == rzero.GetId()) {
365 GetMasm()->B(label);
366 return;
367 }
368 // True only when zero
369 GetMasm()->Cbz(VixlReg(src), label);
370 return;
371 case Condition::NE:
372 case Condition::HI:
373 if (src.GetId() == rzero.GetId()) {
374 // Do nothing
375 return;
376 }
377 // True only when non-zero
378 GetMasm()->Cbnz(VixlReg(src), label);
379 return;
380 default:
381 break;
382 }
383
384 ASSERT(rzero.IsValid());
385 GetMasm()->Cmp(VixlReg(src), VixlReg(rzero));
386 GetMasm()->B(label, Convert(cc));
387 }
388
EncodeJump(Reg dst)389 void Aarch64Encoder::EncodeJump(Reg dst)
390 {
391 GetMasm()->Br(VixlReg(dst));
392 }
393
EncodeJump(RelocationInfo * relocation)394 void Aarch64Encoder::EncodeJump([[maybe_unused]] RelocationInfo *relocation)
395 {
396 #ifdef PANDA_TARGET_MACOS
397 LOG(FATAL, COMPILER) << "Not supported in Macos build";
398 #else
399 auto buffer = GetMasm()->GetBuffer();
400 relocation->offset = GetCursorOffset();
401 relocation->addend = 0;
402 relocation->type = R_AARCH64_CALL26;
403 static constexpr uint32_t CALL_WITH_ZERO_OFFSET = 0x14000000;
404 buffer->Emit32(CALL_WITH_ZERO_OFFSET);
405 #endif
406 }
407
EncodeBitTestAndBranch(LabelHolder::LabelId id,compiler::Reg reg,uint32_t bitPos,bool bitValue)408 void Aarch64Encoder::EncodeBitTestAndBranch(LabelHolder::LabelId id, compiler::Reg reg, uint32_t bitPos, bool bitValue)
409 {
410 ASSERT(reg.IsScalar() && reg.GetSize() > bitPos);
411 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
412 if (bitValue) {
413 GetMasm()->Tbnz(VixlReg(reg), bitPos, label);
414 } else {
415 GetMasm()->Tbz(VixlReg(reg), bitPos, label);
416 }
417 }
418
EncodeNop()419 void Aarch64Encoder::EncodeNop()
420 {
421 GetMasm()->Nop();
422 }
423
MakeCall(compiler::RelocationInfo * relocation)424 void Aarch64Encoder::MakeCall([[maybe_unused]] compiler::RelocationInfo *relocation)
425 {
426 #ifdef PANDA_TARGET_MACOS
427 LOG(FATAL, COMPILER) << "Not supported in Macos build";
428 #else
429 auto buffer = GetMasm()->GetBuffer();
430 relocation->offset = GetCursorOffset();
431 relocation->addend = 0;
432 relocation->type = R_AARCH64_CALL26;
433 static constexpr uint32_t CALL_WITH_ZERO_OFFSET = 0x94000000;
434 buffer->Emit32(CALL_WITH_ZERO_OFFSET);
435 #endif
436 }
437
MakeCall(const void * entryPoint)438 void Aarch64Encoder::MakeCall(const void *entryPoint)
439 {
440 ScopedTmpReg tmp(this, true);
441 EncodeMov(tmp, Imm(reinterpret_cast<uintptr_t>(entryPoint)));
442 GetMasm()->Blr(VixlReg(tmp));
443 }
444
MakeCall(MemRef entryPoint)445 void Aarch64Encoder::MakeCall(MemRef entryPoint)
446 {
447 ScopedTmpReg tmp(this, true);
448 EncodeLdr(tmp, false, entryPoint);
449 GetMasm()->Blr(VixlReg(tmp));
450 }
451
MakeCall(Reg reg)452 void Aarch64Encoder::MakeCall(Reg reg)
453 {
454 GetMasm()->Blr(VixlReg(reg));
455 }
456
MakeCall(LabelHolder::LabelId id)457 void Aarch64Encoder::MakeCall(LabelHolder::LabelId id)
458 {
459 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
460 GetMasm()->Bl(label);
461 }
462
LoadPcRelative(Reg reg,intptr_t offset,Reg regAddr)463 void Aarch64Encoder::LoadPcRelative(Reg reg, intptr_t offset, Reg regAddr)
464 {
465 ASSERT(GetCodeOffset() != Encoder::INVALID_OFFSET);
466 ASSERT(reg.IsValid() || regAddr.IsValid());
467
468 if (!regAddr.IsValid()) {
469 regAddr = reg.As(INT64_TYPE);
470 }
471
472 if (vixl::IsInt21(offset)) {
473 GetMasm()->adr(VixlReg(regAddr), offset);
474 if (reg != INVALID_REGISTER) {
475 EncodeLdr(reg, false, MemRef(regAddr));
476 }
477 } else {
478 size_t pc = GetCodeOffset() + GetCursorOffset();
479 size_t addr;
480 if (auto res = static_cast<intptr_t>(helpers::ToSigned(pc) + offset); res < 0) {
481 // Make both, pc and addr, positive
482 ssize_t extend = RoundUp(std::abs(res), vixl::aarch64::kPageSize);
483 addr = static_cast<size_t>(res + extend);
484 pc += static_cast<size_t>(extend);
485 } else {
486 addr = res;
487 }
488
489 ssize_t adrpImm = (addr >> vixl::aarch64::kPageSizeLog2) - (pc >> vixl::aarch64::kPageSizeLog2);
490
491 GetMasm()->adrp(VixlReg(regAddr), adrpImm);
492
493 offset = ark::helpers::ToUnsigned(addr) & (vixl::aarch64::kPageSize - 1);
494 if (reg.GetId() != regAddr.GetId()) {
495 EncodeAdd(regAddr, regAddr, Imm(offset));
496 if (reg != INVALID_REGISTER) {
497 EncodeLdr(reg, true, MemRef(regAddr));
498 }
499 } else {
500 EncodeLdr(reg, true, MemRef(regAddr, offset));
501 }
502 }
503 }
504
MakeCallAot(intptr_t offset)505 void Aarch64Encoder::MakeCallAot(intptr_t offset)
506 {
507 ScopedTmpReg tmp(this, true);
508 LoadPcRelative(tmp, offset);
509 GetMasm()->Blr(VixlReg(tmp));
510 }
511
CanMakeCallByOffset(intptr_t offset)512 bool Aarch64Encoder::CanMakeCallByOffset(intptr_t offset)
513 {
514 // NOLINTNEXTLINE(hicpp-signed-bitwise)
515 auto off = (static_cast<uintptr_t>(offset) >> vixl::aarch64::kInstructionSizeLog2);
516 return vixl::aarch64::Instruction::IsValidImmPCOffset(vixl::aarch64::ImmBranchType::UncondBranchType, off);
517 }
518
MakeCallByOffset(intptr_t offset)519 void Aarch64Encoder::MakeCallByOffset(intptr_t offset)
520 {
521 GetMasm()->Bl(offset);
522 }
523
MakeLoadAotTable(intptr_t offset,Reg reg)524 void Aarch64Encoder::MakeLoadAotTable(intptr_t offset, Reg reg)
525 {
526 LoadPcRelative(reg, offset);
527 }
528
MakeLoadAotTableAddr(intptr_t offset,Reg addr,Reg val)529 void Aarch64Encoder::MakeLoadAotTableAddr(intptr_t offset, Reg addr, Reg val)
530 {
531 LoadPcRelative(val, offset, addr);
532 }
533
EncodeAbort()534 void Aarch64Encoder::EncodeAbort()
535 {
536 GetMasm()->Brk();
537 }
538
EncodeReturn()539 void Aarch64Encoder::EncodeReturn()
540 {
541 GetMasm()->Ret();
542 }
543
EncodeMul(Reg unused1,Reg unused2,Imm unused3)544 void Aarch64Encoder::EncodeMul([[maybe_unused]] Reg unused1, [[maybe_unused]] Reg unused2, [[maybe_unused]] Imm unused3)
545 {
546 SetFalseResult();
547 }
548
EncodeMov(Reg dst,Reg src)549 void Aarch64Encoder::EncodeMov(Reg dst, Reg src)
550 {
551 if (dst == src) {
552 return;
553 }
554 if (src.IsFloat() && dst.IsFloat()) {
555 if (src.GetSize() != dst.GetSize()) {
556 GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
557 return;
558 }
559 GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
560 return;
561 }
562 if (src.IsFloat() && !dst.IsFloat()) {
563 GetMasm()->Fmov(VixlReg(dst, src.GetSize()), VixlVReg(src));
564 return;
565 }
566 if (dst.IsFloat()) {
567 ASSERT(src.IsScalar());
568 GetMasm()->Fmov(VixlVReg(dst), VixlReg(src));
569 return;
570 }
571 // DiscardForSameWReg below means we would drop "mov w0, w0", but it is guarded by "dst == src" above anyway.
572 // NOTE: "mov w0, w0" is not equal "nop", as it clears upper bits of x0.
573 // Keeping the option here helps to generate nothing when e.g. src is x0 and dst is w0.
574 // Probably, a better solution here is to system-wide checking register size on Encoder level.
575 if (src.GetSize() != dst.GetSize()) {
576 auto srcReg = Reg(src.GetId(), dst.GetType());
577 GetMasm()->Mov(VixlReg(dst), VixlReg(srcReg), vixl::aarch64::DiscardMoveMode::kDiscardForSameWReg);
578 return;
579 }
580 GetMasm()->Mov(VixlReg(dst), VixlReg(src), vixl::aarch64::DiscardMoveMode::kDiscardForSameWReg);
581 }
582
EncodeNeg(Reg dst,Reg src)583 void Aarch64Encoder::EncodeNeg(Reg dst, Reg src)
584 {
585 if (dst.IsFloat()) {
586 GetMasm()->Fneg(VixlVReg(dst), VixlVReg(src));
587 return;
588 }
589 GetMasm()->Neg(VixlReg(dst), VixlReg(src));
590 }
591
EncodeAbs(Reg dst,Reg src)592 void Aarch64Encoder::EncodeAbs(Reg dst, Reg src)
593 {
594 if (dst.IsFloat()) {
595 GetMasm()->Fabs(VixlVReg(dst), VixlVReg(src));
596 return;
597 }
598
599 ASSERT(!GetRegfile()->IsZeroReg(dst));
600 if (GetRegfile()->IsZeroReg(src)) {
601 EncodeMov(dst, src);
602 return;
603 }
604
605 if (src.GetSize() == DOUBLE_WORD_SIZE) {
606 GetMasm()->Cmp(VixlReg(src), vixl::aarch64::xzr);
607 } else {
608 GetMasm()->Cmp(VixlReg(src), vixl::aarch64::wzr);
609 }
610 GetMasm()->Cneg(VixlReg(Promote(dst)), VixlReg(Promote(src)), vixl::aarch64::Condition::lt);
611 }
612
EncodeSqrt(Reg dst,Reg src)613 void Aarch64Encoder::EncodeSqrt(Reg dst, Reg src)
614 {
615 ASSERT(dst.IsFloat());
616 GetMasm()->Fsqrt(VixlVReg(dst), VixlVReg(src));
617 }
618
EncodeIsInf(Reg dst,Reg src)619 void Aarch64Encoder::EncodeIsInf(Reg dst, Reg src)
620 {
621 ASSERT(dst.IsScalar() && src.IsFloat());
622
623 if (src.GetSize() == WORD_SIZE) {
624 constexpr uint32_t INF_MASK = 0xff000000;
625
626 ScopedTmpRegU32 tmpReg(this);
627 auto tmp = VixlReg(tmpReg);
628 GetMasm()->Fmov(tmp, VixlVReg(src));
629 GetMasm()->Mov(VixlReg(dst).W(), INF_MASK);
630 GetMasm()->Lsl(tmp, tmp, 1);
631 GetMasm()->Cmp(tmp, VixlReg(dst, WORD_SIZE));
632 } else {
633 constexpr uint64_t INF_MASK = 0xffe0000000000000;
634
635 ScopedTmpRegU64 tmpReg(this);
636 auto tmp = VixlReg(tmpReg);
637 GetMasm()->Fmov(tmp, VixlVReg(src));
638 GetMasm()->Mov(VixlReg(dst).X(), INF_MASK);
639 GetMasm()->Lsl(tmp, tmp, 1);
640 GetMasm()->Cmp(tmp, VixlReg(dst, DOUBLE_WORD_SIZE));
641 }
642
643 GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::eq);
644 }
645
EncodeCmpFracWithDelta(Reg src)646 void Aarch64Encoder::EncodeCmpFracWithDelta(Reg src)
647 {
648 ASSERT(src.IsFloat());
649 ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
650
651 // Encode (fabs(src - trunc(src)) <= DELTA)
652 if (src.GetSize() == WORD_SIZE) {
653 ScopedTmpRegF32 tmp(this);
654 GetMasm()->Frintz(VixlVReg(tmp), VixlVReg(src));
655 EncodeSub(tmp, src, tmp);
656 EncodeAbs(tmp, tmp);
657 GetMasm()->Fcmp(VixlVReg(tmp), std::numeric_limits<float>::epsilon());
658 } else {
659 ScopedTmpRegF64 tmp(this);
660 GetMasm()->Frintz(VixlVReg(tmp), VixlVReg(src));
661 EncodeSub(tmp, src, tmp);
662 EncodeAbs(tmp, tmp);
663 GetMasm()->Fcmp(VixlVReg(tmp), std::numeric_limits<double>::epsilon());
664 }
665 }
666
EncodeIsInteger(Reg dst,Reg src)667 void Aarch64Encoder::EncodeIsInteger(Reg dst, Reg src)
668 {
669 ASSERT(dst.IsScalar() && src.IsFloat());
670 ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
671
672 auto labelExit = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
673 auto labelInfOrNan = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
674
675 EncodeCmpFracWithDelta(src);
676 GetMasm()->B(labelInfOrNan, vixl::aarch64::Condition::vs); // Inf or NaN
677 GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::le);
678 GetMasm()->B(labelExit);
679
680 // IsInteger returns false if src is Inf or NaN
681 GetMasm()->Bind(labelInfOrNan);
682 EncodeMov(dst, Imm(false));
683
684 GetMasm()->Bind(labelExit);
685 }
686
EncodeIsSafeInteger(Reg dst,Reg src)687 void Aarch64Encoder::EncodeIsSafeInteger(Reg dst, Reg src)
688 {
689 ASSERT(dst.IsScalar() && src.IsFloat());
690 ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
691
692 auto labelExit = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
693 auto labelFalse = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
694
695 // Check if IsInteger
696 EncodeCmpFracWithDelta(src);
697 GetMasm()->B(labelFalse, vixl::aarch64::Condition::vs); // Inf or NaN
698 GetMasm()->B(labelFalse, vixl::aarch64::Condition::gt);
699
700 // Check if it is safe, i.e. src can be represented in float/double without losing precision
701 if (src.GetSize() == WORD_SIZE) {
702 ScopedTmpRegF32 tmp(this);
703 EncodeAbs(tmp, src);
704 GetMasm()->Fcmp(VixlVReg(tmp), MaxIntAsExactFloat());
705 } else {
706 ScopedTmpRegF64 tmp(this);
707 EncodeAbs(tmp, src);
708 GetMasm()->Fcmp(VixlVReg(tmp), MaxIntAsExactDouble());
709 }
710 GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::le);
711 GetMasm()->B(labelExit);
712
713 // Return false if src !IsInteger
714 GetMasm()->Bind(labelFalse);
715 EncodeMov(dst, Imm(false));
716
717 GetMasm()->Bind(labelExit);
718 }
719
720 /* NaN values are needed to be canonicalized */
EncodeFpToBits(Reg dst,Reg src)721 void Aarch64Encoder::EncodeFpToBits(Reg dst, Reg src)
722 {
723 ASSERT(dst.IsScalar() && src.IsFloat());
724 ASSERT(dst.GetSize() == WORD_SIZE || dst.GetSize() == DOUBLE_WORD_SIZE);
725
726 if (dst.GetSize() == WORD_SIZE) {
727 ASSERT(src.GetSize() == WORD_SIZE);
728
729 constexpr auto FNAN = 0x7fc00000;
730
731 ScopedTmpRegU32 tmp(this);
732
733 GetMasm()->Fcmp(VixlVReg(src), VixlVReg(src));
734 GetMasm()->Mov(VixlReg(tmp), FNAN);
735 GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
736 GetMasm()->Csel(VixlReg(dst), VixlReg(tmp), VixlReg(dst), vixl::aarch64::Condition::ne);
737 } else {
738 ASSERT(src.GetSize() == DOUBLE_WORD_SIZE);
739
740 constexpr auto DNAN = 0x7ff8000000000000;
741
742 ScopedTmpRegU64 tmpReg(this);
743 auto tmp = VixlReg(tmpReg);
744
745 GetMasm()->Fcmp(VixlVReg(src), VixlVReg(src));
746 GetMasm()->Mov(tmp, DNAN);
747 GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
748 GetMasm()->Csel(VixlReg(dst), tmp, VixlReg(dst), vixl::aarch64::Condition::ne);
749 }
750 }
751
EncodeMoveBitsRaw(Reg dst,Reg src)752 void Aarch64Encoder::EncodeMoveBitsRaw(Reg dst, Reg src)
753 {
754 ASSERT((dst.IsFloat() && src.IsScalar()) || (src.IsFloat() && dst.IsScalar()));
755 if (dst.IsScalar()) {
756 ASSERT(src.GetSize() == dst.GetSize());
757 if (dst.GetSize() == WORD_SIZE) {
758 GetMasm()->Umov(VixlReg(dst).W(), VixlVReg(src).S(), 0);
759 } else {
760 GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
761 }
762 } else {
763 ASSERT(dst.GetSize() == src.GetSize());
764 ScopedTmpReg tmpReg(this, src.GetType());
765 auto srcReg = src;
766 auto rzero = GetRegfile()->GetZeroReg();
767 if (src.GetId() == rzero.GetId()) {
768 EncodeMov(tmpReg, Imm(0));
769 srcReg = tmpReg;
770 }
771
772 if (srcReg.GetSize() == WORD_SIZE) {
773 GetMasm()->Fmov(VixlVReg(dst).S(), VixlReg(srcReg).W());
774 } else {
775 GetMasm()->Fmov(VixlVReg(dst), VixlReg(srcReg));
776 }
777 }
778 }
779
EncodeReverseBytes(Reg dst,Reg src)780 void Aarch64Encoder::EncodeReverseBytes(Reg dst, Reg src)
781 {
782 auto rzero = GetRegfile()->GetZeroReg();
783 if (src.GetId() == rzero.GetId()) {
784 EncodeMov(dst, Imm(0));
785 return;
786 }
787
788 ASSERT(src.GetSize() > BYTE_SIZE);
789 ASSERT(src.GetSize() == dst.GetSize());
790
791 if (src.GetSize() == HALF_SIZE) {
792 GetMasm()->Rev16(VixlReg(dst), VixlReg(src));
793 GetMasm()->Sxth(VixlReg(dst), VixlReg(dst));
794 } else {
795 GetMasm()->Rev(VixlReg(dst), VixlReg(src));
796 }
797 }
798
EncodeBitCount(Reg dst,Reg src)799 void Aarch64Encoder::EncodeBitCount(Reg dst, Reg src)
800 {
801 auto rzero = GetRegfile()->GetZeroReg();
802 if (src.GetId() == rzero.GetId()) {
803 EncodeMov(dst, Imm(0));
804 return;
805 }
806
807 ASSERT(dst.GetSize() == WORD_SIZE);
808
809 ScopedTmpRegF64 tmpReg0(this);
810 vixl::aarch64::VRegister tmpReg;
811 if (src.GetSize() == DOUBLE_WORD_SIZE) {
812 tmpReg = VixlVReg(tmpReg0).D();
813 } else {
814 tmpReg = VixlVReg(tmpReg0).S();
815 }
816
817 if (src.GetSize() < WORD_SIZE) {
818 int64_t cutValue = (1ULL << src.GetSize()) - 1;
819 EncodeAnd(src, src, Imm(cutValue));
820 }
821
822 GetMasm()->Fmov(tmpReg, VixlReg(src));
823 GetMasm()->Cnt(tmpReg.V8B(), tmpReg.V8B());
824 GetMasm()->Addv(tmpReg.B(), tmpReg.V8B());
825 EncodeMov(dst, tmpReg0);
826 }
827
828 /* Since only ROR is supported on AArch64 we do
829 * left rotaion as ROR(v, -count) */
EncodeRotate(Reg dst,Reg src1,Reg src2,bool isRor)830 void Aarch64Encoder::EncodeRotate(Reg dst, Reg src1, Reg src2, bool isRor)
831 {
832 ASSERT(src1.GetSize() == WORD_SIZE || src1.GetSize() == DOUBLE_WORD_SIZE);
833 ASSERT(src1.GetSize() == dst.GetSize());
834 auto rzero = GetRegfile()->GetZeroReg();
835 if (rzero.GetId() == src2.GetId() || rzero.GetId() == src1.GetId()) {
836 EncodeMov(dst, src1);
837 return;
838 }
839 /* as the second parameters is always 32-bits long we have to
840 * adjust the counter register for the 64-bits first operand case */
841 if (isRor) {
842 auto count = (dst.GetSize() == WORD_SIZE ? VixlReg(src2) : VixlReg(src2).X());
843 GetMasm()->Ror(VixlReg(dst), VixlReg(src1), count);
844 } else {
845 ScopedTmpReg tmp(this);
846 auto cnt = (dst.GetId() == src1.GetId() ? tmp : dst);
847 auto count = (dst.GetSize() == WORD_SIZE ? VixlReg(cnt).W() : VixlReg(cnt).X());
848 auto source2 = (dst.GetSize() == WORD_SIZE ? VixlReg(src2).W() : VixlReg(src2).X());
849 GetMasm()->Neg(count, source2);
850 GetMasm()->Ror(VixlReg(dst), VixlReg(src1), count);
851 }
852 }
853
EncodeSignum(Reg dst,Reg src)854 void Aarch64Encoder::EncodeSignum(Reg dst, Reg src)
855 {
856 ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
857
858 ScopedTmpRegU32 tmp(this);
859 auto sign = (dst.GetId() == src.GetId() ? tmp : dst);
860
861 GetMasm()->Cmp(VixlReg(src), VixlImm(0));
862 GetMasm()->Cset(VixlReg(sign), vixl::aarch64::Condition::gt);
863
864 constexpr auto SHIFT_WORD_BITS = 31;
865 constexpr auto SHIFT_DWORD_BITS = 63;
866
867 /* The operation below is "sub dst, dst, src, lsr #reg_size-1"
868 * however, we can only encode as many as 32 bits in lsr field, so
869 * for 64-bits cases we cannot avoid having a separate lsr instruction */
870 if (src.GetSize() == WORD_SIZE) {
871 auto shift = Shift(src, LSR, SHIFT_WORD_BITS);
872 EncodeSub(dst, sign, shift);
873 } else {
874 ScopedTmpRegU64 shift(this);
875 sign = Reg(sign.GetId(), INT64_TYPE);
876 EncodeShr(shift, src, Imm(SHIFT_DWORD_BITS));
877 EncodeSub(dst, sign, shift);
878 }
879 }
880
EncodeCountLeadingZeroBits(Reg dst,Reg src)881 void Aarch64Encoder::EncodeCountLeadingZeroBits(Reg dst, Reg src)
882 {
883 auto rzero = GetRegfile()->GetZeroReg();
884 if (rzero.GetId() == src.GetId()) {
885 EncodeMov(dst, Imm(src.GetSize()));
886 return;
887 }
888 GetMasm()->Clz(VixlReg(dst), VixlReg(src));
889 }
890
EncodeCountTrailingZeroBits(Reg dst,Reg src)891 void Aarch64Encoder::EncodeCountTrailingZeroBits(Reg dst, Reg src)
892 {
893 auto rzero = GetRegfile()->GetZeroReg();
894 if (rzero.GetId() == src.GetId()) {
895 EncodeMov(dst, Imm(src.GetSize()));
896 return;
897 }
898 GetMasm()->Rbit(VixlReg(dst), VixlReg(src));
899 GetMasm()->Clz(VixlReg(dst), VixlReg(dst));
900 }
901
EncodeCeil(Reg dst,Reg src)902 void Aarch64Encoder::EncodeCeil(Reg dst, Reg src)
903 {
904 GetMasm()->Frintp(VixlVReg(dst), VixlVReg(src));
905 }
906
EncodeFloor(Reg dst,Reg src)907 void Aarch64Encoder::EncodeFloor(Reg dst, Reg src)
908 {
909 GetMasm()->Frintm(VixlVReg(dst), VixlVReg(src));
910 }
911
EncodeRint(Reg dst,Reg src)912 void Aarch64Encoder::EncodeRint(Reg dst, Reg src)
913 {
914 GetMasm()->Frintn(VixlVReg(dst), VixlVReg(src));
915 }
916
EncodeTrunc(Reg dst,Reg src)917 void Aarch64Encoder::EncodeTrunc(Reg dst, Reg src)
918 {
919 GetMasm()->Frintz(VixlVReg(dst), VixlVReg(src));
920 }
921
EncodeRoundAway(Reg dst,Reg src)922 void Aarch64Encoder::EncodeRoundAway(Reg dst, Reg src)
923 {
924 GetMasm()->Frinta(VixlVReg(dst), VixlVReg(src));
925 }
926
EncodeRoundToPInfReturnScalar(Reg dst,Reg src)927 void Aarch64Encoder::EncodeRoundToPInfReturnScalar(Reg dst, Reg src)
928 {
929 auto done = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
930 ScopedTmpReg tmp(this, src.GetType());
931 // round to nearest integer, ties away from zero
932 GetMasm()->Fcvtas(VixlReg(dst), VixlVReg(src));
933 // for positive values, zero and NaN inputs rounding is done
934 GetMasm()->Tbz(VixlReg(dst), dst.GetSize() - 1, done);
935 // if input is negative but not a tie, round to nearest is valid
936 // if input is a negative tie, dst += 1
937 GetMasm()->Frinta(VixlVReg(tmp), VixlVReg(src));
938 GetMasm()->Fsub(VixlVReg(tmp), VixlVReg(src), VixlVReg(tmp));
939 // NOLINTNEXTLINE(readability-magic-numbers)
940 GetMasm()->Fcmp(VixlVReg(tmp), 0.5F);
941 GetMasm()->Cinc(VixlReg(dst), VixlReg(dst), vixl::aarch64::Condition::eq);
942 GetMasm()->Bind(done);
943 }
944
EncodeRoundToPInfReturnFloat(Reg dst,Reg src)945 void Aarch64Encoder::EncodeRoundToPInfReturnFloat(Reg dst, Reg src)
946 {
947 ASSERT(src.GetType() == FLOAT64_TYPE);
948 ASSERT(dst.GetType() == FLOAT64_TYPE);
949
950 // CC-OFFNXT(G.NAM.03-CPP) project code style
951 constexpr double HALF = 0.5;
952 // CC-OFFNXT(G.NAM.03-CPP) project code style
953 constexpr double ONE = 1.0;
954
955 ScopedTmpRegF64 ceil(this);
956
957 // calculate ceil(val)
958 GetMasm()->Frintp(VixlVReg(ceil), VixlVReg(src));
959
960 // compare ceil(val) - val with 0.5
961 GetMasm()->Fsub(VixlVReg(dst), VixlVReg(ceil), VixlVReg(src));
962 GetMasm()->Fcmp(VixlVReg(dst), HALF);
963
964 // calculate ceil(val) - 1
965 GetMasm()->Fmov(VixlVReg(dst), ONE);
966 GetMasm()->Fsub(VixlVReg(dst), VixlVReg(ceil), VixlVReg(dst));
967
968 // select final value based on comparison result
969 GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(dst), VixlVReg(ceil), vixl::aarch64::Condition::gt);
970 }
971
EncodeCrc32Update(Reg dst,Reg crcReg,Reg valReg)972 void Aarch64Encoder::EncodeCrc32Update(Reg dst, Reg crcReg, Reg valReg)
973 {
974 auto tmp = dst.GetId() != crcReg.GetId() && dst.GetId() != valReg.GetId() ? dst : ScopedTmpReg(this, dst.GetType());
975 GetMasm()->Mvn(VixlReg(tmp), VixlReg(crcReg));
976 GetMasm()->Crc32b(VixlReg(tmp), VixlReg(tmp), VixlReg(valReg));
977 GetMasm()->Mvn(VixlReg(dst), VixlReg(tmp));
978 }
979
EncodeCompressEightUtf16ToUtf8CharsUsingSimd(Reg srcAddr,Reg dstAddr)980 void Aarch64Encoder::EncodeCompressEightUtf16ToUtf8CharsUsingSimd(Reg srcAddr, Reg dstAddr)
981 {
982 ScopedTmpReg tmp1(this, FLOAT64_TYPE);
983 ScopedTmpReg tmp2(this, FLOAT64_TYPE);
984 auto vixlVreg1 = vixl::aarch64::VRegister(tmp1.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8B);
985 ASSERT(vixlVreg1.IsValid());
986 auto vixlVreg2 = vixl::aarch64::VRegister(tmp2.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8B);
987 ASSERT(vixlVreg2.IsValid());
988 auto src = vixl::aarch64::MemOperand(VixlReg(srcAddr));
989 auto dst = vixl::aarch64::MemOperand(VixlReg(dstAddr));
990 GetMasm()->Ld2(vixlVreg1, vixlVreg2, src);
991 GetMasm()->St1(vixlVreg1, dst);
992 }
993
EncodeCompressSixteenUtf16ToUtf8CharsUsingSimd(Reg srcAddr,Reg dstAddr)994 void Aarch64Encoder::EncodeCompressSixteenUtf16ToUtf8CharsUsingSimd(Reg srcAddr, Reg dstAddr)
995 {
996 ScopedTmpReg tmp1(this, FLOAT64_TYPE);
997 ScopedTmpReg tmp2(this, FLOAT64_TYPE);
998 auto vixlVreg1 = vixl::aarch64::VRegister(tmp1.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
999 ASSERT(vixlVreg1.IsValid());
1000 auto vixlVreg2 = vixl::aarch64::VRegister(tmp2.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
1001 ASSERT(vixlVreg2.IsValid());
1002 auto src = vixl::aarch64::MemOperand(VixlReg(srcAddr));
1003 auto dst = vixl::aarch64::MemOperand(VixlReg(dstAddr));
1004 GetMasm()->Ld2(vixlVreg1, vixlVreg2, src);
1005 GetMasm()->St1(vixlVreg1, dst);
1006 }
1007
EncodeMemCharU8X32UsingSimd(Reg dst,Reg ch,Reg srcAddr,Reg tmp)1008 void Aarch64Encoder::EncodeMemCharU8X32UsingSimd(Reg dst, Reg ch, Reg srcAddr, Reg tmp)
1009 {
1010 ScopedTmpReg vTmp0(this, FLOAT64_TYPE);
1011 ScopedTmpReg vTmp1(this, FLOAT64_TYPE);
1012 auto vReg0 = vixl::aarch64::VRegister(vTmp0.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
1013 auto vReg1 = vixl::aarch64::VRegister(vTmp1.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
1014 auto vReg2 = vixl::aarch64::VRegister(tmp.GetId(), vixl::aarch64::VectorFormat::kFormat16B);
1015 auto xReg0 = vixl::aarch64::Register(dst.GetId(), vixl::aarch64::kXRegSize);
1016 auto labelReturn = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1017 auto labelFound = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1018 auto labelSecond16B = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1019 auto labelCheckV0D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1020 auto labelCheckV1D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1021
1022 GetMasm()->Ld1(vReg0, vReg1, vixl::aarch64::MemOperand(VixlReg(srcAddr)));
1023 GetMasm()->Dup(vReg2, VixlReg(ch));
1024 GetMasm()->Cmeq(vReg0, vReg0, vReg2);
1025 GetMasm()->Cmeq(vReg1, vReg1, vReg2);
1026 // Give up if char is not there
1027 GetMasm()->Addp(vReg2, vReg0, vReg1);
1028 GetMasm()->Addp(vReg2.V2D(), vReg2.V2D(), vReg2.V2D());
1029 GetMasm()->Mov(xReg0, vReg2.D(), 0);
1030 GetMasm()->Cbz(xReg0, labelReturn);
1031 // Inspect the first 16-byte block
1032 GetMasm()->Mov(xReg0, vReg0.D(), 0);
1033 GetMasm()->Cbz(xReg0, labelCheckV0D1);
1034 GetMasm()->Rev(xReg0, xReg0);
1035 GetMasm()->Clz(xReg0, xReg0);
1036 GetMasm()->B(labelFound);
1037 GetMasm()->Bind(labelCheckV0D1);
1038 GetMasm()->Mov(xReg0, vReg0.D(), 1U);
1039 GetMasm()->Cbz(xReg0, labelSecond16B);
1040 GetMasm()->Rev(xReg0, xReg0);
1041 GetMasm()->Clz(xReg0, xReg0);
1042 GetMasm()->Add(xReg0, xReg0, VixlImm(BITS_PER_UINT64));
1043 GetMasm()->B(labelFound);
1044 // Inspect the second 16-byte block
1045 GetMasm()->Bind(labelSecond16B);
1046 GetMasm()->Mov(xReg0, vReg1.D(), 0);
1047 GetMasm()->Cbz(xReg0, labelCheckV1D1);
1048 GetMasm()->Rev(xReg0, xReg0);
1049 GetMasm()->Clz(xReg0, xReg0);
1050 GetMasm()->Add(xReg0, xReg0, VixlImm(2U * BITS_PER_UINT64));
1051 GetMasm()->B(labelFound);
1052 GetMasm()->Bind(labelCheckV1D1);
1053 GetMasm()->Mov(xReg0, vReg1.D(), 1U);
1054 GetMasm()->Rev(xReg0, xReg0);
1055 GetMasm()->Clz(xReg0, xReg0);
1056 GetMasm()->Add(xReg0, xReg0, VixlImm(3U * BITS_PER_UINT64));
1057
1058 GetMasm()->Bind(labelFound);
1059 GetMasm()->Lsr(xReg0, xReg0, 3U);
1060 GetMasm()->Add(xReg0, xReg0, VixlReg(srcAddr));
1061 GetMasm()->Bind(labelReturn);
1062 }
1063
EncodeMemCharU16X16UsingSimd(Reg dst,Reg ch,Reg srcAddr,Reg tmp)1064 void Aarch64Encoder::EncodeMemCharU16X16UsingSimd(Reg dst, Reg ch, Reg srcAddr, Reg tmp)
1065 {
1066 ScopedTmpReg vTmp0(this, FLOAT64_TYPE);
1067 ScopedTmpReg vTmp1(this, FLOAT64_TYPE);
1068 auto vReg0 = vixl::aarch64::VRegister(vTmp0.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1069 auto vReg1 = vixl::aarch64::VRegister(vTmp1.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1070 auto vReg2 = vixl::aarch64::VRegister(tmp.GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1071 auto xReg0 = vixl::aarch64::Register(dst.GetId(), vixl::aarch64::kXRegSize);
1072 auto labelReturn = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1073 auto labelFound = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1074 auto labelSecond16B = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1075 auto labelCheckV0D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1076 auto labelCheckV1D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1077
1078 GetMasm()->Ld1(vReg0, vReg1, vixl::aarch64::MemOperand(VixlReg(srcAddr)));
1079 GetMasm()->Dup(vReg2, VixlReg(ch));
1080 GetMasm()->Cmeq(vReg0, vReg0, vReg2);
1081 GetMasm()->Cmeq(vReg1, vReg1, vReg2);
1082 // Give up if char is not there
1083 GetMasm()->Addp(vReg2, vReg0, vReg1);
1084 GetMasm()->Addp(vReg2.V2D(), vReg2.V2D(), vReg2.V2D());
1085 GetMasm()->Mov(xReg0, vReg2.D(), 0);
1086 GetMasm()->Cbz(xReg0, labelReturn);
1087 // Inspect the first 16-byte block
1088 GetMasm()->Mov(xReg0, vReg0.D(), 0);
1089 GetMasm()->Cbz(xReg0, labelCheckV0D1);
1090 GetMasm()->Rev(xReg0, xReg0);
1091 GetMasm()->Clz(xReg0, xReg0);
1092 GetMasm()->B(labelFound);
1093 GetMasm()->Bind(labelCheckV0D1);
1094 GetMasm()->Mov(xReg0, vReg0.D(), 1U);
1095 GetMasm()->Cbz(xReg0, labelSecond16B);
1096 GetMasm()->Rev(xReg0, xReg0);
1097 GetMasm()->Clz(xReg0, xReg0);
1098 GetMasm()->Add(xReg0, xReg0, VixlImm(BITS_PER_UINT64));
1099 GetMasm()->B(labelFound);
1100 // Inspect the second 16-byte block
1101 GetMasm()->Bind(labelSecond16B);
1102 GetMasm()->Mov(xReg0, vReg1.D(), 0);
1103 GetMasm()->Cbz(xReg0, labelCheckV1D1);
1104 GetMasm()->Rev(xReg0, xReg0);
1105 GetMasm()->Clz(xReg0, xReg0);
1106 GetMasm()->Add(xReg0, xReg0, VixlImm(2U * BITS_PER_UINT64));
1107 GetMasm()->B(labelFound);
1108 GetMasm()->Bind(labelCheckV1D1);
1109 GetMasm()->Mov(xReg0, vReg1.D(), 1U);
1110 GetMasm()->Rev(xReg0, xReg0);
1111 GetMasm()->Clz(xReg0, xReg0);
1112 GetMasm()->Add(xReg0, xReg0, VixlImm(3U * BITS_PER_UINT64));
1113
1114 GetMasm()->Bind(labelFound);
1115 GetMasm()->Lsr(xReg0, xReg0, 4U);
1116 GetMasm()->Lsl(xReg0, xReg0, 1U);
1117 GetMasm()->Add(xReg0, xReg0, VixlReg(srcAddr));
1118 GetMasm()->Bind(labelReturn);
1119 }
1120
EncodeMemCharU8X16UsingSimd(Reg dst,Reg ch,Reg srcAddr,Reg tmp)1121 void Aarch64Encoder::EncodeMemCharU8X16UsingSimd(Reg dst, Reg ch, Reg srcAddr, Reg tmp)
1122 {
1123 ScopedTmpReg vTmp0(this, FLOAT64_TYPE);
1124 ScopedTmpReg vTmp1(this, FLOAT64_TYPE);
1125 auto vReg0 = vixl::aarch64::VRegister(vTmp0.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat16B);
1126 auto vReg1 = vixl::aarch64::VRegister(tmp.GetId(), vixl::aarch64::VectorFormat::kFormat16B);
1127 auto xReg0 = vixl::aarch64::Register(dst.GetId(), vixl::aarch64::kXRegSize);
1128 auto labelReturn = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1129 auto labelFound = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1130 auto labelCheckV0D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1131
1132 GetMasm()->Ld1(vReg0, vixl::aarch64::MemOperand(VixlReg(srcAddr)));
1133 GetMasm()->Dup(vReg1, VixlReg(ch));
1134 GetMasm()->Cmeq(vReg0, vReg0, vReg1);
1135 // Give up if char is not there
1136 GetMasm()->Addp(vReg1.V2D(), vReg0.V2D(), vReg0.V2D());
1137 GetMasm()->Mov(xReg0, vReg1.D(), 0);
1138 GetMasm()->Cbz(xReg0, labelReturn);
1139 // Compute a pointer to the char
1140 GetMasm()->Mov(xReg0, vReg0.D(), 0);
1141 GetMasm()->Cbz(xReg0, labelCheckV0D1);
1142 GetMasm()->Rev(xReg0, xReg0);
1143 GetMasm()->Clz(xReg0, xReg0);
1144 GetMasm()->B(labelFound);
1145 GetMasm()->Bind(labelCheckV0D1);
1146 GetMasm()->Mov(xReg0, vReg0.D(), 1U);
1147 GetMasm()->Rev(xReg0, xReg0);
1148 GetMasm()->Clz(xReg0, xReg0);
1149 GetMasm()->Add(xReg0, xReg0, VixlImm(BITS_PER_UINT64));
1150 GetMasm()->Bind(labelFound);
1151 GetMasm()->Lsr(xReg0, xReg0, 3U); // number of 8-bit chars
1152 GetMasm()->Add(xReg0, xReg0, VixlReg(srcAddr));
1153 GetMasm()->Bind(labelReturn);
1154 }
1155
EncodeMemCharU16X8UsingSimd(Reg dst,Reg ch,Reg srcAddr,Reg tmp)1156 void Aarch64Encoder::EncodeMemCharU16X8UsingSimd(Reg dst, Reg ch, Reg srcAddr, Reg tmp)
1157 {
1158 ScopedTmpReg vTmp0(this, FLOAT64_TYPE);
1159 ScopedTmpReg vTmp1(this, FLOAT64_TYPE);
1160 auto vReg0 = vixl::aarch64::VRegister(vTmp0.GetReg().GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1161 auto vReg1 = vixl::aarch64::VRegister(tmp.GetId(), vixl::aarch64::VectorFormat::kFormat8H);
1162 auto xReg0 = vixl::aarch64::Register(dst.GetId(), vixl::aarch64::kXRegSize);
1163 auto labelReturn = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1164 auto labelFound = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1165 auto labelCheckV0D1 = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1166
1167 GetMasm()->Ld1(vReg0, vixl::aarch64::MemOperand(VixlReg(srcAddr)));
1168 GetMasm()->Dup(vReg1, VixlReg(ch));
1169 GetMasm()->Cmeq(vReg0, vReg0, vReg1);
1170 // Give up if char is not there
1171 GetMasm()->Addp(vReg1.V2D(), vReg0.V2D(), vReg0.V2D());
1172 GetMasm()->Mov(xReg0, vReg1.D(), 0);
1173 GetMasm()->Cbz(xReg0, labelReturn);
1174 // Compute a pointer to the char
1175 GetMasm()->Mov(xReg0, vReg0.D(), 0);
1176 GetMasm()->Cbz(xReg0, labelCheckV0D1);
1177 GetMasm()->Rev(xReg0, xReg0);
1178 GetMasm()->Clz(xReg0, xReg0);
1179 GetMasm()->B(labelFound);
1180 GetMasm()->Bind(labelCheckV0D1);
1181 GetMasm()->Mov(xReg0, vReg0.D(), 1U);
1182 GetMasm()->Rev(xReg0, xReg0);
1183 GetMasm()->Clz(xReg0, xReg0);
1184 GetMasm()->Add(xReg0, xReg0, VixlImm(BITS_PER_UINT64));
1185 GetMasm()->Bind(labelFound);
1186 GetMasm()->Lsr(xReg0, xReg0, 4U); // number of 16-bit chars
1187 GetMasm()->Lsl(xReg0, xReg0, 1U); // number of bytes
1188 GetMasm()->Add(xReg0, xReg0, VixlReg(srcAddr));
1189 GetMasm()->Bind(labelReturn);
1190 }
1191
EncodeUnsignedExtendBytesToShorts(Reg dst,Reg src)1192 void Aarch64Encoder::EncodeUnsignedExtendBytesToShorts(Reg dst, Reg src)
1193 {
1194 GetMasm()->Uxtl(VixlVReg(dst).V8H(), VixlVReg(src).V8B());
1195 }
1196
EncodeReverseHalfWords(Reg dst,Reg src)1197 void Aarch64Encoder::EncodeReverseHalfWords(Reg dst, Reg src)
1198 {
1199 ASSERT(src.GetSize() == dst.GetSize());
1200
1201 GetMasm()->rev64(VixlVReg(dst).V4H(), VixlVReg(src).V4H());
1202 }
1203
CanEncodeBitCount()1204 bool Aarch64Encoder::CanEncodeBitCount()
1205 {
1206 return true;
1207 }
1208
CanEncodeCompressedStringCharAt()1209 bool Aarch64Encoder::CanEncodeCompressedStringCharAt()
1210 {
1211 return true;
1212 }
1213
CanEncodeCompressedStringCharAtI()1214 bool Aarch64Encoder::CanEncodeCompressedStringCharAtI()
1215 {
1216 return true;
1217 }
1218
CanEncodeMAdd()1219 bool Aarch64Encoder::CanEncodeMAdd()
1220 {
1221 return true;
1222 }
1223
CanEncodeMSub()1224 bool Aarch64Encoder::CanEncodeMSub()
1225 {
1226 return true;
1227 }
1228
CanEncodeMNeg()1229 bool Aarch64Encoder::CanEncodeMNeg()
1230 {
1231 return true;
1232 }
1233
CanEncodeOrNot()1234 bool Aarch64Encoder::CanEncodeOrNot()
1235 {
1236 return true;
1237 }
1238
CanEncodeAndNot()1239 bool Aarch64Encoder::CanEncodeAndNot()
1240 {
1241 return true;
1242 }
1243
CanEncodeXorNot()1244 bool Aarch64Encoder::CanEncodeXorNot()
1245 {
1246 return true;
1247 }
1248
GetCursorOffset() const1249 size_t Aarch64Encoder::GetCursorOffset() const
1250 {
1251 return GetMasm()->GetBuffer()->GetCursorOffset();
1252 }
1253
SetCursorOffset(size_t offset)1254 void Aarch64Encoder::SetCursorOffset(size_t offset)
1255 {
1256 GetMasm()->GetBuffer()->Rewind(offset);
1257 }
1258
1259 /* return the power of 2 for the size of the type */
EncodeGetTypeSize(Reg size,Reg type)1260 void Aarch64Encoder::EncodeGetTypeSize(Reg size, Reg type)
1261 {
1262 auto sreg = VixlReg(type);
1263 auto dreg = VixlReg(size);
1264 constexpr uint8_t I16 = 0x5;
1265 constexpr uint8_t I32 = 0x7;
1266 constexpr uint8_t F64 = 0xa;
1267 constexpr uint8_t REF = 0xd;
1268 constexpr uint8_t SMALLREF = ark::OBJECT_POINTER_SIZE < sizeof(uint64_t) ? 1 : 0;
1269 auto end = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1270
1271 GetMasm()->Mov(dreg, VixlImm(0));
1272 GetMasm()->Cmp(sreg, VixlImm(I16));
1273 GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1274 GetMasm()->Cmp(sreg, VixlImm(I32));
1275 GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1276 GetMasm()->Cmp(sreg, VixlImm(F64));
1277 GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1278 GetMasm()->Cmp(sreg, VixlImm(REF));
1279 GetMasm()->B(end, vixl::aarch64::Condition::ne);
1280 GetMasm()->Sub(dreg, dreg, VixlImm(SMALLREF));
1281 GetMasm()->Bind(end);
1282 }
1283
EncodeReverseBits(Reg dst,Reg src)1284 void Aarch64Encoder::EncodeReverseBits(Reg dst, Reg src)
1285 {
1286 auto rzero = GetRegfile()->GetZeroReg();
1287 if (rzero.GetId() == src.GetId()) {
1288 EncodeMov(dst, Imm(0));
1289 return;
1290 }
1291 ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
1292 ASSERT(src.GetSize() == dst.GetSize());
1293
1294 GetMasm()->Rbit(VixlReg(dst), VixlReg(src));
1295 }
1296
EncodeCompressedStringCharAt(ArgsCompressedStringCharAt && args)1297 void Aarch64Encoder::EncodeCompressedStringCharAt(ArgsCompressedStringCharAt &&args)
1298 {
1299 auto [dst, str, idx, length, tmp, dataOffset, shift] = args;
1300 ASSERT(dst.GetSize() == HALF_SIZE);
1301
1302 auto labelNotCompressed = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1303 auto labelCharLoaded = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1304 auto vixlTmp = VixlReg(tmp, DOUBLE_WORD_SIZE);
1305 auto vixlDst = VixlReg(dst);
1306
1307 GetMasm()->Tbnz(VixlReg(length), 0, labelNotCompressed);
1308 EncodeAdd(tmp, str, idx);
1309 GetMasm()->ldrb(vixlDst, MemOperand(vixlTmp, dataOffset));
1310 GetMasm()->B(labelCharLoaded);
1311 GetMasm()->Bind(labelNotCompressed);
1312 EncodeAdd(tmp, str, Shift(idx, shift));
1313 GetMasm()->ldrh(vixlDst, MemOperand(vixlTmp, dataOffset));
1314 GetMasm()->Bind(labelCharLoaded);
1315 }
1316
EncodeCompressedStringCharAtI(ArgsCompressedStringCharAtI && args)1317 void Aarch64Encoder::EncodeCompressedStringCharAtI(ArgsCompressedStringCharAtI &&args)
1318 {
1319 auto [dst, str, length, dataOffset, index, shift] = args;
1320 ASSERT(dst.GetSize() == HALF_SIZE);
1321
1322 auto labelNotCompressed = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1323 auto labelCharLoaded = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1324 auto vixlStr = VixlReg(str);
1325 auto vixlDst = VixlReg(dst);
1326
1327 auto rzero = GetRegfile()->GetZeroReg().GetId();
1328 if (str.GetId() == rzero) {
1329 return;
1330 }
1331 GetMasm()->Tbnz(VixlReg(length), 0, labelNotCompressed);
1332 GetMasm()->Ldrb(vixlDst, MemOperand(vixlStr, dataOffset + index));
1333 GetMasm()->B(labelCharLoaded);
1334 GetMasm()->Bind(labelNotCompressed);
1335 GetMasm()->Ldrh(vixlDst, MemOperand(vixlStr, dataOffset + (index << shift)));
1336 GetMasm()->Bind(labelCharLoaded);
1337 }
1338
1339 /* Unsafe builtins implementation */
EncodeCompareAndSwap(Reg dst,Reg obj,Reg offset,Reg val,Reg newval)1340 void Aarch64Encoder::EncodeCompareAndSwap(Reg dst, Reg obj, Reg offset, Reg val, Reg newval)
1341 {
1342 /* Modeled according to the following logic:
1343 .L2:
1344 ldaxr cur, [addr]
1345 cmp cur, old
1346 bne .L3
1347 stlxr res, new, [addr]
1348 cbnz res, .L2
1349 .L3:
1350 cset w0, eq
1351 */
1352 ScopedTmpReg addr(this, true); /* LR is used */
1353 ScopedTmpReg cur(this, val.GetType());
1354 ScopedTmpReg res(this, val.GetType());
1355 auto loop = CreateLabel();
1356 auto exit = CreateLabel();
1357
1358 /* ldaxr wants [reg]-form of memref (no offset or disp) */
1359 EncodeAdd(addr, obj, offset);
1360
1361 BindLabel(loop);
1362 EncodeLdrExclusive(cur, addr, true);
1363 EncodeJump(exit, cur, val, Condition::NE);
1364 cur.Release();
1365 EncodeStrExclusive(res, newval, addr, true);
1366 EncodeJump(loop, res, Imm(0), Condition::NE);
1367 BindLabel(exit);
1368
1369 GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::eq);
1370 }
1371
EncodeUnsafeGetAndSet(Reg dst,Reg obj,Reg offset,Reg val)1372 void Aarch64Encoder::EncodeUnsafeGetAndSet(Reg dst, Reg obj, Reg offset, Reg val)
1373 {
1374 auto cur = ScopedTmpReg(this, val.GetType());
1375 auto last = ScopedTmpReg(this, val.GetType());
1376 auto addr = ScopedTmpReg(this, true); /* LR is used */
1377 auto mem = MemRef(addr);
1378 auto restart = CreateLabel();
1379 auto retryLdaxr = CreateLabel();
1380
1381 /* ldaxr wants [reg]-form of memref (no offset or disp) */
1382 EncodeAdd(addr, obj, offset);
1383
1384 /* Since GetAndSet is defined as a non-faulting operation we
1385 * have to cover two possible faulty cases:
1386 * 1. stlxr failed, we have to retry ldxar
1387 * 2. the value we got via ldxar was not the value we initially
1388 * loaded, we have to start from the very beginning */
1389 BindLabel(restart);
1390 EncodeLdrAcquire(last, false, mem);
1391
1392 BindLabel(retryLdaxr);
1393 EncodeLdrExclusive(cur, addr, true);
1394 EncodeJump(restart, cur, last, Condition::NE);
1395 last.Release();
1396 EncodeStrExclusive(dst, val, addr, true);
1397 EncodeJump(retryLdaxr, dst, Imm(0), Condition::NE);
1398
1399 EncodeMov(dst, cur);
1400 }
1401
EncodeUnsafeGetAndAdd(Reg dst,Reg obj,Reg offset,Reg val,Reg tmp)1402 void Aarch64Encoder::EncodeUnsafeGetAndAdd(Reg dst, Reg obj, Reg offset, Reg val, Reg tmp)
1403 {
1404 ScopedTmpReg cur(this, val.GetType());
1405 ScopedTmpReg last(this, val.GetType());
1406 auto newval = Reg(tmp.GetId(), val.GetType());
1407
1408 auto restart = CreateLabel();
1409 auto retryLdaxr = CreateLabel();
1410
1411 /* addr_reg aliases obj, obj reg will be restored bedore exit */
1412 auto addr = Reg(obj.GetId(), INT64_TYPE);
1413
1414 /* ldaxr wants [reg]-form of memref (no offset or disp) */
1415 auto mem = MemRef(addr);
1416 EncodeAdd(addr, obj, offset);
1417
1418 /* Since GetAndAdd is defined as a non-faulting operation we
1419 * have to cover two possible faulty cases:
1420 * 1. stlxr failed, we have to retry ldxar
1421 * 2. the value we got via ldxar was not the value we initially
1422 * loaded, we have to start from the very beginning */
1423 BindLabel(restart);
1424 EncodeLdrAcquire(last, false, mem);
1425 EncodeAdd(newval, last, val);
1426
1427 BindLabel(retryLdaxr);
1428 EncodeLdrExclusive(cur, addr, true);
1429 EncodeJump(restart, cur, last, Condition::NE);
1430 last.Release();
1431 EncodeStrExclusive(dst, newval, addr, true);
1432 EncodeJump(retryLdaxr, dst, Imm(0), Condition::NE);
1433
1434 EncodeSub(obj, addr, offset); /* restore the original value */
1435 EncodeMov(dst, cur);
1436 }
1437
EncodeMemoryBarrier(memory_order::Order order)1438 void Aarch64Encoder::EncodeMemoryBarrier(memory_order::Order order)
1439 {
1440 switch (order) {
1441 case memory_order::ACQUIRE: {
1442 GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierReads);
1443 break;
1444 }
1445 case memory_order::RELEASE: {
1446 GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierWrites);
1447 break;
1448 }
1449 case memory_order::FULL: {
1450 GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierAll);
1451 break;
1452 }
1453 default:
1454 break;
1455 }
1456 }
1457
EncodeNot(Reg dst,Reg src)1458 void Aarch64Encoder::EncodeNot(Reg dst, Reg src)
1459 {
1460 GetMasm()->Mvn(VixlReg(dst), VixlReg(src));
1461 }
1462
EncodeCastFloat(Reg dst,bool dstSigned,Reg src,bool srcSigned)1463 void Aarch64Encoder::EncodeCastFloat(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1464 {
1465 // We DON'T support casts from float32/64 to int8/16 and bool, because this caste is not declared anywhere
1466 // in other languages and architecture, we do not know what the behavior should be.
1467 // But there is one implementation in other function: "EncodeCastFloatWithSmallDst". Call it in the "EncodeCast"
1468 // function instead of "EncodeCastFloat". It works as follows: cast from float32/64 to int32, moving sign bit from
1469 // int32 to dst type, then extend number from dst type to int32 (a necessary condition for an isa). All work in dst
1470 // register.
1471 ASSERT(dst.GetSize() >= WORD_SIZE);
1472
1473 if (src.IsFloat() && dst.IsScalar()) {
1474 if (dstSigned) {
1475 GetMasm()->Fcvtzs(VixlReg(dst), VixlVReg(src));
1476 } else {
1477 GetMasm()->Fcvtzu(VixlReg(dst), VixlVReg(src));
1478 }
1479 return;
1480 }
1481 if (src.IsScalar() && dst.IsFloat()) {
1482 auto rzero = GetRegfile()->GetZeroReg().GetId();
1483 if (src.GetId() == rzero) {
1484 if (dst.GetSize() == WORD_SIZE) {
1485 GetMasm()->Fmov(VixlVReg(dst), 0.0F);
1486 } else {
1487 GetMasm()->Fmov(VixlVReg(dst), 0.0);
1488 }
1489 } else if (srcSigned) {
1490 GetMasm()->Scvtf(VixlVReg(dst), VixlReg(src));
1491 } else {
1492 GetMasm()->Ucvtf(VixlVReg(dst), VixlReg(src));
1493 }
1494 return;
1495 }
1496 if (src.IsFloat() && dst.IsFloat()) {
1497 if (src.GetSize() != dst.GetSize()) {
1498 GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
1499 return;
1500 }
1501 GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
1502 return;
1503 }
1504 UNREACHABLE();
1505 }
1506
EncodeCastFloatWithSmallDst(Reg dst,bool dstSigned,Reg src,bool srcSigned)1507 void Aarch64Encoder::EncodeCastFloatWithSmallDst(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1508 {
1509 // Dst bool type don't supported!
1510
1511 if (src.IsFloat() && dst.IsScalar()) {
1512 if (dstSigned) {
1513 GetMasm()->Fcvtzs(VixlReg(dst), VixlVReg(src));
1514 if (dst.GetSize() < WORD_SIZE) {
1515 constexpr uint32_t TEST_BIT = (1U << (static_cast<uint32_t>(WORD_SIZE) - 1));
1516 ScopedTmpReg tmpReg1(this, dst.GetType());
1517 auto tmp1 = VixlReg(tmpReg1);
1518 ScopedTmpReg tmpReg2(this, dst.GetType());
1519 auto tmp2 = VixlReg(tmpReg2);
1520
1521 // NOLINTNEXTLINE(hicpp-signed-bitwise)
1522 int32_t setBit = (dst.GetSize() == BYTE_SIZE) ? (1UL << (BYTE_SIZE - 1)) : (1UL << (HALF_SIZE - 1));
1523 int32_t remBit = setBit - 1;
1524 GetMasm()->Ands(tmp1, VixlReg(dst), TEST_BIT);
1525
1526 GetMasm()->Orr(tmp1, VixlReg(dst), setBit);
1527 GetMasm()->And(tmp2, VixlReg(dst), remBit);
1528 // Select result - if zero set - tmp2, else tmp1
1529 GetMasm()->Csel(VixlReg(dst), tmp2, tmp1, vixl::aarch64::eq);
1530 EncodeCastScalar(Reg(dst.GetId(), INT32_TYPE), dstSigned, dst, dstSigned);
1531 }
1532 return;
1533 }
1534 GetMasm()->Fcvtzu(VixlReg(dst), VixlVReg(src));
1535 if (dst.GetSize() < WORD_SIZE) {
1536 EncodeCastScalar(Reg(dst.GetId(), INT32_TYPE), dstSigned, dst, dstSigned);
1537 }
1538 return;
1539 }
1540 if (src.IsScalar() && dst.IsFloat()) {
1541 if (srcSigned) {
1542 GetMasm()->Scvtf(VixlVReg(dst), VixlReg(src));
1543 } else {
1544 GetMasm()->Ucvtf(VixlVReg(dst), VixlReg(src));
1545 }
1546 return;
1547 }
1548 if (src.IsFloat() && dst.IsFloat()) {
1549 if (src.GetSize() != dst.GetSize()) {
1550 GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
1551 return;
1552 }
1553 GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
1554 return;
1555 }
1556 UNREACHABLE();
1557 }
1558
EncodeCastSigned(Reg dst,Reg src)1559 void Aarch64Encoder::EncodeCastSigned(Reg dst, Reg src)
1560 {
1561 size_t srcSize = src.GetSize();
1562 size_t dstSize = dst.GetSize();
1563 auto srcR = Reg(src.GetId(), dst.GetType());
1564     // Clamp the source size to the destination size, then sign-extend
1565 if (srcSize > dstSize) {
1566 srcSize = dstSize;
1567 }
1568 switch (srcSize) {
1569 case BYTE_SIZE:
1570 GetMasm()->Sxtb(VixlReg(dst), VixlReg(srcR));
1571 break;
1572 case HALF_SIZE:
1573 GetMasm()->Sxth(VixlReg(dst), VixlReg(srcR));
1574 break;
1575 case WORD_SIZE:
1576 GetMasm()->Sxtw(VixlReg(dst), VixlReg(srcR));
1577 break;
1578 case DOUBLE_WORD_SIZE:
1579 GetMasm()->Mov(VixlReg(dst), VixlReg(srcR));
1580 break;
1581 default:
1582 SetFalseResult();
1583 break;
1584 }
1585 }
1586
EncodeCastUnsigned(Reg dst,Reg src)1587 void Aarch64Encoder::EncodeCastUnsigned(Reg dst, Reg src)
1588 {
1589 size_t srcSize = src.GetSize();
1590 size_t dstSize = dst.GetSize();
1591 auto srcR = Reg(src.GetId(), dst.GetType());
1592 if (srcSize > dstSize && dstSize < WORD_SIZE) {
1593         // We need to truncate the value when the destination is narrower than 32 bits; the ISA requires it.
1594 int64_t cutValue = (1ULL << dstSize) - 1;
1595 GetMasm()->And(VixlReg(dst), VixlReg(src), VixlImm(cutValue));
1596 return;
1597 }
1598 // Else unsigned extend
1599 switch (srcSize) {
1600 case BYTE_SIZE:
1601 GetMasm()->Uxtb(VixlReg(dst), VixlReg(srcR));
1602 return;
1603 case HALF_SIZE:
1604 GetMasm()->Uxth(VixlReg(dst), VixlReg(srcR));
1605 return;
1606 case WORD_SIZE:
1607 GetMasm()->Uxtw(VixlReg(dst), VixlReg(srcR));
1608 return;
1609 case DOUBLE_WORD_SIZE:
1610 GetMasm()->Mov(VixlReg(dst), VixlReg(srcR));
1611 return;
1612 default:
1613 SetFalseResult();
1614 return;
1615 }
1616 }
1617
EncodeCastScalar(Reg dst,bool dstSigned,Reg src,bool srcSigned)1618 void Aarch64Encoder::EncodeCastScalar(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1619 {
1620 size_t srcSize = src.GetSize();
1621 size_t dstSize = dst.GetSize();
1622     // In our ISA the minimal type is 32-bit, so any type narrower than 32 bits
1623     // must be extended to 32 bits. Therefore we may need two casts
1624     // (for example, i8->u16 is performed as i8->u16 followed by u16->u32).
1625 if (dstSize < WORD_SIZE) {
1626 if (srcSize > dstSize) {
1627 if (dstSigned) {
1628 EncodeCastSigned(dst, src);
1629 } else {
1630 EncodeCastUnsigned(dst, src);
1631 }
1632 return;
1633 }
1634 if (srcSize == dstSize) {
1635 GetMasm()->Mov(VixlReg(dst), VixlReg(src));
1636 if (!(srcSigned || dstSigned) || (srcSigned && dstSigned)) {
1637 return;
1638 }
1639 if (dstSigned) {
1640 EncodeCastSigned(Reg(dst.GetId(), INT32_TYPE), dst);
1641 } else {
1642 EncodeCastUnsigned(Reg(dst.GetId(), INT32_TYPE), dst);
1643 }
1644 return;
1645 }
1646 if (srcSigned) {
1647 EncodeCastSigned(dst, src);
1648 if (!dstSigned) {
1649 EncodeCastUnsigned(Reg(dst.GetId(), INT32_TYPE), dst);
1650 }
1651 } else {
1652 EncodeCastUnsigned(dst, src);
1653 if (dstSigned) {
1654 EncodeCastSigned(Reg(dst.GetId(), INT32_TYPE), dst);
1655 }
1656 }
1657 } else {
1658 if (srcSize == dstSize) {
1659 GetMasm()->Mov(VixlReg(dst), VixlReg(src));
1660 return;
1661 }
1662 if (srcSigned) {
1663 EncodeCastSigned(dst, src);
1664 } else {
1665 EncodeCastUnsigned(dst, src);
1666 }
1667 }
1668 }
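
/*
 * Worked example (illustration only) of the two-step narrow cast above, for i8 -> u16:
 *
 *   src = i8 -1 (0xFF)
 *   EncodeCastSigned(dst, src)              : Sxtb -> 0xFFFFFFFF
 *   EncodeCastUnsigned(dst.As(INT32), dst)  : Uxth -> 0x0000FFFF
 *
 * so the final 32-bit value is 65535: the i8 value reinterpreted through u16 and then
 * zero-extended to the 32-bit minimum ISA width.
 */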
1669
EncodeFastPathDynamicCast(Reg dst,Reg src,LabelHolder::LabelId slow)1670 void Aarch64Encoder::EncodeFastPathDynamicCast(Reg dst, Reg src, LabelHolder::LabelId slow)
1671 {
1672 ASSERT(src.IsFloat() && dst.IsScalar());
1673
1674 CHECK_EQ(src.GetSize(), BITS_PER_UINT64);
1675 CHECK_EQ(dst.GetSize(), BITS_PER_UINT32);
1676
1677     // We use a slow path because the general JS double -> int32 cast is complex: only a few common cases are
1678     // checked here, the remaining checks are moved to the slow path. If the CPU supports the dedicated JS
1679     // double -> int32 instruction, no slow path is needed.
1680 if (!IsLabelValid(slow)) {
1681 // use special JS aarch64 instruction
1682 #ifndef NDEBUG
1683 vixl::CPUFeaturesScope scope(GetMasm(), vixl::CPUFeatures::kFP, vixl::CPUFeatures::kJSCVT);
1684 #endif
1685 GetMasm()->Fjcvtzs(VixlReg(dst), VixlVReg(src));
1686 return;
1687 }
1688
1689     // Infinities and out-of-range values saturate here to INT64_MIN or INT64_MAX, while NaN converts to 0
1690 GetMasm()->Fcvtzs(VixlReg(dst, DOUBLE_WORD_SIZE), VixlVReg(src));
1691     // check INT64_MIN: subtracting 1 sets the V flag only for INT64_MIN
1692 GetMasm()->Cmp(VixlReg(dst, DOUBLE_WORD_SIZE), VixlImm(1));
1693     // check INT64_MAX: comparing with -1 (adding 1) sets the V flag only for INT64_MAX
1694 GetMasm()->Ccmp(VixlReg(dst, DOUBLE_WORD_SIZE), VixlImm(-1), vixl::aarch64::StatusFlags::VFlag,
1695 vixl::aarch64::Condition::vc);
1696 auto slowLabel {static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(slow)};
1697 // jump to slow path in case of overflow
1698 GetMasm()->B(slowLabel, vixl::aarch64::Condition::vs);
1699 }
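
/*
 * Rough C++ equivalent of the fallback path above (a readability sketch, not emitted code):
 *
 *   int64_t t = static_cast<int64_t>(v);      // Fcvtzs: NaN -> 0, infinities and huge values saturate
 *   if (t == INT64_MIN || t == INT64_MAX) {
 *       goto slow;                            // the double may be outside the exactly-convertible range
 *   }
 *   // otherwise the low 32 bits of t already hold the truncated value modulo 2^32, which is what dst receives
 */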
1700
EncodeJsDoubleToCharCast(Reg dst,Reg src)1701 void Aarch64Encoder::EncodeJsDoubleToCharCast(Reg dst, Reg src)
1702 {
1703 ASSERT(src.IsFloat() && dst.IsScalar());
1704
1705 CHECK_EQ(src.GetSize(), BITS_PER_UINT64);
1706 CHECK_EQ(dst.GetSize(), BITS_PER_UINT32);
1707
1708 // use special JS aarch64 instruction
1709 #ifndef NDEBUG
1710 vixl::CPUFeaturesScope scope(GetMasm(), vixl::CPUFeatures::kFP, vixl::CPUFeatures::kJSCVT);
1711 #endif
1712 GetMasm()->Fjcvtzs(VixlReg(dst), VixlVReg(src));
1713 }
1714
EncodeJsDoubleToCharCast(Reg dst,Reg src,Reg tmp,uint32_t failureResult)1715 void Aarch64Encoder::EncodeJsDoubleToCharCast(Reg dst, Reg src, Reg tmp, uint32_t failureResult)
1716 {
1717 ASSERT(src.IsFloat() && dst.IsScalar());
1718
1719 CHECK_EQ(src.GetSize(), BITS_PER_UINT64);
1720 CHECK_EQ(dst.GetSize(), BITS_PER_UINT32);
1721
1722     // Infinities and out-of-range values saturate here to INT64_MIN or INT64_MAX, while NaN converts to 0
1723 GetMasm()->Fcvtzs(VixlReg(dst, DOUBLE_WORD_SIZE), VixlVReg(src));
1724     // check INT64_MIN: subtracting 1 sets the V flag only for INT64_MIN
1725 GetMasm()->Cmp(VixlReg(dst, DOUBLE_WORD_SIZE), VixlImm(1));
1726     // check INT64_MAX: comparing with -1 (adding 1) sets the V flag only for INT64_MAX
1727 GetMasm()->Ccmp(VixlReg(dst, DOUBLE_WORD_SIZE), VixlImm(-1), vixl::aarch64::StatusFlags::VFlag,
1728 vixl::aarch64::Condition::vc);
1729 // 'And' with 0xffff
1730 constexpr uint32_t UTF16_CHAR_MASK = 0xffff;
1731 GetMasm()->And(VixlReg(dst), VixlReg(dst), VixlImm(UTF16_CHAR_MASK));
1732     // 'And' and 'Mov' do not change the flags, so we can still conditionally select the failure result if the
1733     // INT64_MIN/INT64_MAX checks above detected an overflow
1734 GetMasm()->mov(VixlReg(tmp), failureResult);
1735 GetMasm()->csel(VixlReg(dst), VixlReg(tmp), VixlReg(dst), vixl::aarch64::Condition::vs);
1736 }
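
/*
 * Illustrative values for the sequence above (assuming the usual JS ToUint16-style semantics):
 *
 *   65.5    -> Fcvtzs 65    -> & 0xffff -> 65  ('A')
 *   65601.0 -> Fcvtzs 65601 -> & 0xffff -> 65  (65601 == 65536 + 65)
 *   NaN     -> Fcvtzs 0     -> & 0xffff -> 0
 *   1e20    -> saturates to INT64_MAX, the V flag is set -> Csel picks failureResult
 */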
1737
EncodeCast(Reg dst,bool dstSigned,Reg src,bool srcSigned)1738 void Aarch64Encoder::EncodeCast(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1739 {
1740 if (src.IsFloat() || dst.IsFloat()) {
1741 EncodeCastFloat(dst, dstSigned, src, srcSigned);
1742 return;
1743 }
1744
1745 ASSERT(src.IsScalar() && dst.IsScalar());
1746 auto rzero = GetRegfile()->GetZeroReg().GetId();
1747 if (src.GetId() == rzero) {
1748 ASSERT(dst.GetId() != rzero);
1749 EncodeMov(dst, Imm(0));
1750 return;
1751 }
1752 // Scalar part
1753 EncodeCastScalar(dst, dstSigned, src, srcSigned);
1754 }
1755
EncodeCastToBool(Reg dst,Reg src)1756 void Aarch64Encoder::EncodeCastToBool(Reg dst, Reg src)
1757 {
1758     // The ISA says that we only support the casts:
1759     // i32tou1, i64tou1, u32tou1, u64tou1
1760 ASSERT(src.IsScalar());
1761 ASSERT(dst.IsScalar());
1762
1763 GetMasm()->Cmp(VixlReg(src), VixlImm(0));
1764     // In our ISA the minimal type is 32-bit, so bool is kept in a 32-bit register
1765 GetMasm()->Cset(VixlReg(Reg(dst.GetId(), INT32_TYPE)), vixl::aarch64::Condition::ne);
1766 }
1767
EncodeAdd(Reg dst,Reg src0,Shift src1)1768 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src0, Shift src1)
1769 {
1770 if (dst.IsFloat()) {
1771 UNREACHABLE();
1772 }
1773 ASSERT(src0.GetSize() <= dst.GetSize());
1774 if (src0.GetSize() < dst.GetSize()) {
1775 auto src0Reg = Reg(src0.GetId(), dst.GetType());
1776 auto src1Reg = Reg(src1.GetBase().GetId(), dst.GetType());
1777 GetMasm()->Add(VixlReg(dst), VixlReg(src0Reg), VixlShift(Shift(src1Reg, src1.GetType(), src1.GetScale())));
1778 return;
1779 }
1780 GetMasm()->Add(VixlReg(dst), VixlReg(src0), VixlShift(src1));
1781 }
1782
EncodeAdd(Reg dst,Reg src0,Reg src1)1783 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src0, Reg src1)
1784 {
1785 if (dst.IsFloat()) {
1786 GetMasm()->Fadd(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1787 return;
1788 }
1789
1790     /* If any of the operands is 64 bits wide,
1791      * force a 64-bit-wide operation */
1792 if ((src0.GetSize() | src1.GetSize() | dst.GetSize()) >= DOUBLE_WORD_SIZE) {
1793 GetMasm()->Add(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1794 } else {
1795         /* Otherwise do a 32-bit operation, since any smaller
1796          * sizes have to be upcast to 32 bits anyway */
1797 GetMasm()->Add(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1798 }
1799 }
1800
EncodeSub(Reg dst,Reg src0,Shift src1)1801 void Aarch64Encoder::EncodeSub(Reg dst, Reg src0, Shift src1)
1802 {
1803 ASSERT(dst.IsScalar());
1804 GetMasm()->Sub(VixlReg(dst), VixlReg(src0), VixlShift(src1));
1805 }
1806
EncodeSub(Reg dst,Reg src0,Reg src1)1807 void Aarch64Encoder::EncodeSub(Reg dst, Reg src0, Reg src1)
1808 {
1809 if (dst.IsFloat()) {
1810 GetMasm()->Fsub(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1811 return;
1812 }
1813
1814     /* If any of the operands is 64 bits wide,
1815      * force a 64-bit-wide operation */
1816 if ((src0.GetSize() | src1.GetSize() | dst.GetSize()) >= DOUBLE_WORD_SIZE) {
1817 GetMasm()->Sub(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1818 } else {
1819         /* Otherwise do a 32-bit operation, since any smaller
1820          * sizes have to be upcast to 32 bits anyway */
1821 GetMasm()->Sub(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1822 }
1823 }
1824
EncodeMul(Reg dst,Reg src0,Reg src1)1825 void Aarch64Encoder::EncodeMul(Reg dst, Reg src0, Reg src1)
1826 {
1827 if (dst.IsFloat()) {
1828 GetMasm()->Fmul(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1829 return;
1830 }
1831 auto rzero = GetRegfile()->GetZeroReg().GetId();
1832 if (src0.GetId() == rzero || src1.GetId() == rzero) {
1833 EncodeMov(dst, Imm(0));
1834 return;
1835 }
1836 GetMasm()->Mul(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1837 }
1838
EncodeAddOverflow(compiler::LabelHolder::LabelId id,Reg dst,Reg src0,Reg src1,Condition cc)1839 void Aarch64Encoder::EncodeAddOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1840 {
1841 ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1842 ASSERT(cc == Condition::VS || cc == Condition::VC);
1843 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1844 GetMasm()->Adds(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1845 } else {
1846         /* Otherwise do a 32-bit operation, since any smaller
1847          * sizes have to be upcast to 32 bits anyway */
1848 GetMasm()->Adds(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1849 }
1850 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
1851 GetMasm()->B(label, Convert(cc));
1852 }
1853
EncodeSubOverflow(compiler::LabelHolder::LabelId id,Reg dst,Reg src0,Reg src1,Condition cc)1854 void Aarch64Encoder::EncodeSubOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1855 {
1856 ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1857 ASSERT(cc == Condition::VS || cc == Condition::VC);
1858 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1859 GetMasm()->Subs(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1860 } else {
1861         /* Otherwise do a 32-bit operation, since any smaller
1862          * sizes have to be upcast to 32 bits anyway */
1863 GetMasm()->Subs(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1864 }
1865 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
1866 GetMasm()->B(label, Convert(cc));
1867 }
1868
EncodeNegOverflowAndZero(compiler::LabelHolder::LabelId id,Reg dst,Reg src)1869 void Aarch64Encoder::EncodeNegOverflowAndZero(compiler::LabelHolder::LabelId id, Reg dst, Reg src)
1870 {
1871 ASSERT(!dst.IsFloat() && !src.IsFloat());
1872 // NOLINTNEXTLINE(readability-magic-numbers)
1873 EncodeJumpTest(id, src, Imm(0x7fffffff), Condition::TST_EQ);
1874 GetMasm()->Neg(VixlReg(dst).W(), VixlReg(src).W());
1875 }
1876
EncodeDiv(Reg dst,bool dstSigned,Reg src0,Reg src1)1877 void Aarch64Encoder::EncodeDiv(Reg dst, bool dstSigned, Reg src0, Reg src1)
1878 {
1879 if (dst.IsFloat()) {
1880 GetMasm()->Fdiv(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1881 return;
1882 }
1883
1884 auto rzero = GetRegfile()->GetZeroReg().GetId();
1885 if (src1.GetId() == rzero || src0.GetId() == rzero) {
1886 ScopedTmpReg tmpReg(this, src1.GetType());
1887 EncodeMov(tmpReg, Imm(0));
1888 // Denominator is zero-reg
1889 if (src1.GetId() == rzero) {
1890 // Encode Abort
1891 GetMasm()->Udiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(tmpReg));
1892 return;
1893 }
1894
1895 // But src1 still may be zero
1896 if (src1.GetId() != src0.GetId()) {
1897 if (dstSigned) {
1898 GetMasm()->Sdiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(src1));
1899 } else {
1900 GetMasm()->Udiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(src1));
1901 }
1902 return;
1903 }
1904 UNREACHABLE();
1905 }
1906 if (dstSigned) {
1907 GetMasm()->Sdiv(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1908 } else {
1909 GetMasm()->Udiv(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1910 }
1911 }
1912
EncodeMod(Reg dst,bool dstSigned,Reg src0,Reg src1)1913 void Aarch64Encoder::EncodeMod(Reg dst, bool dstSigned, Reg src0, Reg src1)
1914 {
1915 if (dst.IsScalar()) {
1916 auto rzero = GetRegfile()->GetZeroReg().GetId();
1917 if (src1.GetId() == rzero || src0.GetId() == rzero) {
1918 ScopedTmpReg tmpReg(this, src1.GetType());
1919 EncodeMov(tmpReg, Imm(0));
1920 // Denominator is zero-reg
1921 if (src1.GetId() == rzero) {
1922 // Encode Abort
1923 GetMasm()->Udiv(VixlReg(dst), VixlReg(tmpReg), VixlReg(tmpReg));
1924 return;
1925 }
1926
1927 if (src1.GetId() == src0.GetId()) {
1928 SetFalseResult();
1929 return;
1930 }
1931 // But src1 still may be zero
1932 ScopedTmpRegU64 tmpRegUd(this);
1933 if (dst.GetSize() < DOUBLE_WORD_SIZE) {
1934 tmpRegUd.ChangeType(INT32_TYPE);
1935 }
1936 auto tmp = VixlReg(tmpRegUd);
1937 if (!dstSigned) {
1938 GetMasm()->Udiv(tmp, VixlReg(tmpReg), VixlReg(src1));
1939 GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(tmpReg));
1940 return;
1941 }
1942 GetMasm()->Sdiv(tmp, VixlReg(tmpReg), VixlReg(src1));
1943 GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(tmpReg));
1944 return;
1945 }
1946
1947 ScopedTmpRegU64 tmpReg(this);
1948 if (dst.GetSize() < DOUBLE_WORD_SIZE) {
1949 tmpReg.ChangeType(INT32_TYPE);
1950 }
1951 auto tmp = VixlReg(tmpReg);
1952
1953 if (!dstSigned) {
1954 GetMasm()->Udiv(tmp, VixlReg(src0), VixlReg(src1));
1955 GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(src0));
1956 return;
1957 }
1958 GetMasm()->Sdiv(tmp, VixlReg(src0), VixlReg(src1));
1959 GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(src0));
1960 return;
1961 }
1962
1963 EncodeFMod(dst, src0, src1);
1964 }
1965
EncodeFMod(Reg dst,Reg src0,Reg src1)1966 void Aarch64Encoder::EncodeFMod(Reg dst, Reg src0, Reg src1)
1967 {
1968 ASSERT(dst.IsFloat());
1969
1970 if (dst.GetType() == FLOAT32_TYPE) {
1971 using Fp = float (*)(float, float);
1972 MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<Fp>(fmodf)));
1973 } else {
1974 using Fp = double (*)(double, double);
1975 MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<Fp>(fmod)));
1976 }
1977 }
1978
EncodeSignedDiv(Reg dst,Reg src0,Imm imm)1979 void Aarch64Encoder::EncodeSignedDiv(Reg dst, Reg src0, Imm imm)
1980 {
1981 int64_t divisor = imm.GetAsInt();
1982
1983 FastConstSignedDivisor fastDivisor(divisor, dst.GetSize());
1984 int64_t magic = fastDivisor.GetMagic();
1985
1986 ScopedTmpReg tmp(this, dst.GetType());
1987 Reg tmp64 = tmp.GetReg().As(INT64_TYPE);
1988 EncodeMov(tmp, Imm(magic));
1989
1990 int64_t extraShift = 0;
1991 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1992 GetMasm()->Smulh(VixlReg(tmp), VixlReg(src0), VixlReg(tmp));
1993 } else {
1994 GetMasm()->Smull(VixlReg(tmp64), VixlReg(src0), VixlReg(tmp));
1995 extraShift = WORD_SIZE;
1996 }
1997
1998 bool useSignFlag = false;
1999 if (divisor > 0 && magic < 0) {
2000 GetMasm()->Adds(VixlReg(tmp64), VixlReg(tmp64), VixlShift(Shift(src0.As(INT64_TYPE), extraShift)));
2001 useSignFlag = true;
2002 } else if (divisor < 0 && magic > 0) {
2003 GetMasm()->Subs(VixlReg(tmp64), VixlReg(tmp64), VixlShift(Shift(src0.As(INT64_TYPE), extraShift)));
2004 useSignFlag = true;
2005 }
2006
2007 int64_t shift = fastDivisor.GetShift();
2008 EncodeAShr(dst.As(INT64_TYPE), tmp64, Imm(shift + extraShift));
2009
2010 // result = (result < 0 ? result + 1 : result)
2011 if (useSignFlag) {
2012 GetMasm()->Cinc(VixlReg(dst), VixlReg(dst), vixl::aarch64::Condition::mi);
2013 } else {
2014 GetMasm()->Add(VixlReg(dst), VixlReg(dst), VixlShift(Shift(dst, ShiftType::LSR, dst.GetSize() - 1U)));
2015 }
2016 }
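
/*
 * Worked example of the magic-number division above (illustration only; the real constants come from
 * FastConstSignedDivisor). For a 32-bit signed division by 7 the well-known values are
 * magic = 0x92492493 (negative when viewed as int32) and shift = 2, so for src0 = -7:
 *
 *   q = ((int64_t)magic * src0) >> 32   ->  2
 *   q += src0                           -> -5   (divisor > 0 and magic < 0, hence the Adds above)
 *   q >>= shift                         -> -2
 *   q += (q < 0)                        -> -1   == -7 / 7
 */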
2017
EncodeUnsignedDiv(Reg dst,Reg src0,Imm imm)2018 void Aarch64Encoder::EncodeUnsignedDiv(Reg dst, Reg src0, Imm imm)
2019 {
2020 auto divisor = bit_cast<uint64_t>(imm.GetAsInt());
2021
2022 FastConstUnsignedDivisor fastDivisor(divisor, dst.GetSize());
2023 uint64_t magic = fastDivisor.GetMagic();
2024
2025 ScopedTmpReg tmp(this, dst.GetType());
2026 Reg tmp64 = tmp.GetReg().As(INT64_TYPE);
2027 EncodeMov(tmp, Imm(magic));
2028
2029 uint64_t extraShift = 0;
2030 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
2031 GetMasm()->Umulh(VixlReg(tmp), VixlReg(src0), VixlReg(tmp));
2032 } else {
2033 GetMasm()->Umull(VixlReg(tmp64), VixlReg(src0), VixlReg(tmp));
2034 extraShift = WORD_SIZE;
2035 }
2036
2037 uint64_t shift = fastDivisor.GetShift();
2038 if (!fastDivisor.GetAdd()) {
2039 EncodeShr(dst.As(INT64_TYPE), tmp64, Imm(shift + extraShift));
2040 } else {
2041 ASSERT(shift >= 1U);
2042 if (extraShift > 0U) {
2043 EncodeShr(tmp64, tmp64, Imm(extraShift));
2044 }
2045 EncodeSub(dst, src0, tmp);
2046 GetMasm()->Add(VixlReg(dst), VixlReg(tmp), VixlShift(Shift(dst, ShiftType::LSR, 1U)));
2047 EncodeShr(dst, dst, Imm(shift - 1U));
2048 }
2049 }
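
/*
 * Worked example of the unsigned path with the "add" indicator (illustration only; the real constants
 * come from FastConstUnsignedDivisor). For a 32-bit unsigned division by 7 the well-known values are
 * magic = 0x24924925, add = 1, shift = 3, so for src0 = 61:
 *
 *   q = ((uint64_t)magic * src0) >> 32   ->  8
 *   t = ((src0 - q) >> 1) + q            -> 34   (the EncodeSub/Add pair above)
 *   result = t >> (shift - 1)            ->  8   == 61 / 7
 *
 * Without the add indicator the 32-bit result is simply (magic * src0) >> (32 + shift).
 */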
2050
EncodeDiv(Reg dst,Reg src0,Imm imm,bool isSigned)2051 void Aarch64Encoder::EncodeDiv(Reg dst, Reg src0, Imm imm, bool isSigned)
2052 {
2053 ASSERT(dst.IsScalar() && dst.GetSize() >= WORD_SIZE);
2054 ASSERT(CanOptimizeImmDivMod(bit_cast<uint64_t>(imm.GetAsInt()), isSigned));
2055 if (isSigned) {
2056 EncodeSignedDiv(dst, src0, imm);
2057 } else {
2058 EncodeUnsignedDiv(dst, src0, imm);
2059 }
2060 }
2061
EncodeMod(Reg dst,Reg src0,Imm imm,bool isSigned)2062 void Aarch64Encoder::EncodeMod(Reg dst, Reg src0, Imm imm, bool isSigned)
2063 {
2064 ASSERT(dst.IsScalar() && dst.GetSize() >= WORD_SIZE);
2065 ASSERT(CanOptimizeImmDivMod(bit_cast<uint64_t>(imm.GetAsInt()), isSigned));
2066 // dst = src0 - imm * (src0 / imm)
2067 ScopedTmpReg tmp(this, dst.GetType());
2068 EncodeDiv(tmp, src0, imm, isSigned);
2069
2070 ScopedTmpReg immReg(this, dst.GetType());
2071 EncodeMov(immReg, imm);
2072
2073 GetMasm()->Msub(VixlReg(dst), VixlReg(immReg), VixlReg(tmp), VixlReg(src0));
2074 }
2075
EncodeMin(Reg dst,bool dstSigned,Reg src0,Reg src1)2076 void Aarch64Encoder::EncodeMin(Reg dst, bool dstSigned, Reg src0, Reg src1)
2077 {
2078 if (dst.IsFloat()) {
2079 GetMasm()->Fmin(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
2080 return;
2081 }
2082 if (dstSigned) {
2083 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2084 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::lt);
2085 return;
2086 }
2087 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2088 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::ls);
2089 }
2090
EncodeMax(Reg dst,bool dstSigned,Reg src0,Reg src1)2091 void Aarch64Encoder::EncodeMax(Reg dst, bool dstSigned, Reg src0, Reg src1)
2092 {
2093 if (dst.IsFloat()) {
2094 GetMasm()->Fmax(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
2095 return;
2096 }
2097 if (dstSigned) {
2098 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2099 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::gt);
2100 return;
2101 }
2102 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2103 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::hi);
2104 }
2105
EncodeShl(Reg dst,Reg src0,Reg src1)2106 void Aarch64Encoder::EncodeShl(Reg dst, Reg src0, Reg src1)
2107 {
2108 auto rzero = GetRegfile()->GetZeroReg().GetId();
2109 ASSERT(dst.GetId() != rzero);
2110 if (src0.GetId() == rzero) {
2111 EncodeMov(dst, Imm(0));
2112 return;
2113 }
2114 if (src1.GetId() == rzero) {
2115 EncodeMov(dst, src0);
2116 }
2117 if (dst.GetSize() < WORD_SIZE) {
2118 GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
2119 }
2120 GetMasm()->Lsl(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2121 }
2122
EncodeShr(Reg dst,Reg src0,Reg src1)2123 void Aarch64Encoder::EncodeShr(Reg dst, Reg src0, Reg src1)
2124 {
2125 auto rzero = GetRegfile()->GetZeroReg().GetId();
2126 ASSERT(dst.GetId() != rzero);
2127 if (src0.GetId() == rzero) {
2128 EncodeMov(dst, Imm(0));
2129 return;
2130 }
2131 if (src1.GetId() == rzero) {
2132 EncodeMov(dst, src0);
2133 }
2134
2135 if (dst.GetSize() < WORD_SIZE) {
2136 GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
2137 }
2138
2139 GetMasm()->Lsr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2140 }
2141
EncodeAShr(Reg dst,Reg src0,Reg src1)2142 void Aarch64Encoder::EncodeAShr(Reg dst, Reg src0, Reg src1)
2143 {
2144 auto rzero = GetRegfile()->GetZeroReg().GetId();
2145 ASSERT(dst.GetId() != rzero);
2146 if (src0.GetId() == rzero) {
2147 EncodeMov(dst, Imm(0));
2148 return;
2149 }
2150 if (src1.GetId() == rzero) {
2151 EncodeMov(dst, src0);
2152 }
2153
2154 if (dst.GetSize() < WORD_SIZE) {
2155 GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
2156 }
2157 GetMasm()->Asr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2158 }
2159
EncodeAnd(Reg dst,Reg src0,Reg src1)2160 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src0, Reg src1)
2161 {
2162 GetMasm()->And(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2163 }
2164
EncodeAnd(Reg dst,Reg src0,Shift src1)2165 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src0, Shift src1)
2166 {
2167 GetMasm()->And(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2168 }
2169
EncodeOr(Reg dst,Reg src0,Reg src1)2170 void Aarch64Encoder::EncodeOr(Reg dst, Reg src0, Reg src1)
2171 {
2172 GetMasm()->Orr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2173 }
2174
EncodeOr(Reg dst,Reg src0,Shift src1)2175 void Aarch64Encoder::EncodeOr(Reg dst, Reg src0, Shift src1)
2176 {
2177 GetMasm()->Orr(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2178 }
2179
EncodeXor(Reg dst,Reg src0,Reg src1)2180 void Aarch64Encoder::EncodeXor(Reg dst, Reg src0, Reg src1)
2181 {
2182 GetMasm()->Eor(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2183 }
2184
EncodeXor(Reg dst,Reg src0,Shift src1)2185 void Aarch64Encoder::EncodeXor(Reg dst, Reg src0, Shift src1)
2186 {
2187 GetMasm()->Eor(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2188 }
2189
EncodeAdd(Reg dst,Reg src,Imm imm)2190 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src, Imm imm)
2191 {
2192 ASSERT(dst.IsScalar() && "UNIMPLEMENTED");
2193 ASSERT(dst.GetSize() >= src.GetSize());
2194 if (dst.GetSize() != src.GetSize()) {
2195 auto srcReg = Reg(src.GetId(), dst.GetType());
2196 GetMasm()->Add(VixlReg(dst), VixlReg(srcReg), VixlImm(imm));
2197 return;
2198 }
2199 GetMasm()->Add(VixlReg(dst), VixlReg(src), VixlImm(imm));
2200 }
2201
EncodeSub(Reg dst,Reg src,Imm imm)2202 void Aarch64Encoder::EncodeSub(Reg dst, Reg src, Imm imm)
2203 {
2204 ASSERT(dst.IsScalar() && "UNIMPLEMENTED");
2205 GetMasm()->Sub(VixlReg(dst), VixlReg(src), VixlImm(imm));
2206 }
2207
EncodeShl(Reg dst,Reg src,Imm imm)2208 void Aarch64Encoder::EncodeShl(Reg dst, Reg src, Imm imm)
2209 {
2210 ASSERT(dst.IsScalar() && "Invalid operand type");
2211 auto rzero = GetRegfile()->GetZeroReg().GetId();
2212 ASSERT(dst.GetId() != rzero);
2213 if (src.GetId() == rzero) {
2214 EncodeMov(dst, Imm(0));
2215 return;
2216 }
2217
2218 GetMasm()->Lsl(VixlReg(dst), VixlReg(src), imm.GetAsInt());
2219 }
2220
EncodeShr(Reg dst,Reg src,Imm imm)2221 void Aarch64Encoder::EncodeShr(Reg dst, Reg src, Imm imm)
2222 {
2223 int64_t immValue = static_cast<uint64_t>(imm.GetAsInt()) & (dst.GetSize() - 1);
2224
2225 ASSERT(dst.IsScalar() && "Invalid operand type");
2226 auto rzero = GetRegfile()->GetZeroReg().GetId();
2227 ASSERT(dst.GetId() != rzero);
2228 if (src.GetId() == rzero) {
2229 EncodeMov(dst, Imm(0));
2230 return;
2231 }
2232
2233 GetMasm()->Lsr(VixlReg(dst), VixlReg(src), immValue);
2234 }
2235
EncodeAShr(Reg dst,Reg src,Imm imm)2236 void Aarch64Encoder::EncodeAShr(Reg dst, Reg src, Imm imm)
2237 {
2238 ASSERT(dst.IsScalar() && "Invalid operand type");
2239 GetMasm()->Asr(VixlReg(dst), VixlReg(src), imm.GetAsInt());
2240 }
2241
EncodeAnd(Reg dst,Reg src,Imm imm)2242 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src, Imm imm)
2243 {
2244 ASSERT(dst.IsScalar() && "Invalid operand type");
2245 GetMasm()->And(VixlReg(dst), VixlReg(src), VixlImm(imm));
2246 }
2247
EncodeOr(Reg dst,Reg src,Imm imm)2248 void Aarch64Encoder::EncodeOr(Reg dst, Reg src, Imm imm)
2249 {
2250 ASSERT(dst.IsScalar() && "Invalid operand type");
2251 GetMasm()->Orr(VixlReg(dst), VixlReg(src), VixlImm(imm));
2252 }
2253
EncodeXor(Reg dst,Reg src,Imm imm)2254 void Aarch64Encoder::EncodeXor(Reg dst, Reg src, Imm imm)
2255 {
2256 ASSERT(dst.IsScalar() && "Invalid operand type");
2257 GetMasm()->Eor(VixlReg(dst), VixlReg(src), VixlImm(imm));
2258 }
2259
EncodeMov(Reg dst,Imm src)2260 void Aarch64Encoder::EncodeMov(Reg dst, Imm src)
2261 {
2262 if (dst.IsFloat()) {
2263 if (dst.GetSize() == WORD_SIZE) {
2264 GetMasm()->Fmov(VixlVReg(dst), src.GetAsFloat());
2265 } else {
2266 GetMasm()->Fmov(VixlVReg(dst), src.GetAsDouble());
2267 }
2268 return;
2269 }
2270 if (dst.GetSize() > WORD_SIZE) {
2271 GetMasm()->Mov(VixlReg(dst), VixlImm(src));
2272 } else {
2273 GetMasm()->Mov(VixlReg(dst), VixlImm(static_cast<int32_t>(src.GetAsInt())));
2274 }
2275 }
2276
EncodeLdr(Reg dst,bool dstSigned,MemRef mem)2277 void Aarch64Encoder::EncodeLdr(Reg dst, bool dstSigned, MemRef mem)
2278 {
2279 auto rzero = GetRegfile()->GetZeroReg().GetId();
2280 if (!ConvertMem(mem).IsValid() || (dst.GetId() == rzero && dst.IsScalar())) {
2281         // Try to use dst as the zero base register (to avoid creating a temp register)
2282         // Check: dst is not a vector register, not the index register, and not rzero
2283 [[maybe_unused]] auto baseReg = mem.GetBase();
2284 auto indexReg = mem.GetIndex();
2285
2286 // Invalid == base is rzero or invalid
2287 ASSERT(baseReg.GetId() == rzero || !baseReg.IsValid());
2288         // Check whether dst can be reused as the base register
2289 if (dst.IsScalar() && dst.IsValid() && // not float
2290 (indexReg.GetId() != dst.GetId()) && // not index
2291 (dst.GetId() != rzero)) { // not rzero
2292             // dst can be used in place of rzero
2293 EncodeMov(dst, Imm(0));
2294
2295 auto fixMem = MemRef(dst, indexReg, mem.GetScale(), mem.GetDisp());
2296 ASSERT(ConvertMem(fixMem).IsValid());
2297 EncodeLdr(dst, dstSigned, fixMem);
2298 } else {
2299 // Use tmp-reg
2300 ScopedTmpReg tmpReg(this);
2301 EncodeMov(tmpReg, Imm(0));
2302
2303 auto fixMem = MemRef(tmpReg, indexReg, mem.GetScale(), mem.GetDisp());
2304 ASSERT(ConvertMem(fixMem).IsValid());
2305 // Used for zero-dst
2306 EncodeLdr(tmpReg, dstSigned, fixMem);
2307 }
2308 return;
2309 }
2310 ASSERT(ConvertMem(mem).IsValid());
2311 if (dst.IsFloat()) {
2312 GetMasm()->Ldr(VixlVReg(dst), ConvertMem(mem));
2313 return;
2314 }
2315 if (dstSigned) {
2316 if (dst.GetSize() == BYTE_SIZE) {
2317 GetMasm()->Ldrsb(VixlReg(dst, DOUBLE_WORD_SIZE), ConvertMem(mem));
2318 return;
2319 }
2320 if (dst.GetSize() == HALF_SIZE) {
2321 GetMasm()->Ldrsh(VixlReg(dst), ConvertMem(mem));
2322 return;
2323 }
2324 } else {
2325 if (dst.GetSize() == BYTE_SIZE) {
2326 GetMasm()->Ldrb(VixlReg(dst, WORD_SIZE), ConvertMem(mem));
2327 return;
2328 }
2329 if (dst.GetSize() == HALF_SIZE) {
2330 GetMasm()->Ldrh(VixlReg(dst), ConvertMem(mem));
2331 return;
2332 }
2333 }
2334 GetMasm()->Ldr(VixlReg(dst), ConvertMem(mem));
2335 }
2336
EncodeLdrAcquireInvalid(Reg dst,bool dstSigned,MemRef mem)2337 void Aarch64Encoder::EncodeLdrAcquireInvalid(Reg dst, bool dstSigned, MemRef mem)
2338 {
2339     // Try to use dst as the zero base register (to avoid creating a temp register)
2340     // Check: dst is not a vector register, not the index register, and not rzero
2341 [[maybe_unused]] auto baseReg = mem.GetBase();
2342 auto rzero = GetRegfile()->GetZeroReg().GetId();
2343
2344 auto indexReg = mem.GetIndex();
2345
2346 // Invalid == base is rzero or invalid
2347 ASSERT(baseReg.GetId() == rzero || !baseReg.IsValid());
2348     // Check whether dst can be reused as the base register
2349 if (dst.IsScalar() && dst.IsValid() && // not float
2350 (indexReg.GetId() != dst.GetId()) && // not index
2351 (dst.GetId() != rzero)) { // not rzero
2352         // dst can be used in place of rzero
2353 EncodeMov(dst, Imm(0));
2354
2355 auto fixMem = MemRef(dst, indexReg, mem.GetScale(), mem.GetDisp());
2356 ASSERT(ConvertMem(fixMem).IsValid());
2357 EncodeLdrAcquire(dst, dstSigned, fixMem);
2358 } else {
2359 // Use tmp-reg
2360 ScopedTmpReg tmpReg(this);
2361 EncodeMov(tmpReg, Imm(0));
2362
2363 auto fixMem = MemRef(tmpReg, indexReg, mem.GetScale(), mem.GetDisp());
2364 ASSERT(ConvertMem(fixMem).IsValid());
2365 // Used for zero-dst
2366 EncodeLdrAcquire(tmpReg, dstSigned, fixMem);
2367 }
2368 }
2369
EncodeLdrAcquireScalar(Reg dst,bool dstSigned,MemRef mem)2370 void Aarch64Encoder::EncodeLdrAcquireScalar(Reg dst, bool dstSigned, MemRef mem)
2371 {
2372 #ifndef NDEBUG
2373 CheckAlignment(mem, dst.GetSize());
2374 #endif // NDEBUG
2375 if (dstSigned) {
2376 if (dst.GetSize() == BYTE_SIZE) {
2377 GetMasm()->Ldarb(VixlReg(dst), ConvertMem(mem));
2378 GetMasm()->Sxtb(VixlReg(dst), VixlReg(dst));
2379 return;
2380 }
2381 if (dst.GetSize() == HALF_SIZE) {
2382 GetMasm()->Ldarh(VixlReg(dst), ConvertMem(mem));
2383 GetMasm()->Sxth(VixlReg(dst), VixlReg(dst));
2384 return;
2385 }
2386 if (dst.GetSize() == WORD_SIZE) {
2387 GetMasm()->Ldar(VixlReg(dst), ConvertMem(mem));
2388 GetMasm()->Sxtw(VixlReg(dst), VixlReg(dst));
2389 return;
2390 }
2391 } else {
2392 if (dst.GetSize() == BYTE_SIZE) {
2393 GetMasm()->Ldarb(VixlReg(dst, WORD_SIZE), ConvertMem(mem));
2394 return;
2395 }
2396 if (dst.GetSize() == HALF_SIZE) {
2397 GetMasm()->Ldarh(VixlReg(dst), ConvertMem(mem));
2398 return;
2399 }
2400 }
2401 GetMasm()->Ldar(VixlReg(dst), ConvertMem(mem));
2402 }
2403
CheckAlignment(MemRef mem,size_t size)2404 void Aarch64Encoder::CheckAlignment(MemRef mem, size_t size)
2405 {
2406 ASSERT(size == WORD_SIZE || size == BYTE_SIZE || size == HALF_SIZE || size == DOUBLE_WORD_SIZE);
2407 if (size == BYTE_SIZE) {
2408 return;
2409 }
2410 size_t alignmentMask = (size >> 3U) - 1;
2411 ASSERT(!mem.HasIndex() && !mem.HasScale());
2412 if (mem.HasDisp()) {
2413         // Checking (base + offset) directly would require an additional tmp register.
2414         // The case where the base and the offset are individually misaligned but their sum is aligned is very rare,
2415         // so the alignment of the base and of the offset is checked separately.
2416 [[maybe_unused]] auto offset = static_cast<size_t>(mem.GetDisp());
2417 ASSERT((offset & alignmentMask) == 0);
2418 }
2419 auto baseReg = mem.GetBase();
2420 auto end = CreateLabel();
2421 EncodeJumpTest(end, baseReg, Imm(alignmentMask), Condition::TST_EQ);
2422 EncodeAbort();
2423 BindLabel(end);
2424 }
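
/*
 * Example of the mask computation above: sizes are in bits, so a 32-bit access gets
 * alignmentMask = (32 >> 3) - 1 = 3 and the check requires (base & 3) == 0; a 64-bit access uses mask 7.
 * Byte accesses return early because they are always aligned.
 */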
2425
EncodeLdrAcquire(Reg dst,bool dstSigned,MemRef mem)2426 void Aarch64Encoder::EncodeLdrAcquire(Reg dst, bool dstSigned, MemRef mem)
2427 {
2428 if (mem.HasIndex()) {
2429 ScopedTmpRegU64 tmpReg(this);
2430 if (mem.HasScale()) {
2431 EncodeAdd(tmpReg, mem.GetBase(), Shift(mem.GetIndex(), mem.GetScale()));
2432 } else {
2433 EncodeAdd(tmpReg, mem.GetBase(), mem.GetIndex());
2434 }
2435 mem = MemRef(tmpReg, mem.GetDisp());
2436 }
2437
2438 auto rzero = GetRegfile()->GetZeroReg().GetId();
2439 if (!ConvertMem(mem).IsValid() || (dst.GetId() == rzero && dst.IsScalar())) {
2440 EncodeLdrAcquireInvalid(dst, dstSigned, mem);
2441 return;
2442 }
2443
2444 ASSERT(!mem.HasIndex() && !mem.HasScale());
2445 if (dst.IsFloat()) {
2446 ScopedTmpRegU64 tmpReg(this);
2447 auto memLdar = mem;
2448 if (mem.HasDisp()) {
2449 if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2450 EncodeAdd(tmpReg, mem.GetBase(), Imm(mem.GetDisp()));
2451 } else {
2452 EncodeMov(tmpReg, Imm(mem.GetDisp()));
2453 EncodeAdd(tmpReg, mem.GetBase(), tmpReg);
2454 }
2455 memLdar = MemRef(tmpReg);
2456 }
2457 #ifndef NDEBUG
2458 CheckAlignment(memLdar, dst.GetSize());
2459 #endif // NDEBUG
2460 auto tmp = VixlReg(tmpReg, dst.GetSize());
2461 GetMasm()->Ldar(tmp, ConvertMem(memLdar));
2462 GetMasm()->Fmov(VixlVReg(dst), tmp);
2463 return;
2464 }
2465
2466 if (!mem.HasDisp()) {
2467 EncodeLdrAcquireScalar(dst, dstSigned, mem);
2468 return;
2469 }
2470
2471 Reg dst64(dst.GetId(), INT64_TYPE);
2472 if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2473 EncodeAdd(dst64, mem.GetBase(), Imm(mem.GetDisp()));
2474 } else {
2475 EncodeMov(dst64, Imm(mem.GetDisp()));
2476 EncodeAdd(dst64, mem.GetBase(), dst64);
2477 }
2478 EncodeLdrAcquireScalar(dst, dstSigned, MemRef(dst64));
2479 }
2480
EncodeStr(Reg src,MemRef mem)2481 void Aarch64Encoder::EncodeStr(Reg src, MemRef mem)
2482 {
2483 if (!ConvertMem(mem).IsValid()) {
2484 auto indexReg = mem.GetIndex();
2485 auto rzero = GetRegfile()->GetZeroReg().GetId();
2486 // Invalid == base is rzero or invalid
2487 ASSERT(mem.GetBase().GetId() == rzero || !mem.GetBase().IsValid());
2488 // Use tmp-reg
2489 ScopedTmpReg tmpReg(this);
2490 EncodeMov(tmpReg, Imm(0));
2491
2492 auto fixMem = MemRef(tmpReg, indexReg, mem.GetScale(), mem.GetDisp());
2493 ASSERT(ConvertMem(fixMem).IsValid());
2494 if (src.GetId() != rzero) {
2495 EncodeStr(src, fixMem);
2496 } else {
2497 EncodeStr(tmpReg, fixMem);
2498 }
2499 return;
2500 }
2501 ASSERT(ConvertMem(mem).IsValid());
2502 if (src.IsFloat()) {
2503 GetMasm()->Str(VixlVReg(src), ConvertMem(mem));
2504 return;
2505 }
2506 if (src.GetSize() == BYTE_SIZE) {
2507 GetMasm()->Strb(VixlReg(src), ConvertMem(mem));
2508 return;
2509 }
2510 if (src.GetSize() == HALF_SIZE) {
2511 GetMasm()->Strh(VixlReg(src), ConvertMem(mem));
2512 return;
2513 }
2514 GetMasm()->Str(VixlReg(src), ConvertMem(mem));
2515 }
2516
EncodeStrRelease(Reg src,MemRef mem)2517 void Aarch64Encoder::EncodeStrRelease(Reg src, MemRef mem)
2518 {
2519 ScopedTmpRegLazy base(this);
2520 MemRef fixedMem;
2521 bool memWasFixed = false;
2522 if (mem.HasDisp()) {
2523 if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2524 base.AcquireIfInvalid();
2525 EncodeAdd(base, mem.GetBase(), Imm(mem.GetDisp()));
2526 } else {
2527 base.AcquireIfInvalid();
2528 EncodeMov(base, Imm(mem.GetDisp()));
2529 EncodeAdd(base, mem.GetBase(), base);
2530 }
2531 memWasFixed = true;
2532 }
2533 if (mem.HasIndex()) {
2534 base.AcquireIfInvalid();
2535 if (mem.HasScale()) {
2536 EncodeAdd(base, memWasFixed ? base : mem.GetBase(), Shift(mem.GetIndex(), mem.GetScale()));
2537 } else {
2538 EncodeAdd(base, memWasFixed ? base : mem.GetBase(), mem.GetIndex());
2539 }
2540 memWasFixed = true;
2541 }
2542
2543 if (memWasFixed) {
2544 fixedMem = MemRef(base);
2545 } else {
2546 fixedMem = mem;
2547 }
2548
2549 #ifndef NDEBUG
2550 CheckAlignment(fixedMem, src.GetSize());
2551 #endif // NDEBUG
2552 if (src.IsFloat()) {
2553 ScopedTmpRegU64 tmpReg(this);
2554 auto tmp = VixlReg(tmpReg, src.GetSize());
2555 GetMasm()->Fmov(tmp, VixlVReg(src));
2556 GetMasm()->Stlr(tmp, ConvertMem(fixedMem));
2557 return;
2558 }
2559 if (src.GetSize() == BYTE_SIZE) {
2560 GetMasm()->Stlrb(VixlReg(src), ConvertMem(fixedMem));
2561 return;
2562 }
2563 if (src.GetSize() == HALF_SIZE) {
2564 GetMasm()->Stlrh(VixlReg(src), ConvertMem(fixedMem));
2565 return;
2566 }
2567 GetMasm()->Stlr(VixlReg(src), ConvertMem(fixedMem));
2568 }
2569
EncodeLdrExclusive(Reg dst,Reg addr,bool acquire)2570 void Aarch64Encoder::EncodeLdrExclusive(Reg dst, Reg addr, bool acquire)
2571 {
2572 ASSERT(dst.IsScalar());
2573 auto dstReg = VixlReg(dst);
2574 auto memCvt = ConvertMem(MemRef(addr));
2575 #ifndef NDEBUG
2576 CheckAlignment(MemRef(addr), dst.GetSize());
2577 #endif // NDEBUG
2578 if (dst.GetSize() == BYTE_SIZE) {
2579 if (acquire) {
2580 GetMasm()->Ldaxrb(dstReg, memCvt);
2581 return;
2582 }
2583 GetMasm()->Ldxrb(dstReg, memCvt);
2584 return;
2585 }
2586 if (dst.GetSize() == HALF_SIZE) {
2587 if (acquire) {
2588 GetMasm()->Ldaxrh(dstReg, memCvt);
2589 return;
2590 }
2591 GetMasm()->Ldxrh(dstReg, memCvt);
2592 return;
2593 }
2594 if (acquire) {
2595 GetMasm()->Ldaxr(dstReg, memCvt);
2596 return;
2597 }
2598 GetMasm()->Ldxr(dstReg, memCvt);
2599 }
2600
EncodeStrExclusive(Reg dst,Reg src,Reg addr,bool release)2601 void Aarch64Encoder::EncodeStrExclusive(Reg dst, Reg src, Reg addr, bool release)
2602 {
2603 ASSERT(dst.IsScalar() && src.IsScalar());
2604
2605 bool copyDst = dst.GetId() == src.GetId() || dst.GetId() == addr.GetId();
2606 ScopedTmpReg tmp(this);
2607 auto srcReg = VixlReg(src);
2608 auto memCvt = ConvertMem(MemRef(addr));
2609 auto dstReg = copyDst ? VixlReg(tmp) : VixlReg(dst);
2610 #ifndef NDEBUG
2611 CheckAlignment(MemRef(addr), src.GetSize());
2612 #endif // NDEBUG
2613
2614 if (src.GetSize() == BYTE_SIZE) {
2615 if (release) {
2616 GetMasm()->Stlxrb(dstReg, srcReg, memCvt);
2617 } else {
2618 GetMasm()->Stxrb(dstReg, srcReg, memCvt);
2619 }
2620 } else if (src.GetSize() == HALF_SIZE) {
2621 if (release) {
2622 GetMasm()->Stlxrh(dstReg, srcReg, memCvt);
2623 } else {
2624 GetMasm()->Stxrh(dstReg, srcReg, memCvt);
2625 }
2626 } else {
2627 if (release) {
2628 GetMasm()->Stlxr(dstReg, srcReg, memCvt);
2629 } else {
2630 GetMasm()->Stxr(dstReg, srcReg, memCvt);
2631 }
2632 }
2633 if (copyDst) {
2634 EncodeMov(dst, tmp);
2635 }
2636 }
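
/*
 * Typical use of the exclusive pair above (a sketch only; real callers emit their own control flow):
 *
 *   retry:
 *     EncodeLdrExclusive(cur, addr, true);          // ldaxr  cur, [addr]
 *     ... compare cur with the expected value, branch out on mismatch ...
 *     EncodeStrExclusive(res, newVal, addr, true);  // stlxr  res, newVal, [addr]
 *     ... res != 0 means the exclusive monitor was lost - branch back to retry ...
 *
 * EncodeAtomicByteOr below emits the same ldxrb/stxrb retry pattern directly.
 */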
2637
EncodeStrz(Reg src,MemRef mem)2638 void Aarch64Encoder::EncodeStrz(Reg src, MemRef mem)
2639 {
2640 if (!ConvertMem(mem).IsValid()) {
2641 EncodeStr(src, mem);
2642 return;
2643 }
2644 ASSERT(ConvertMem(mem).IsValid());
2645     // Upper halves of the registers are zeroed by default
2646 if (src.IsFloat()) {
2647 EncodeStr(src.As(FLOAT64_TYPE), mem);
2648 return;
2649 }
2650 if (src.GetSize() < WORD_SIZE) {
2651 EncodeCast(src, false, src.As(INT64_TYPE), false);
2652 }
2653 GetMasm()->Str(VixlReg(src.As(INT64_TYPE)), ConvertMem(mem));
2654 }
2655
EncodeSti(int64_t src,uint8_t srcSizeBytes,MemRef mem)2656 void Aarch64Encoder::EncodeSti(int64_t src, uint8_t srcSizeBytes, MemRef mem)
2657 {
2658 if (mem.IsValid() && mem.IsOffsetMem() && src == 0 && srcSizeBytes == 1) {
2659 auto rzero = GetRegfile()->GetZeroReg();
2660 GetMasm()->Strb(VixlReg(rzero), ConvertMem(mem));
2661 return;
2662 }
2663 if (!ConvertMem(mem).IsValid()) {
2664 auto rzero = GetRegfile()->GetZeroReg();
2665 EncodeStr(rzero, mem);
2666 return;
2667 }
2668
2669 ScopedTmpRegU64 tmpReg(this);
2670 auto tmp = VixlReg(tmpReg);
2671 GetMasm()->Mov(tmp, VixlImm(src));
2672 if (srcSizeBytes == 1U) {
2673 GetMasm()->Strb(tmp, ConvertMem(mem));
2674 return;
2675 }
2676 if (srcSizeBytes == HALF_WORD_SIZE_BYTES) {
2677 GetMasm()->Strh(tmp, ConvertMem(mem));
2678 return;
2679 }
2680 ASSERT((srcSizeBytes == WORD_SIZE_BYTES) || (srcSizeBytes == DOUBLE_WORD_SIZE_BYTES));
2681 GetMasm()->Str(tmp, ConvertMem(mem));
2682 }
2683
EncodeSti(float src,MemRef mem)2684 void Aarch64Encoder::EncodeSti(float src, MemRef mem)
2685 {
2686 if (!ConvertMem(mem).IsValid()) {
2687 auto rzero = GetRegfile()->GetZeroReg();
2688 EncodeStr(rzero, mem);
2689 return;
2690 }
2691 ScopedTmpRegF32 tmpReg(this);
2692 GetMasm()->Fmov(VixlVReg(tmpReg).S(), src);
2693 EncodeStr(tmpReg, mem);
2694 }
2695
EncodeSti(double src,MemRef mem)2696 void Aarch64Encoder::EncodeSti(double src, MemRef mem)
2697 {
2698 if (!ConvertMem(mem).IsValid()) {
2699 auto rzero = GetRegfile()->GetZeroReg();
2700 EncodeStr(rzero, mem);
2701 return;
2702 }
2703 ScopedTmpRegF64 tmpReg(this);
2704 GetMasm()->Fmov(VixlVReg(tmpReg).D(), src);
2705 EncodeStr(tmpReg, mem);
2706 }
2707
EncodeMemCopy(MemRef memFrom,MemRef memTo,size_t size)2708 void Aarch64Encoder::EncodeMemCopy(MemRef memFrom, MemRef memTo, size_t size)
2709 {
2710 if (!ConvertMem(memFrom).IsValid() || !ConvertMem(memTo).IsValid()) {
2711 auto rzero = GetRegfile()->GetZeroReg();
2712 if (!ConvertMem(memFrom).IsValid()) {
2713 // Encode one load - will fix inside
2714 EncodeLdr(rzero, false, memFrom);
2715 } else {
2716 ASSERT(!ConvertMem(memTo).IsValid());
2717 // Encode one store - will fix inside
2718 EncodeStr(rzero, memTo);
2719 }
2720 return;
2721 }
2722 ASSERT(ConvertMem(memFrom).IsValid());
2723 ASSERT(ConvertMem(memTo).IsValid());
2724 ScopedTmpRegU64 tmpReg(this);
2725 auto tmp = VixlReg(tmpReg, std::min(size, static_cast<size_t>(DOUBLE_WORD_SIZE)));
2726 if (size == BYTE_SIZE) {
2727 GetMasm()->Ldrb(tmp, ConvertMem(memFrom));
2728 GetMasm()->Strb(tmp, ConvertMem(memTo));
2729 } else if (size == HALF_SIZE) {
2730 GetMasm()->Ldrh(tmp, ConvertMem(memFrom));
2731 GetMasm()->Strh(tmp, ConvertMem(memTo));
2732 } else {
2733 ASSERT(size == WORD_SIZE || size == DOUBLE_WORD_SIZE);
2734 GetMasm()->Ldr(tmp, ConvertMem(memFrom));
2735 GetMasm()->Str(tmp, ConvertMem(memTo));
2736 }
2737 }
2738
EncodeMemCopyz(MemRef memFrom,MemRef memTo,size_t size)2739 void Aarch64Encoder::EncodeMemCopyz(MemRef memFrom, MemRef memTo, size_t size)
2740 {
2741 if (!ConvertMem(memFrom).IsValid() || !ConvertMem(memTo).IsValid()) {
2742 auto rzero = GetRegfile()->GetZeroReg();
2743 if (!ConvertMem(memFrom).IsValid()) {
2744 // Encode one load - will fix inside
2745 EncodeLdr(rzero, false, memFrom);
2746 } else {
2747 ASSERT(!ConvertMem(memTo).IsValid());
2748 // Encode one store - will fix inside
2749 EncodeStr(rzero, memTo);
2750 }
2751 return;
2752 }
2753 ASSERT(ConvertMem(memFrom).IsValid());
2754 ASSERT(ConvertMem(memTo).IsValid());
2755 ScopedTmpRegU64 tmpReg(this);
2756 auto tmp = VixlReg(tmpReg, std::min(size, static_cast<size_t>(DOUBLE_WORD_SIZE)));
2757 auto zero = VixlReg(GetRegfile()->GetZeroReg(), WORD_SIZE);
2758 if (size == BYTE_SIZE) {
2759 GetMasm()->Ldrb(tmp, ConvertMem(memFrom));
2760 GetMasm()->Stp(tmp, zero, ConvertMem(memTo));
2761 } else if (size == HALF_SIZE) {
2762 GetMasm()->Ldrh(tmp, ConvertMem(memFrom));
2763 GetMasm()->Stp(tmp, zero, ConvertMem(memTo));
2764 } else {
2765 ASSERT(size == WORD_SIZE || size == DOUBLE_WORD_SIZE);
2766 GetMasm()->Ldr(tmp, ConvertMem(memFrom));
2767 if (size == WORD_SIZE) {
2768 GetMasm()->Stp(tmp, zero, ConvertMem(memTo));
2769 } else {
2770 GetMasm()->Str(tmp, ConvertMem(memTo));
2771 }
2772 }
2773 }
2774
EncodeCompare(Reg dst,Reg src0,Reg src1,Condition cc)2775 void Aarch64Encoder::EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc)
2776 {
2777 ASSERT(src0.IsFloat() == src1.IsFloat());
2778 if (src0.IsFloat()) {
2779 GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
2780 } else {
2781 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2782 }
2783 GetMasm()->Cset(VixlReg(dst), Convert(cc));
2784 }
2785
EncodeCompareTest(Reg dst,Reg src0,Reg src1,Condition cc)2786 void Aarch64Encoder::EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc)
2787 {
2788 ASSERT(src0.IsScalar() && src1.IsScalar());
2789
2790 GetMasm()->Tst(VixlReg(src0), VixlReg(src1));
2791 GetMasm()->Cset(VixlReg(dst), ConvertTest(cc));
2792 }
2793
EncodeAtomicByteOr(Reg addr,Reg value,bool fastEncoding)2794 void Aarch64Encoder::EncodeAtomicByteOr(Reg addr, Reg value, bool fastEncoding)
2795 {
2796 if (fastEncoding) {
2797 #ifndef NDEBUG
2798 vixl::CPUFeaturesScope scope(GetMasm(), vixl::CPUFeatures::kAtomics);
2799 #endif
2800 GetMasm()->Stsetb(VixlReg(value, BYTE_SIZE), MemOperand(VixlReg(addr)));
2801 return;
2802 }
2803
2804 // Slow encoding, should not be used in production code!!!
2805 auto linkReg = GetTarget().GetLinkReg();
2806 auto frameReg = GetTarget().GetFrameReg();
2807 static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTES;
2808
2809 ScopedTmpRegLazy tmp1(this);
2810 ScopedTmpRegLazy tmp2(this);
2811 Reg orValue;
2812 Reg storeResult;
2813 bool hasTemps = GetScratchRegistersWithLrCount() >= 2U;
2814 if (hasTemps) {
2815 tmp1.AcquireWithLr();
2816 tmp2.AcquireWithLr();
2817 orValue = tmp1.GetReg().As(INT32_TYPE);
2818 storeResult = tmp2.GetReg().As(INT32_TYPE);
2819 } else {
2820 GetMasm()->stp(VixlReg(frameReg), VixlReg(linkReg),
2821 MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
2822 orValue = frameReg.As(INT32_TYPE);
2823 storeResult = linkReg.As(INT32_TYPE);
2824 }
2825
2826 auto *loop = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
2827 GetMasm()->Bind(loop);
2828 GetMasm()->Ldxrb(VixlReg(orValue), MemOperand(VixlReg(addr)));
2829 GetMasm()->Orr(VixlReg(orValue), VixlReg(orValue), VixlReg(value, WORD_SIZE));
2830 GetMasm()->Stxrb(VixlReg(storeResult), VixlReg(orValue), MemOperand(VixlReg(addr)));
2831 GetMasm()->Cbnz(VixlReg(storeResult), loop);
2832 if (!hasTemps) {
2833 GetMasm()->ldp(VixlReg(frameReg), VixlReg(linkReg),
2834 MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
2835 }
2836 }
2837
EncodeCmp(Reg dst,Reg src0,Reg src1,Condition cc)2838 void Aarch64Encoder::EncodeCmp(Reg dst, Reg src0, Reg src1, Condition cc)
2839 {
2840 if (src0.IsFloat()) {
2841 ASSERT(src1.IsFloat());
2842 ASSERT(cc == Condition::MI || cc == Condition::LT);
2843 GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
2844 } else {
2845 ASSERT(src0.IsScalar() && src1.IsScalar());
2846 ASSERT(cc == Condition::LO || cc == Condition::LT);
2847 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2848 }
2849 GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::ne);
2850 GetMasm()->Cneg(VixlReg(Promote(dst)), VixlReg(Promote(dst)), Convert(cc));
2851 }
2852
EncodeSelect(ArgsSelect && args)2853 void Aarch64Encoder::EncodeSelect(ArgsSelect &&args)
2854 {
2855 auto [dst, src0, src1, src2, src3, cc] = args;
2856 if (src2.IsScalar()) {
2857 GetMasm()->Cmp(VixlReg(src2), VixlReg(src3));
2858 } else {
2859 GetMasm()->Fcmp(VixlVReg(src2), VixlVReg(src3));
2860 }
2861 if (dst.IsFloat()) {
2862 GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), Convert(cc));
2863 } else {
2864 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), Convert(cc));
2865 }
2866 }
2867
EncodeSelect(ArgsSelectImm && args)2868 void Aarch64Encoder::EncodeSelect(ArgsSelectImm &&args)
2869 {
2870 auto [dst, src0, src1, src2, imm, cc] = args;
2871 if (src2.IsScalar()) {
2872 GetMasm()->Cmp(VixlReg(src2), VixlImm(imm));
2873 } else {
2874 GetMasm()->Fcmp(VixlVReg(src2), imm.GetAsDouble());
2875 }
2876 if (dst.IsFloat()) {
2877 GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), Convert(cc));
2878 } else {
2879 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), Convert(cc));
2880 }
2881 }
2882
EncodeSelectTest(ArgsSelect && args)2883 void Aarch64Encoder::EncodeSelectTest(ArgsSelect &&args)
2884 {
2885 auto [dst, src0, src1, src2, src3, cc] = args;
2886 ASSERT(!src2.IsFloat() && !src3.IsFloat());
2887 GetMasm()->Tst(VixlReg(src2), VixlReg(src3));
2888 if (dst.IsFloat()) {
2889 GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), ConvertTest(cc));
2890 } else {
2891 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), ConvertTest(cc));
2892 }
2893 }
2894
EncodeSelectTest(ArgsSelectImm && args)2895 void Aarch64Encoder::EncodeSelectTest(ArgsSelectImm &&args)
2896 {
2897 auto [dst, src0, src1, src2, imm, cc] = args;
2898 ASSERT(!src2.IsFloat());
2899 ASSERT(CanEncodeImmLogical(imm.GetAsInt(), src2.GetSize() > WORD_SIZE ? DOUBLE_WORD_SIZE : WORD_SIZE));
2900 GetMasm()->Tst(VixlReg(src2), VixlImm(imm));
2901 if (dst.IsFloat()) {
2902 GetMasm()->Fcsel(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), ConvertTest(cc));
2903 } else {
2904 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), ConvertTest(cc));
2905 }
2906 }
2907
EncodeLdp(Reg dst0,Reg dst1,bool dstSigned,MemRef mem)2908 void Aarch64Encoder::EncodeLdp(Reg dst0, Reg dst1, bool dstSigned, MemRef mem)
2909 {
2910 ASSERT(dst0.IsFloat() == dst1.IsFloat());
2911 ASSERT(dst0.GetSize() == dst1.GetSize());
2912 if (!ConvertMem(mem).IsValid()) {
2913 // Encode one Ldr - will fix inside
2914 EncodeLdr(dst0, dstSigned, mem);
2915 return;
2916 }
2917
2918 if (dst0.IsFloat()) {
2919 GetMasm()->Ldp(VixlVReg(dst0), VixlVReg(dst1), ConvertMem(mem));
2920 return;
2921 }
2922 if (dstSigned && dst0.GetSize() == WORD_SIZE) {
2923 GetMasm()->Ldpsw(VixlReg(dst0, DOUBLE_WORD_SIZE), VixlReg(dst1, DOUBLE_WORD_SIZE), ConvertMem(mem));
2924 return;
2925 }
2926 GetMasm()->Ldp(VixlReg(dst0), VixlReg(dst1), ConvertMem(mem));
2927 }
2928
EncodeStp(Reg src0,Reg src1,MemRef mem)2929 void Aarch64Encoder::EncodeStp(Reg src0, Reg src1, MemRef mem)
2930 {
2931 ASSERT(src0.IsFloat() == src1.IsFloat());
2932 ASSERT(src0.GetSize() == src1.GetSize());
2933 if (!ConvertMem(mem).IsValid()) {
2934 // Encode one Str - will fix inside
2935 EncodeStr(src0, mem);
2936 return;
2937 }
2938
2939 if (src0.IsFloat()) {
2940 GetMasm()->Stp(VixlVReg(src0), VixlVReg(src1), ConvertMem(mem));
2941 return;
2942 }
2943 GetMasm()->Stp(VixlReg(src0), VixlReg(src1), ConvertMem(mem));
2944 }
2945
EncodeMAdd(Reg dst,Reg src0,Reg src1,Reg src2)2946 void Aarch64Encoder::EncodeMAdd(Reg dst, Reg src0, Reg src1, Reg src2)
2947 {
2948 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize() && dst.GetSize() == src2.GetSize());
2949 ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar() && dst.IsScalar() == src2.IsScalar());
2950
2951 ASSERT(!GetRegfile()->IsZeroReg(dst));
2952
2953 if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
2954 EncodeMov(dst, src2);
2955 return;
2956 }
2957
2958 if (GetRegfile()->IsZeroReg(src2)) {
2959 EncodeMul(dst, src0, src1);
2960 return;
2961 }
2962
2963 if (dst.IsScalar()) {
2964 GetMasm()->Madd(VixlReg(dst), VixlReg(src0), VixlReg(src1), VixlReg(src2));
2965 } else {
2966 GetMasm()->Fmadd(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), VixlVReg(src2));
2967 }
2968 }
2969
EncodeMSub(Reg dst,Reg src0,Reg src1,Reg src2)2970 void Aarch64Encoder::EncodeMSub(Reg dst, Reg src0, Reg src1, Reg src2)
2971 {
2972 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize() && dst.GetSize() == src2.GetSize());
2973 ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar() && dst.IsScalar() == src2.IsScalar());
2974
2975 ASSERT(!GetRegfile()->IsZeroReg(dst));
2976
2977 if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
2978 EncodeMov(dst, src2);
2979 return;
2980 }
2981
2982 if (GetRegfile()->IsZeroReg(src2)) {
2983 EncodeMNeg(dst, src0, src1);
2984 return;
2985 }
2986
2987 if (dst.IsScalar()) {
2988 GetMasm()->Msub(VixlReg(dst), VixlReg(src0), VixlReg(src1), VixlReg(src2));
2989 } else {
2990 GetMasm()->Fmsub(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), VixlVReg(src2));
2991 }
2992 }
2993
EncodeMNeg(Reg dst,Reg src0,Reg src1)2994 void Aarch64Encoder::EncodeMNeg(Reg dst, Reg src0, Reg src1)
2995 {
2996 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
2997 ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar());
2998
2999 ASSERT(!GetRegfile()->IsZeroReg(dst));
3000
3001 if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
3002 EncodeMov(dst, Imm(0U));
3003 return;
3004 }
3005
3006 if (dst.IsScalar()) {
3007 GetMasm()->Mneg(VixlReg(dst), VixlReg(src0), VixlReg(src1));
3008 } else {
3009 GetMasm()->Fnmul(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
3010 }
3011 }
3012
EncodeOrNot(Reg dst,Reg src0,Reg src1)3013 void Aarch64Encoder::EncodeOrNot(Reg dst, Reg src0, Reg src1)
3014 {
3015 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
3016 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
3017 GetMasm()->Orn(VixlReg(dst), VixlReg(src0), VixlReg(src1));
3018 }
3019
EncodeOrNot(Reg dst,Reg src0,Shift src1)3020 void Aarch64Encoder::EncodeOrNot(Reg dst, Reg src0, Shift src1)
3021 {
3022 ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
3023 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
3024 GetMasm()->Orn(VixlReg(dst), VixlReg(src0), VixlShift(src1));
3025 }
3026
EncodeExtractBits(Reg dst,Reg src0,Imm imm1,Imm imm2)3027 void Aarch64Encoder::EncodeExtractBits(Reg dst, Reg src0, Imm imm1, Imm imm2)
3028 {
3029 GetMasm()->Ubfx(VixlReg(dst), VixlReg(src0), imm1.GetAsInt(), imm2.GetAsInt());
3030 }
3031
EncodeAndNot(Reg dst,Reg src0,Reg src1)3032 void Aarch64Encoder::EncodeAndNot(Reg dst, Reg src0, Reg src1)
3033 {
3034 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
3035 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
3036 GetMasm()->Bic(VixlReg(dst), VixlReg(src0), VixlReg(src1));
3037 }
3038
EncodeAndNot(Reg dst,Reg src0,Shift src1)3039 void Aarch64Encoder::EncodeAndNot(Reg dst, Reg src0, Shift src1)
3040 {
3041 ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
3042 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
3043 GetMasm()->Bic(VixlReg(dst), VixlReg(src0), VixlShift(src1));
3044 }
3045
EncodeXorNot(Reg dst,Reg src0,Reg src1)3046 void Aarch64Encoder::EncodeXorNot(Reg dst, Reg src0, Reg src1)
3047 {
3048 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
3049 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
3050 GetMasm()->Eon(VixlReg(dst), VixlReg(src0), VixlReg(src1));
3051 }
3052
EncodeXorNot(Reg dst,Reg src0,Shift src1)3053 void Aarch64Encoder::EncodeXorNot(Reg dst, Reg src0, Shift src1)
3054 {
3055 ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
3056 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
3057 GetMasm()->Eon(VixlReg(dst), VixlReg(src0), VixlShift(src1));
3058 }
3059
EncodeNeg(Reg dst,Shift src)3060 void Aarch64Encoder::EncodeNeg(Reg dst, Shift src)
3061 {
3062 ASSERT(dst.GetSize() == src.GetBase().GetSize());
3063 ASSERT(dst.IsScalar() && src.GetBase().IsScalar());
3064 GetMasm()->Neg(VixlReg(dst), VixlShift(src));
3065 }
3066
EncodeStackOverflowCheck(ssize_t offset)3067 void Aarch64Encoder::EncodeStackOverflowCheck(ssize_t offset)
3068 {
3069 ScopedTmpReg tmp(this);
3070 EncodeAdd(tmp, GetTarget().GetStackReg(), Imm(offset));
3071 EncodeLdr(tmp, false, MemRef(tmp));
3072 }
3073
EncodeGetCurrentPc(Reg dst)3074 void Aarch64Encoder::EncodeGetCurrentPc(Reg dst)
3075 {
3076 ASSERT(dst.GetType() == INT64_TYPE);
3077
3078 auto currentPc = CreateLabel();
3079 BindLabel(currentPc);
3080
3081 auto *labelHolder = static_cast<Aarch64LabelHolder *>(GetLabels());
3082 GetMasm()->Adr(VixlReg(dst), labelHolder->GetLabel(currentPc));
3083 }
3084
CanEncodeImmAddSubCmp(int64_t imm,uint32_t size,bool signedCompare)3085 bool Aarch64Encoder::CanEncodeImmAddSubCmp(int64_t imm, [[maybe_unused]] uint32_t size,
3086 [[maybe_unused]] bool signedCompare)
3087 {
3088 if (imm == INT64_MIN) {
3089 return false;
3090 }
3091 if (imm < 0) {
3092 imm = -imm;
3093 }
3094 return vixl::aarch64::Assembler::IsImmAddSub(imm);
3095 }
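
/*
 * For reference: IsImmAddSub accepts the A64 add/sub immediate form, a 12-bit value optionally shifted
 * left by 12. So, for example, 4095 and 0xFFF000 are encodable, while 4097 is not and must go through a
 * temporary register. Negative immediates are checked above via their absolute value.
 */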
3096
CanEncodeImmLogical(uint64_t imm,uint32_t size)3097 bool Aarch64Encoder::CanEncodeImmLogical(uint64_t imm, uint32_t size)
3098 {
3099 #ifndef NDEBUG
3100 if (size < DOUBLE_WORD_SIZE) {
3101 // Test if the highest part is consistent:
3102 ASSERT(((imm >> size) == 0) || (((~imm) >> size) == 0));
3103 }
3104 #endif // NDEBUG
3105 return vixl::aarch64::Assembler::IsImmLogical(imm, size);
3106 }
3107
CanOptimizeImmDivMod(uint64_t imm,bool isSigned) const3108 bool Aarch64Encoder::CanOptimizeImmDivMod(uint64_t imm, bool isSigned) const
3109 {
3110 return CanOptimizeImmDivModCommon(imm, isSigned);
3111 }
3112
3113 /*
3114 * From aarch64 instruction set
3115 *
3116 * ========================================================
3117 * Syntax
3118 *
3119 * LDR Wt, [Xn|SP, Rm{, extend {amount}}] ; 32-bit general registers
3120 *
3121 * LDR Xt, [Xn|SP, Rm{, extend {amount}}] ; 64-bit general registers
3122 *
3123 * amount
3124 * Is the index shift amount, optional and defaulting to #0 when extend is not LSL:
3125 *
3126 * 32-bit general registers
3127 * Can be one of #0 or #2.
3128 *
3129 * 64-bit general registers
3130 * Can be one of #0 or #3.
3131 * ========================================================
3132 * Syntax
3133 *
3134 * LDRH Wt, [Xn|SP, Rm{, extend {amount}}]
3135 *
3136 * amount
3137 * Is the index shift amount, optional and defaulting to #0 when extend is not LSL, and can be either #0 or #1.
3138 * ========================================================
3139 *
3140 * Scale can be 0 or 1 for half load, 2 for word load, 3 for double word load
3141 */
CanEncodeScale(uint64_t imm,uint32_t size)3142 bool Aarch64Encoder::CanEncodeScale(uint64_t imm, uint32_t size)
3143 {
3144 return (imm == 0) || ((1U << imm) == (size >> 3U));
3145 }
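
/*
 * Examples for the check above: the scale is a left-shift amount that must match the access size,
 * so a 32-bit load (size == 32 bits, 4 bytes) accepts scale 0 or 2, a 16-bit load accepts 0 or 1,
 * and a 64-bit load accepts 0 or 3, matching the LDR/LDRH syntax notes above.
 */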

bool Aarch64Encoder::CanEncodeShiftedOperand(ShiftOpcode opcode, ShiftType shiftType)
{
    switch (opcode) {
        case ShiftOpcode::NEG_SR:
        case ShiftOpcode::ADD_SR:
        case ShiftOpcode::SUB_SR:
            return shiftType == ShiftType::LSL || shiftType == ShiftType::LSR || shiftType == ShiftType::ASR;
        case ShiftOpcode::AND_SR:
        case ShiftOpcode::OR_SR:
        case ShiftOpcode::XOR_SR:
        case ShiftOpcode::AND_NOT_SR:
        case ShiftOpcode::OR_NOT_SR:
        case ShiftOpcode::XOR_NOT_SR:
            return shiftType != ShiftType::INVALID_SHIFT;
        default:
            return false;
    }
}

bool Aarch64Encoder::CanEncodeFloatSelect()
{
    return true;
}

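/*
 * Scratch (temporary) register management. Scratch registers are taken from
 * VIXL's scratch lists directly rather than through a UseScratchRegisterScope
 * (the asserts require that no such scope is active). The link register is
 * special-cased: it can be borrowed as an extra temporary, and lrAcquired_
 * tracks that it is not handed out twice.
 */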
Reg Aarch64Encoder::AcquireScratchRegister(TypeInfo type)
{
    ASSERT(GetMasm()->GetCurrentScratchRegisterScope() == nullptr);
    auto reg = type.IsFloat() ? GetMasm()->GetScratchVRegisterList()->PopLowestIndex()
                              : GetMasm()->GetScratchRegisterList()->PopLowestIndex();
    ASSERT(reg.IsValid());
    return Reg(reg.GetCode(), type);
}

void Aarch64Encoder::AcquireScratchRegister(Reg reg)
{
    ASSERT(GetMasm()->GetCurrentScratchRegisterScope() == nullptr);
    if (reg == GetTarget().GetLinkReg()) {
        ASSERT_PRINT(!lrAcquired_, "Trying to acquire LR, which hasn't been released before");
        lrAcquired_ = true;
        return;
    }
    auto type = reg.GetType();
    auto regId = reg.GetId();

    if (type.IsFloat()) {
        ASSERT(GetMasm()->GetScratchVRegisterList()->IncludesAliasOf(VixlVReg(reg)));
        GetMasm()->GetScratchVRegisterList()->Remove(regId);
    } else {
        ASSERT(GetMasm()->GetScratchRegisterList()->IncludesAliasOf(VixlReg(reg)));
        GetMasm()->GetScratchRegisterList()->Remove(regId);
    }
}

void Aarch64Encoder::ReleaseScratchRegister(Reg reg)
{
    if (reg == GetTarget().GetLinkReg()) {
        ASSERT_PRINT(lrAcquired_, "Trying to release LR, which hasn't been acquired before");
        lrAcquired_ = false;
    } else if (reg.IsFloat()) {
        GetMasm()->GetScratchVRegisterList()->Combine(reg.GetId());
    } else if (reg.GetId() != GetTarget().GetLinkReg().GetId()) {
        GetMasm()->GetScratchRegisterList()->Combine(reg.GetId());
    }
}

bool Aarch64Encoder::IsScratchRegisterReleased(Reg reg) const
{
    if (reg == GetTarget().GetLinkReg()) {
        return !lrAcquired_;
    }
    if (reg.IsFloat()) {
        return GetMasm()->GetScratchVRegisterList()->IncludesAliasOf(VixlVReg(reg));
    }
    return GetMasm()->GetScratchRegisterList()->IncludesAliasOf(VixlReg(reg));
}

RegMask Aarch64Encoder::GetScratchRegistersMask() const
{
    return RegMask(GetMasm()->GetScratchRegisterList()->GetList());
}

RegMask Aarch64Encoder::GetScratchFpRegistersMask() const
{
    return RegMask(GetMasm()->GetScratchVRegisterList()->GetList());
}

RegMask Aarch64Encoder::GetAvailableScratchRegisters() const
{
    return RegMask(GetMasm()->GetScratchRegisterList()->GetList());
}

VRegMask Aarch64Encoder::GetAvailableScratchFpRegisters() const
{
    return VRegMask(GetMasm()->GetScratchVRegisterList()->GetList());
}

TypeInfo Aarch64Encoder::GetRefType()
{
    return INT64_TYPE;
}

void *Aarch64Encoder::BufferData() const
{
    return GetMasm()->GetBuffer()->GetStartAddress<void *>();
}

size_t Aarch64Encoder::BufferSize() const
{
    return GetMasm()->GetBuffer()->GetSizeInBytes();
}

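/*
 * Library call for a binary floating-point operation. Per the AAPCS64 calling
 * convention the arguments are expected in s0/s1 (or d0/d1) and the result
 * comes back in s0 (or d0), so the sources are shuffled into place through a
 * scratch FP register before the call, and the result is copied into dst
 * afterwards when dst lives elsewhere.
 */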
void Aarch64Encoder::MakeLibCall(Reg dst, Reg src0, Reg src1, const void *entryPoint)
{
    if (!dst.IsFloat()) {
        SetFalseResult();
        return;
    }
    if (dst.GetType() == FLOAT32_TYPE) {
        if (!src0.IsFloat() || !src1.IsFloat()) {
            SetFalseResult();
            return;
        }

        if (src0.GetId() != vixl::aarch64::s0.GetCode() || src1.GetId() != vixl::aarch64::s1.GetCode()) {
            ScopedTmpRegF32 tmp(this);
            GetMasm()->Fmov(VixlVReg(tmp), VixlVReg(src1));
            GetMasm()->Fmov(vixl::aarch64::s0, VixlVReg(src0));
            GetMasm()->Fmov(vixl::aarch64::s1, VixlVReg(tmp));
        }

        MakeCall(entryPoint);

        if (dst.GetId() != vixl::aarch64::s0.GetCode()) {
            GetMasm()->Fmov(VixlVReg(dst), vixl::aarch64::s0);
        }
    } else if (dst.GetType() == FLOAT64_TYPE) {
        if (!src0.IsFloat() || !src1.IsFloat()) {
            SetFalseResult();
            return;
        }

        if (src0.GetId() != vixl::aarch64::d0.GetCode() || src1.GetId() != vixl::aarch64::d1.GetCode()) {
            ScopedTmpRegF64 tmp(this);
            GetMasm()->Fmov(VixlVReg(tmp), VixlVReg(src1));

            GetMasm()->Fmov(vixl::aarch64::d0, VixlVReg(src0));
            GetMasm()->Fmov(vixl::aarch64::d1, VixlVReg(tmp));
        }

        MakeCall(entryPoint);

        if (dst.GetId() != vixl::aarch64::d0.GetCode()) {
            GetMasm()->Fmov(VixlVReg(dst), vixl::aarch64::d0);
        }
    } else {
        UNREACHABLE();
    }
}

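/*
 * Spill/fill of a register set relative to SP. Registers are processed in
 * pairs so that STP/LDP can be used; when the largest slot offset does not
 * fit the scaled immediate form of STP/LDP, the base offset is first added
 * into a temporary (LR, borrowed via ScopedTmpReg) and that register is used
 * as the base instead of SP.
 */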
template <bool IS_STORE>
void Aarch64Encoder::LoadStoreRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
{
    if (registers.none()) {
        return;
    }
    auto lastReg = static_cast<int32_t>(registers.size() - 1);
    for (; lastReg >= 0; --lastReg) {
        if (registers.test(lastReg)) {
            break;
        }
    }
    // Construct single add for big offset
    size_t spOffset;
    auto lastOffset = (slot + lastReg - static_cast<ssize_t>(startReg)) * static_cast<ssize_t>(DOUBLE_WORD_SIZE_BYTES);

    if (!vixl::aarch64::Assembler::IsImmLSPair(lastOffset, vixl::aarch64::kXRegSizeInBytesLog2)) {
        ScopedTmpReg lrReg(this, true);
        auto tmp = VixlReg(lrReg);
        spOffset = static_cast<size_t>(slot * DOUBLE_WORD_SIZE_BYTES);
        slot = 0;
        if (vixl::aarch64::Assembler::IsImmAddSub(spOffset)) {
            GetMasm()->Add(tmp, vixl::aarch64::sp, VixlImm(spOffset));
        } else {
            GetMasm()->Mov(tmp, VixlImm(spOffset));
            GetMasm()->Add(tmp, vixl::aarch64::sp, tmp);
        }
        LoadStoreRegistersLoop<IS_STORE>(registers, slot, startReg, isFp, tmp);
    } else {
        LoadStoreRegistersLoop<IS_STORE>(registers, slot, startReg, isFp, vixl::aarch64::sp);
    }
}

template <bool IS_STORE>
static void LoadStorePair(vixl::aarch64::MacroAssembler *masm, CPURegister lastReg, CPURegister reg, Reg base,
                          int32_t idx)
{
    auto baseReg = VixlReg(base);
    static constexpr int32_t OFFSET = 2;
    if constexpr (IS_STORE) { // NOLINT
        masm->Stp(lastReg, reg, MemOperand(baseReg, (idx - OFFSET) * DOUBLE_WORD_SIZE_BYTES));
    } else { // NOLINT
        masm->Ldp(lastReg, reg, MemOperand(baseReg, (idx - OFFSET) * DOUBLE_WORD_SIZE_BYTES));
    }
}

template <bool IS_STORE>
static void LoadStoreReg(vixl::aarch64::MacroAssembler *masm, CPURegister lastReg, Reg base, int32_t idx)
{
    auto baseReg = VixlReg(base);
    if constexpr (IS_STORE) { // NOLINT
        masm->Str(lastReg, MemOperand(baseReg, (idx - 1) * DOUBLE_WORD_SIZE_BYTES));
    } else { // NOLINT
        masm->Ldr(lastReg, MemOperand(baseReg, (idx - 1) * DOUBLE_WORD_SIZE_BYTES));
    }
}

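/*
 * Main pairing loop for the base-relative variant. The optional mask selects
 * which registers own a slot in the frame; registers with consecutive ids are
 * emitted as one STP/LDP, while a leftover single register falls back to a
 * plain STR/LDR.
 */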
template <bool IS_STORE>
void Aarch64Encoder::LoadStoreRegistersMainLoop(RegMask registers, bool isFp, int32_t slot, Reg base, RegMask mask)
{
    bool hasMask = mask.any();
    int32_t index = hasMask ? static_cast<int32_t>(mask.GetMinRegister()) : 0;
    int32_t lastIndex = -1;
    ssize_t lastId = -1;

    slot -= index;
    for (ssize_t id = index; id < helpers::ToSigned(registers.size()); id++) {
        if (hasMask) {
            if (!mask.test(id)) {
                continue;
            }
            index++;
        }
        if (!registers.test(id)) {
            continue;
        }
        if (!hasMask) {
            index++;
        }
        if (lastId == -1) {
            lastId = id;
            lastIndex = index;
            continue;
        }

        auto lastReg =
            CPURegister(lastId, vixl::aarch64::kXRegSize, isFp ? CPURegister::kVRegister : CPURegister::kRegister);
        if (!hasMask || lastId + 1 == id) {
            auto reg =
                CPURegister(id, vixl::aarch64::kXRegSize, isFp ? CPURegister::kVRegister : CPURegister::kRegister);
            LoadStorePair<IS_STORE>(GetMasm(), lastReg, reg, base, slot + index);
            lastId = -1;
        } else {
            LoadStoreReg<IS_STORE>(GetMasm(), lastReg, base, slot + lastIndex);
            lastId = id;
            lastIndex = index;
        }
    }
    if (lastId != -1) {
        auto lastReg =
            CPURegister(lastId, vixl::aarch64::kXRegSize, isFp ? CPURegister::kVRegister : CPURegister::kRegister);
        LoadStoreReg<IS_STORE>(GetMasm(), lastReg, base, slot + lastIndex);
    }
}

template <bool IS_STORE>
void Aarch64Encoder::LoadStoreRegisters(RegMask registers, bool isFp, int32_t slot, Reg base, RegMask mask)
{
    if (registers.none()) {
        return;
    }

    int32_t maxOffset = (slot + helpers::ToSigned(registers.GetMaxRegister())) * DOUBLE_WORD_SIZE_BYTES;
    int32_t minOffset = (slot + helpers::ToSigned(registers.GetMinRegister())) * DOUBLE_WORD_SIZE_BYTES;

    ScopedTmpRegLazy tmpReg(this, true);
    // Construct single add for big offset
    if (!vixl::aarch64::Assembler::IsImmLSPair(minOffset, vixl::aarch64::kXRegSizeInBytesLog2) ||
        !vixl::aarch64::Assembler::IsImmLSPair(maxOffset, vixl::aarch64::kXRegSizeInBytesLog2)) {
        tmpReg.AcquireWithLr();
        auto lrReg = VixlReg(tmpReg);
        ssize_t spOffset = slot * DOUBLE_WORD_SIZE_BYTES;
        if (vixl::aarch64::Assembler::IsImmAddSub(spOffset)) {
            GetMasm()->Add(lrReg, VixlReg(base), VixlImm(spOffset));
        } else {
            GetMasm()->Mov(lrReg, VixlImm(spOffset));
            GetMasm()->Add(lrReg, VixlReg(base), lrReg);
        }
        // Adjust new values for slot and base register
        slot = 0;
        base = tmpReg;
    }

    LoadStoreRegistersMainLoop<IS_STORE>(registers, isFp, slot, base, mask);
}

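/*
 * SP-relative variant without a mask: walk the register set in ascending
 * order and pair neighbouring ids (code and code + 1) into STP/LDP, falling
 * back to a single STR/LDR when the next set bit is not adjacent.
 */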
template <bool IS_STORE>
void Aarch64Encoder::LoadStoreRegistersLoop(RegMask registers, ssize_t slot, size_t startReg, bool isFp,
                                            const vixl::aarch64::Register &baseReg)
{
    size_t i = 0;
    const auto getNextReg = [&registers, &i, isFp]() {
        for (; i < registers.size(); i++) {
            if (registers.test(i)) {
                return CPURegister(i++, vixl::aarch64::kXRegSize,
                                   isFp ? CPURegister::kVRegister : CPURegister::kRegister);
            }
        }
        return CPURegister();
    };

    for (CPURegister nextReg = getNextReg(); nextReg.IsValid();) {
        const CPURegister currReg = nextReg;
        nextReg = getNextReg();
        if (nextReg.IsValid() && (nextReg.GetCode() - 1 == currReg.GetCode())) {
            if constexpr (IS_STORE) { // NOLINT
                GetMasm()->Stp(currReg, nextReg,
                               MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
            } else { // NOLINT
                GetMasm()->Ldp(currReg, nextReg,
                               MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
            }
            nextReg = getNextReg();
        } else {
            if constexpr (IS_STORE) { // NOLINT
                GetMasm()->Str(currReg,
                               MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
            } else { // NOLINT
                GetMasm()->Ldr(currReg,
                               MemOperand(baseReg, (slot + currReg.GetCode() - startReg) * DOUBLE_WORD_SIZE_BYTES));
            }
        }
    }
}

void Aarch64Encoder::SaveRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
{
    LoadStoreRegisters<true>(registers, slot, startReg, isFp);
}

void Aarch64Encoder::LoadRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
{
    LoadStoreRegisters<false>(registers, slot, startReg, isFp);
}

void Aarch64Encoder::SaveRegisters(RegMask registers, bool isFp, ssize_t slot, Reg base, RegMask mask)
{
    LoadStoreRegisters<true>(registers, isFp, slot, base, mask);
}

void Aarch64Encoder::LoadRegisters(RegMask registers, bool isFp, ssize_t slot, Reg base, RegMask mask)
{
    LoadStoreRegisters<false>(registers, isFp, slot, base, mask);
}

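/*
 * Push/pop a register set across the stack pointer. Registers are stored in
 * pairs with pre-indexed STP so SP stays 16-byte aligned; an odd trailing
 * register still consumes a full 16-byte slot. PopRegisters mirrors this in
 * reverse order, starting with the lone highest register when the set has an
 * odd population count.
 */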
void Aarch64Encoder::PushRegisters(RegMask registers, bool isFp)
{
    static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTES;
    Register lastReg = INVALID_REG;
    for (size_t i = 0; i < registers.size(); i++) {
        if (registers[i]) {
            if (lastReg == INVALID_REG) {
                lastReg = i;
                continue;
            }
            if (isFp) {
                GetMasm()->stp(vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
                               vixl::aarch64::VRegister(i, DOUBLE_WORD_SIZE),
                               MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
            } else {
                GetMasm()->stp(vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
                               vixl::aarch64::Register(i, DOUBLE_WORD_SIZE),
                               MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
            }
            lastReg = INVALID_REG;
        }
    }
    if (lastReg != INVALID_REG) {
        if (isFp) {
            GetMasm()->str(vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
                           MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
        } else {
            GetMasm()->str(vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
                           MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
        }
    }
}

void Aarch64Encoder::PopRegisters(RegMask registers, bool isFp)
{
    static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTES;
    Register lastReg;
    if ((registers.count() & 1U) != 0) {
        lastReg = registers.GetMaxRegister();
        if (isFp) {
            GetMasm()->ldr(vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
                           MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
        } else {
            GetMasm()->ldr(vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
                           MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
        }
        registers.reset(lastReg);
    }
    lastReg = INVALID_REG;
    for (auto i = static_cast<ssize_t>(registers.size() - 1); i >= 0; i--) {
        if (registers[i]) {
            if (lastReg == INVALID_REG) {
                lastReg = i;
                continue;
            }
            if (isFp) {
                GetMasm()->ldp(vixl::aarch64::VRegister(i, DOUBLE_WORD_SIZE),
                               vixl::aarch64::VRegister(lastReg, DOUBLE_WORD_SIZE),
                               MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
            } else {
                GetMasm()->ldp(vixl::aarch64::Register(i, DOUBLE_WORD_SIZE),
                               vixl::aarch64::Register(lastReg, DOUBLE_WORD_SIZE),
                               MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
            }
            lastReg = INVALID_REG;
        }
    }
}

vixl::aarch64::MacroAssembler *Aarch64Encoder::GetMasm() const
{
    ASSERT(masm_ != nullptr);
    return masm_;
}

size_t Aarch64Encoder::GetLabelAddress(LabelHolder::LabelId label)
{
    auto plabel = labels_->GetLabel(label);
    ASSERT(plabel->IsBound());
    return GetMasm()->GetLabelAddress<size_t>(plabel);
}

bool Aarch64Encoder::LabelHasLinks(LabelHolder::LabelId label)
{
    auto plabel = labels_->GetLabel(label);
    return plabel->IsLinked();
}

#ifndef PANDA_MINIMAL_VIXL
vixl::aarch64::Decoder &Aarch64Encoder::GetDecoder() const
{
    if (!decoder_) {
        decoder_.emplace(GetAllocator());
        decoder_->visitors()->push_back(&GetDisasm());
    }
    return *decoder_;
}

vixl::aarch64::Disassembler &Aarch64Encoder::GetDisasm() const
{
    if (!disasm_) {
        disasm_.emplace(GetAllocator());
    }
    return *disasm_;
}
#endif

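/*
 * Disassemble the single instruction at buffer offset pc and print it to the
 * stream; a non-negative codeOffset prefixes the line with the instruction's
 * address relative to the installed code. Returns the offset of the next
 * instruction (aarch64 instructions are a fixed 4 bytes). With
 * PANDA_MINIMAL_VIXL the disassembler is compiled out and only the offset
 * advance remains.
 */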
size_t Aarch64Encoder::DisasmInstr([[maybe_unused]] std::ostream &stream, size_t pc,
                                   [[maybe_unused]] ssize_t codeOffset) const
{
#ifndef PANDA_MINIMAL_VIXL
    auto bufferStart = GetMasm()->GetBuffer()->GetOffsetAddress<uintptr_t>(0);
    auto instr = GetMasm()->GetBuffer()->GetOffsetAddress<vixl::aarch64::Instruction *>(pc);
    GetDecoder().Decode(instr);
    if (codeOffset < 0) {
        stream << GetDisasm().GetOutput();
    } else {
        stream << std::setw(0x4) << std::right << std::setfill('0') << std::hex
               << reinterpret_cast<uintptr_t>(instr) - bufferStart + static_cast<size_t>(codeOffset) << ": "
               << GetDisasm().GetOutput() << std::setfill(' ') << std::dec;
    }

#endif
    return pc + vixl::aarch64::kInstructionSize;
}
} // namespace ark::compiler::aarch64