1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 /*
16 Encoder (implementation of math and mem Low-level emitters)
17 */
18
19 #include "encode.h"
20 #include "target/aarch64/target.h"
21 #include "compiler/optimizer/code_generator/relocations.h"
22
23 #if defined(USE_VIXL_ARM64) && !defined(PANDA_MINIMAL_VIXL)
24 #include "aarch64/disasm-aarch64.h"
25 #endif
26
27 #include <iomanip>
28
29 #include "lib_helpers.inl"
30
31 #ifndef PANDA_TARGET_MACOS
32 #include "elf.h"
33 #endif // PANDA_TARGET_MACOS
34
35 namespace panda::compiler::aarch64 {
36 using vixl::aarch64::CPURegister;
37 using vixl::aarch64::MemOperand;
38
Promote(Reg reg)39 static inline Reg Promote(Reg reg)
40 {
41 if (reg.GetType() == INT8_TYPE) {
42 return Reg(reg.GetId(), INT16_TYPE);
43 }
44 return reg;
45 }
46
BindLabel(LabelId id)47 void Aarch64LabelHolder::BindLabel(LabelId id)
48 {
49 static_cast<Aarch64Encoder *>(GetEncoder())->GetMasm()->Bind(labels_[id]);
50 }
51
Aarch64Encoder(ArenaAllocator * allocator)52 Aarch64Encoder::Aarch64Encoder(ArenaAllocator *allocator) : Encoder(allocator, Arch::AARCH64)
53 {
54 labels_ = allocator->New<Aarch64LabelHolder>(this);
55 if (labels_ == nullptr) {
56 SetFalseResult();
57 }
58 // We enable LR tmp reg by default in Aarch64
59 EnableLrAsTempReg(true);
60 }
61
~Aarch64Encoder()62 Aarch64Encoder::~Aarch64Encoder()
63 {
64 auto labels = static_cast<Aarch64LabelHolder *>(GetLabels())->labels_;
65 for (auto label : labels) {
66 label->~Label();
67 }
68 if (masm_ != nullptr) {
69 masm_->~MacroAssembler();
70 masm_ = nullptr;
71 }
72 #ifndef PANDA_MINIMAL_VIXL
73 if (decoder_ != nullptr) {
74 decoder_->~Decoder();
75 decoder_ = nullptr;
76 }
77 #endif
78 }
79
InitMasm()80 bool Aarch64Encoder::InitMasm()
81 {
82 if (masm_ == nullptr) {
83 // Initialize Masm
84 masm_ = GetAllocator()->New<vixl::aarch64::MacroAssembler>(GetAllocator());
85 if (masm_ == nullptr || !masm_->IsValid()) {
86 SetFalseResult();
87 return false;
88 }
89 ASSERT(GetMasm());
90
91 // Make sure that the compiler uses the same scratch registers as the assembler
92 CHECK_EQ(RegMask(GetMasm()->GetScratchRegisterList()->GetList()), GetTarget().GetTempRegsMask());
93 CHECK_EQ(RegMask(GetMasm()->GetScratchVRegisterList()->GetList()), GetTarget().GetTempVRegsMask());
94 }
95 return true;
96 }
97
Finalize()98 void Aarch64Encoder::Finalize()
99 {
100 GetMasm()->FinalizeCode();
101 }
102
EncodeJump(LabelHolder::LabelId id)103 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id)
104 {
105 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
106 GetMasm()->B(label);
107 }
108
EncodeJump(LabelHolder::LabelId id,Reg src0,Reg src1,Condition cc)109 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
110 {
111 if (src1.GetId() == GetRegfile()->GetZeroReg().GetId()) {
112 EncodeJump(id, src0, cc);
113 return;
114 }
115
116 if (src0.IsScalar()) {
117 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
118 } else {
119 GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
120 }
121
122 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
123 GetMasm()->B(label, Convert(cc));
124 }
125
EncodeJump(LabelHolder::LabelId id,Reg src,Imm imm,Condition cc)126 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
127 {
128 auto value = GetIntValue(imm);
129 if (value == 0) {
130 EncodeJump(id, src, cc);
131 return;
132 }
133
134 ASSERT(CanEncodeImmAddSubCmp(value, src.GetSize(), false));
135 if (value < 0) {
136 GetMasm()->Cmn(VixlReg(src), VixlImm(-value));
137 } else { // if (value > 0)
138 GetMasm()->Cmp(VixlReg(src), VixlImm(value));
139 }
140
141 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
142 GetMasm()->B(label, Convert(cc));
143 }
144
EncodeJumpTest(LabelHolder::LabelId id,Reg src0,Reg src1,Condition cc)145 void Aarch64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
146 {
147 ASSERT(src0.IsScalar() && src1.IsScalar());
148
149 GetMasm()->Tst(VixlReg(src0), VixlReg(src1));
150 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
151 GetMasm()->B(label, ConvertTest(cc));
152 }
153
EncodeJumpTest(LabelHolder::LabelId id,Reg src,Imm imm,Condition cc)154 void Aarch64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
155 {
156 ASSERT(src.IsScalar());
157
158 auto value = GetIntValue(imm);
159 ASSERT(CanEncodeImmLogical(value, imm.GetSize() > WORD_SIZE ? DOUBLE_WORD_SIZE : WORD_SIZE));
160
161 GetMasm()->Tst(VixlReg(src), VixlImm(value));
162 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
163 GetMasm()->B(label, ConvertTest(cc));
164 }
165
EncodeJump(LabelHolder::LabelId id,Reg src,Condition cc)166 void Aarch64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Condition cc)
167 {
168 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
169 ASSERT(src.IsScalar());
170 auto rzero = Reg(GetRegfile()->GetZeroReg().GetId(), src.GetType());
171
172 switch (cc) {
173 case Condition::LO:
174 // Always false
175 return;
176 case Condition::HS:
177 // Always true
178 GetMasm()->B(label);
179 return;
180 case Condition::EQ:
181 case Condition::LS:
182 if (src.GetId() == rzero.GetId()) {
183 GetMasm()->B(label);
184 return;
185 }
186 // True only when zero
187 GetMasm()->Cbz(VixlReg(src), label);
188 return;
189 case Condition::NE:
190 case Condition::HI:
191 if (src.GetId() == rzero.GetId()) {
192 // Do nothing
193 return;
194 }
195 // True only when non-zero
196 GetMasm()->Cbnz(VixlReg(src), label);
197 return;
198 default:
199 break;
200 }
201
202 ASSERT(rzero.IsValid());
203 GetMasm()->Cmp(VixlReg(src), VixlReg(rzero));
204 GetMasm()->B(label, Convert(cc));
205 }
206
EncodeJump(Reg dst)207 void Aarch64Encoder::EncodeJump(Reg dst)
208 {
209 GetMasm()->Br(VixlReg(dst));
210 }
211
EncodeJump(RelocationInfo * relocation)212 void Aarch64Encoder::EncodeJump([[maybe_unused]] RelocationInfo *relocation)
213 {
214 #ifdef PANDA_TARGET_MACOS
215 LOG(FATAL, COMPILER) << "Not supported in Macos build";
216 #else
217 auto buffer = GetMasm()->GetBuffer();
218 relocation->offset = GetCursorOffset();
219 relocation->addend = 0;
220 relocation->type = R_AARCH64_CALL26;
221 static constexpr uint32_t CALL_WITH_ZERO_OFFSET = 0x14000000;
222 buffer->Emit32(CALL_WITH_ZERO_OFFSET);
223 #endif
224 }
225
EncodeBitTestAndBranch(LabelHolder::LabelId id,compiler::Reg reg,uint32_t bit_pos,bool bit_value)226 void Aarch64Encoder::EncodeBitTestAndBranch(LabelHolder::LabelId id, compiler::Reg reg, uint32_t bit_pos,
227 bool bit_value)
228 {
229 ASSERT(reg.IsScalar() && reg.GetSize() > bit_pos);
230 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
231 if (bit_value) {
232 GetMasm()->Tbnz(VixlReg(reg), bit_pos, label);
233 } else {
234 GetMasm()->Tbz(VixlReg(reg), bit_pos, label);
235 }
236 }
237
EncodeNop()238 void Aarch64Encoder::EncodeNop()
239 {
240 GetMasm()->Nop();
241 }
242
MakeCall(compiler::RelocationInfo * relocation)243 void Aarch64Encoder::MakeCall([[maybe_unused]] compiler::RelocationInfo *relocation)
244 {
245 #ifdef PANDA_TARGET_MACOS
246 LOG(FATAL, COMPILER) << "Not supported in Macos build";
247 #else
248 auto buffer = GetMasm()->GetBuffer();
249 relocation->offset = GetCursorOffset();
250 relocation->addend = 0;
251 relocation->type = R_AARCH64_CALL26;
252 static constexpr uint32_t CALL_WITH_ZERO_OFFSET = 0x94000000;
253 buffer->Emit32(CALL_WITH_ZERO_OFFSET);
254 #endif
255 }
256
MakeCall(const void * entry_point)257 void Aarch64Encoder::MakeCall(const void *entry_point)
258 {
259 auto lr_reg = GetTarget().GetLinkReg();
260 EncodeMov(lr_reg, Imm(reinterpret_cast<uintptr_t>(entry_point)));
261 GetMasm()->Blr(VixlReg(lr_reg));
262 }
263
MakeCall(MemRef entry_point)264 void Aarch64Encoder::MakeCall(MemRef entry_point)
265 {
266 auto lr_reg = GetTarget().GetLinkReg();
267 EncodeLdr(lr_reg, false, entry_point);
268 GetMasm()->Blr(VixlReg(lr_reg));
269 }
270
MakeCall(Reg reg)271 void Aarch64Encoder::MakeCall(Reg reg)
272 {
273 GetMasm()->Blr(VixlReg(reg));
274 }
275
MakeCall(LabelHolder::LabelId id)276 void Aarch64Encoder::MakeCall(LabelHolder::LabelId id)
277 {
278 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
279 GetMasm()->Bl(label);
280 }
281
LoadPcRelative(Reg reg,intptr_t offset,Reg reg_addr)282 void Aarch64Encoder::LoadPcRelative(Reg reg, intptr_t offset, Reg reg_addr)
283 {
284 ASSERT(GetCodeOffset() != Encoder::INVALID_OFFSET);
285 ASSERT(reg.IsValid() || reg_addr.IsValid());
286
287 if (!reg_addr.IsValid()) {
288 reg_addr = reg.As(INT64_TYPE);
289 }
290
291 if (vixl::IsInt21(offset)) {
292 GetMasm()->adr(VixlReg(reg_addr), offset);
293 if (reg != INVALID_REGISTER) {
294 EncodeLdr(reg, false, MemRef(reg_addr));
295 }
296 } else {
297 size_t pc = GetCodeOffset() + GetCursorOffset();
298 size_t addr;
299 if (intptr_t res = helpers::ToSigned(pc) + offset; res < 0) {
300 // Make both, pc and addr, positive
301 ssize_t extend = RoundUp(std::abs(res), vixl::aarch64::kPageSize);
302 addr = res + extend;
303 pc += extend;
304 } else {
305 addr = res;
306 }
307
308 ssize_t adrp_imm = (addr >> vixl::aarch64::kPageSizeLog2) - (pc >> vixl::aarch64::kPageSizeLog2);
309
310 GetMasm()->adrp(VixlReg(reg_addr), adrp_imm);
311
312 offset = panda::helpers::ToUnsigned(addr) & (vixl::aarch64::kPageSize - 1);
313 if (reg.GetId() != reg_addr.GetId()) {
314 EncodeAdd(reg_addr, reg_addr, Imm(offset));
315 if (reg != INVALID_REGISTER) {
316 EncodeLdr(reg, true, MemRef(reg_addr));
317 }
318 } else {
319 EncodeLdr(reg, true, MemRef(reg_addr, offset));
320 }
321 }
322 }
323
MakeCallAot(intptr_t offset)324 void Aarch64Encoder::MakeCallAot(intptr_t offset)
325 {
326 auto lr_reg = GetTarget().GetLinkReg();
327 LoadPcRelative(lr_reg, offset);
328 GetMasm()->Blr(VixlReg(lr_reg));
329 }
330
CanMakeCallByOffset(intptr_t offset)331 bool Aarch64Encoder::CanMakeCallByOffset(intptr_t offset)
332 {
333 // NOLINTNEXTLINE(hicpp-signed-bitwise)
334 auto off = (offset >> vixl::aarch64::kInstructionSizeLog2);
335 return vixl::aarch64::Instruction::IsValidImmPCOffset(vixl::aarch64::ImmBranchType::UncondBranchType, off);
336 }
337
MakeCallByOffset(intptr_t offset)338 void Aarch64Encoder::MakeCallByOffset(intptr_t offset)
339 {
340 GetMasm()->Bl(offset);
341 }
342
MakeLoadAotTable(intptr_t offset,Reg reg)343 void Aarch64Encoder::MakeLoadAotTable(intptr_t offset, Reg reg)
344 {
345 LoadPcRelative(reg, offset);
346 }
347
MakeLoadAotTableAddr(intptr_t offset,Reg addr,Reg val)348 void Aarch64Encoder::MakeLoadAotTableAddr(intptr_t offset, Reg addr, Reg val)
349 {
350 LoadPcRelative(val, offset, addr);
351 }
352
EncodeAbort()353 void Aarch64Encoder::EncodeAbort()
354 {
355 GetMasm()->Brk();
356 }
357
EncodeReturn()358 void Aarch64Encoder::EncodeReturn()
359 {
360 GetMasm()->Ret();
361 }
362
EncodeMul(Reg unused1,Reg unused2,Imm unused3)363 void Aarch64Encoder::EncodeMul([[maybe_unused]] Reg unused1, [[maybe_unused]] Reg unused2, [[maybe_unused]] Imm unused3)
364 {
365 SetFalseResult();
366 }
367
EncodeMov(Reg dst,Reg src)368 void Aarch64Encoder::EncodeMov(Reg dst, Reg src)
369 {
370 if (dst == src) {
371 return;
372 }
373 if (src.IsFloat() && dst.IsFloat()) {
374 if (src.GetSize() != dst.GetSize()) {
375 GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
376 return;
377 }
378 GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
379 return;
380 }
381 if (src.IsFloat() && !dst.IsFloat()) {
382 GetMasm()->Fmov(VixlReg(dst, src.GetSize()), VixlVReg(src));
383 return;
384 }
385 if (dst.IsFloat()) {
386 ASSERT(src.IsScalar());
387 GetMasm()->Fmov(VixlVReg(dst), VixlReg(src));
388 return;
389 }
390 // DiscardForSameWReg below means we would drop "mov w0, w0", but it is guarded by "dst == src" above anyway.
391 // NOTE: "mov w0, w0" is not equal "nop", as it clears upper bits of x0.
392 // Keeping the option here helps to generate nothing when e.g. src is x0 and dst is w0.
393 // Probably, a better solution here is to system-wide checking register size on Encoder level.
394 if (src.GetSize() != dst.GetSize()) {
395 auto src_reg = Reg(src.GetId(), dst.GetType());
396 GetMasm()->Mov(VixlReg(dst), VixlReg(src_reg), vixl::aarch64::DiscardMoveMode::kDiscardForSameWReg);
397 return;
398 }
399 GetMasm()->Mov(VixlReg(dst), VixlReg(src), vixl::aarch64::DiscardMoveMode::kDiscardForSameWReg);
400 }
401
EncodeNeg(Reg dst,Reg src)402 void Aarch64Encoder::EncodeNeg(Reg dst, Reg src)
403 {
404 if (dst.IsFloat()) {
405 GetMasm()->Fneg(VixlVReg(dst), VixlVReg(src));
406 return;
407 }
408 GetMasm()->Neg(VixlReg(dst), VixlReg(src));
409 }
410
EncodeAbs(Reg dst,Reg src)411 void Aarch64Encoder::EncodeAbs(Reg dst, Reg src)
412 {
413 if (dst.IsFloat()) {
414 GetMasm()->Fabs(VixlVReg(dst), VixlVReg(src));
415 return;
416 }
417
418 ASSERT(!GetRegfile()->IsZeroReg(dst));
419 if (GetRegfile()->IsZeroReg(src)) {
420 EncodeMov(dst, src);
421 return;
422 }
423
424 if (src.GetSize() == DOUBLE_WORD_SIZE) {
425 GetMasm()->Cmp(VixlReg(src), vixl::aarch64::xzr);
426 } else {
427 GetMasm()->Cmp(VixlReg(src), vixl::aarch64::wzr);
428 }
429 GetMasm()->Cneg(VixlReg(Promote(dst)), VixlReg(Promote(src)), vixl::aarch64::Condition::lt);
430 }
431
EncodeSqrt(Reg dst,Reg src)432 void Aarch64Encoder::EncodeSqrt(Reg dst, Reg src)
433 {
434 ASSERT(dst.IsFloat());
435 GetMasm()->Fsqrt(VixlVReg(dst), VixlVReg(src));
436 }
437
EncodeIsInf(Reg dst,Reg src)438 void Aarch64Encoder::EncodeIsInf(Reg dst, Reg src)
439 {
440 ASSERT(dst.IsScalar() && src.IsFloat());
441
442 if (src.GetSize() == WORD_SIZE) {
443 constexpr uint32_t INF_MASK = 0xff000000;
444
445 ScopedTmpRegU32 tmp_reg(this);
446 auto tmp = VixlReg(tmp_reg);
447 GetMasm()->Fmov(tmp, VixlVReg(src));
448 GetMasm()->Mov(VixlReg(dst).W(), INF_MASK);
449 GetMasm()->Lsl(tmp, tmp, 1);
450 GetMasm()->Cmp(tmp, VixlReg(dst, WORD_SIZE));
451 } else {
452 constexpr uint64_t INF_MASK = 0xffe0000000000000;
453
454 ScopedTmpRegU64 tmp_reg(this);
455 auto tmp = VixlReg(tmp_reg);
456 GetMasm()->Fmov(tmp, VixlVReg(src));
457 GetMasm()->Mov(VixlReg(dst).X(), INF_MASK);
458 GetMasm()->Lsl(tmp, tmp, 1);
459 GetMasm()->Cmp(tmp, VixlReg(dst, DOUBLE_WORD_SIZE));
460 }
461
462 GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::eq);
463 }
464
465 /* NaN values are needed to be canonicalized */
EncodeFpToBits(Reg dst,Reg src)466 void Aarch64Encoder::EncodeFpToBits(Reg dst, Reg src)
467 {
468 ASSERT(dst.IsScalar() && src.IsFloat());
469 ASSERT(dst.GetSize() == WORD_SIZE || dst.GetSize() == DOUBLE_WORD_SIZE);
470
471 if (dst.GetSize() == WORD_SIZE) {
472 ASSERT(src.GetSize() == WORD_SIZE);
473
474 constexpr auto FNAN = 0x7fc00000;
475
476 ScopedTmpRegU32 tmp(this);
477
478 GetMasm()->Fcmp(VixlVReg(src), VixlVReg(src));
479 GetMasm()->Mov(VixlReg(tmp), FNAN);
480 GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
481 GetMasm()->Csel(VixlReg(dst), VixlReg(tmp), VixlReg(dst), vixl::aarch64::Condition::ne);
482 } else {
483 ASSERT(src.GetSize() == DOUBLE_WORD_SIZE);
484
485 constexpr auto DNAN = 0x7ff8000000000000;
486
487 ScopedTmpRegU64 tmp_reg(this);
488 auto tmp = VixlReg(tmp_reg);
489
490 GetMasm()->Fcmp(VixlVReg(src), VixlVReg(src));
491 GetMasm()->Mov(tmp, DNAN);
492 GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
493 GetMasm()->Csel(VixlReg(dst), tmp, VixlReg(dst), vixl::aarch64::Condition::ne);
494 }
495 }
496
EncodeMoveBitsRaw(Reg dst,Reg src)497 void Aarch64Encoder::EncodeMoveBitsRaw(Reg dst, Reg src)
498 {
499 ASSERT((dst.IsFloat() && src.IsScalar()) || (src.IsFloat() && dst.IsScalar()));
500 if (dst.IsScalar()) {
501 ASSERT(src.GetSize() == dst.GetSize());
502 if (dst.GetSize() == WORD_SIZE) {
503 GetMasm()->Umov(VixlReg(dst).W(), VixlVReg(src).S(), 0);
504 } else {
505 GetMasm()->Umov(VixlReg(dst), VixlVReg(src), 0);
506 }
507 } else {
508 ASSERT(dst.GetSize() == src.GetSize());
509 ScopedTmpReg tmp_reg(this, src.GetType());
510 auto src_reg = src;
511 auto rzero = GetRegfile()->GetZeroReg();
512 if (src.GetId() == rzero.GetId()) {
513 EncodeMov(tmp_reg, Imm(0));
514 src_reg = tmp_reg;
515 }
516
517 if (src_reg.GetSize() == WORD_SIZE) {
518 GetMasm()->Fmov(VixlVReg(dst).S(), VixlReg(src_reg).W());
519 } else {
520 GetMasm()->Fmov(VixlVReg(dst), VixlReg(src_reg));
521 }
522 }
523 }
524
EncodeReverseBytes(Reg dst,Reg src)525 void Aarch64Encoder::EncodeReverseBytes(Reg dst, Reg src)
526 {
527 auto rzero = GetRegfile()->GetZeroReg();
528 if (src.GetId() == rzero.GetId()) {
529 EncodeMov(dst, Imm(0));
530 return;
531 }
532
533 ASSERT(src.GetSize() > BYTE_SIZE);
534 ASSERT(src.GetSize() == dst.GetSize());
535
536 if (src.GetSize() == HALF_SIZE) {
537 GetMasm()->Rev16(VixlReg(dst), VixlReg(src));
538 GetMasm()->Sxth(VixlReg(dst), VixlReg(dst));
539 } else {
540 GetMasm()->Rev(VixlReg(dst), VixlReg(src));
541 }
542 }
543
EncodeBitCount(Reg dst,Reg src)544 void Aarch64Encoder::EncodeBitCount(Reg dst, Reg src)
545 {
546 auto rzero = GetRegfile()->GetZeroReg();
547 if (src.GetId() == rzero.GetId()) {
548 EncodeMov(dst, Imm(0));
549 return;
550 }
551
552 ASSERT(dst.GetSize() == WORD_SIZE);
553
554 ScopedTmpRegF64 tmp_reg0(this);
555 vixl::aarch64::VRegister tmp_reg;
556 if (src.GetSize() == DOUBLE_WORD_SIZE) {
557 tmp_reg = VixlVReg(tmp_reg0).D();
558 } else {
559 tmp_reg = VixlVReg(tmp_reg0).S();
560 }
561
562 if (src.GetSize() < WORD_SIZE) {
563 int64_t cut_value = (1ULL << src.GetSize()) - 1;
564 EncodeAnd(src, src, Imm(cut_value));
565 }
566
567 GetMasm()->Fmov(tmp_reg, VixlReg(src));
568 GetMasm()->Cnt(tmp_reg.V8B(), tmp_reg.V8B());
569 GetMasm()->Addv(tmp_reg.B(), tmp_reg.V8B());
570 EncodeMov(dst, tmp_reg0);
571 }
572
573 /* Since only ROR is supported on AArch64 we do
574 * left rotaion as ROR(v, -count) */
EncodeRotate(Reg dst,Reg src1,Reg src2,bool is_ror)575 void Aarch64Encoder::EncodeRotate(Reg dst, Reg src1, Reg src2, bool is_ror)
576 {
577 ASSERT(src1.GetSize() == WORD_SIZE || src1.GetSize() == DOUBLE_WORD_SIZE);
578 ASSERT(src1.GetSize() == dst.GetSize());
579 auto rzero = GetRegfile()->GetZeroReg();
580 if (rzero.GetId() == src2.GetId() || rzero.GetId() == src1.GetId()) {
581 EncodeMov(dst, src1);
582 return;
583 }
584 /* as the second parameters is always 32-bits long we have to
585 * adjust the counter register for the 64-bits first operand case */
586 if (is_ror) {
587 auto count = (dst.GetSize() == WORD_SIZE ? VixlReg(src2) : VixlReg(src2).X());
588 GetMasm()->Ror(VixlReg(dst), VixlReg(src1), count);
589 } else {
590 ScopedTmpReg tmp(this);
591 auto cnt = (dst.GetId() == src1.GetId() ? tmp : dst);
592 auto count = (dst.GetSize() == WORD_SIZE ? VixlReg(cnt).W() : VixlReg(cnt).X());
593 auto source2 = (dst.GetSize() == WORD_SIZE ? VixlReg(src2).W() : VixlReg(src2).X());
594 GetMasm()->Neg(count, source2);
595 GetMasm()->Ror(VixlReg(dst), VixlReg(src1), count);
596 }
597 }
598
EncodeSignum(Reg dst,Reg src)599 void Aarch64Encoder::EncodeSignum(Reg dst, Reg src)
600 {
601 ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
602
603 ScopedTmpRegU32 tmp(this);
604 auto sign = (dst.GetId() == src.GetId() ? tmp : dst);
605
606 GetMasm()->Cmp(VixlReg(src), VixlImm(0));
607 GetMasm()->Cset(VixlReg(sign), vixl::aarch64::Condition::gt);
608
609 constexpr auto SHIFT_WORD_BITS = 31;
610 constexpr auto SHIFT_DWORD_BITS = 63;
611
612 /* The operation below is "sub dst, dst, src, lsr #reg_size-1"
613 * however, we can only encode as many as 32 bits in lsr field, so
614 * for 64-bits cases we cannot avoid having a separate lsr instruction */
615 if (src.GetSize() == WORD_SIZE) {
616 auto shift = Shift(src, LSR, SHIFT_WORD_BITS);
617 EncodeSub(dst, sign, shift);
618 } else {
619 ScopedTmpRegU64 shift(this);
620 sign = Reg(sign.GetId(), INT64_TYPE);
621 EncodeShr(shift, src, Imm(SHIFT_DWORD_BITS));
622 EncodeSub(dst, sign, shift);
623 }
624 }
625
EncodeCountLeadingZeroBits(Reg dst,Reg src)626 void Aarch64Encoder::EncodeCountLeadingZeroBits(Reg dst, Reg src)
627 {
628 auto rzero = GetRegfile()->GetZeroReg();
629 if (rzero.GetId() == src.GetId()) {
630 EncodeMov(dst, Imm(src.GetSize()));
631 return;
632 }
633 GetMasm()->Clz(VixlReg(dst), VixlReg(src));
634 }
635
EncodeCountTrailingZeroBits(Reg dst,Reg src)636 void Aarch64Encoder::EncodeCountTrailingZeroBits(Reg dst, Reg src)
637 {
638 auto rzero = GetRegfile()->GetZeroReg();
639 if (rzero.GetId() == src.GetId()) {
640 EncodeMov(dst, Imm(src.GetSize()));
641 return;
642 }
643 GetMasm()->Rbit(VixlReg(dst), VixlReg(src));
644 GetMasm()->Clz(VixlReg(dst), VixlReg(dst));
645 }
646
EncodeCeil(Reg dst,Reg src)647 void Aarch64Encoder::EncodeCeil(Reg dst, Reg src)
648 {
649 GetMasm()->Frintp(VixlVReg(dst), VixlVReg(src));
650 }
651
EncodeFloor(Reg dst,Reg src)652 void Aarch64Encoder::EncodeFloor(Reg dst, Reg src)
653 {
654 GetMasm()->Frintm(VixlVReg(dst), VixlVReg(src));
655 }
656
EncodeRint(Reg dst,Reg src)657 void Aarch64Encoder::EncodeRint(Reg dst, Reg src)
658 {
659 GetMasm()->Frintn(VixlVReg(dst), VixlVReg(src));
660 }
661
EncodeRound(Reg dst,Reg src)662 void Aarch64Encoder::EncodeRound(Reg dst, Reg src)
663 {
664 auto done = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
665 ScopedTmpReg tmp(this, src.GetType());
666 // round to nearest integer, ties away from zero
667 GetMasm()->Fcvtas(VixlReg(dst), VixlVReg(src));
668 // for positive values, zero and NaN inputs rounding is done
669 GetMasm()->Tbz(VixlReg(dst), dst.GetSize() - 1, done);
670 // if input is negative but not a tie, round to nearest is valid
671 // if input is a negative tie, dst += 1
672 GetMasm()->Frinta(VixlVReg(tmp), VixlVReg(src));
673 GetMasm()->Fsub(VixlVReg(tmp), VixlVReg(src), VixlVReg(tmp));
674 // NOLINTNEXTLINE(readability-magic-numbers)
675 const auto HALF = 0.5;
676 GetMasm()->Fcmp(VixlVReg(tmp), HALF);
677 GetMasm()->Cinc(VixlReg(dst), VixlReg(dst), vixl::aarch64::Condition::eq);
678 GetMasm()->Bind(done);
679 }
680
EncodeStringEquals(Reg dst,Reg str1,Reg str2,bool COMPRESSION,uint32_t LENGTH_OFFSET,uint32_t DATA_OFFSET)681 void Aarch64Encoder::EncodeStringEquals(Reg dst, Reg str1, Reg str2, bool COMPRESSION, uint32_t LENGTH_OFFSET,
682 uint32_t DATA_OFFSET)
683 {
684 /* Pseudo code:
685 if (str1 == str2) return true; // pointers compare. Fast path for same object
686 if (str1.length_field() != str2.length_field()) return false; // case length or compression is different
687
688 // code below use tmp3 both as counter and as offset to keep str1 and str2 untouched and to
689 // use minimal amount of scratch register. Then only 3 scratch registers are used: tmp1 and tmp2 for
690 // loaded string data of str1 and str2 respectively. And tmp3 as counter and offset at the same time.
691 // Then tmp3 will be "DATA_OFFSET + <offset inside string data>" almost everywhere. Check string from
692 // the end to make tmp3 manipulation easier. It'll be probably a bit less effective on large string and
693 // almost identical strings due to mostly unaligned access, but we can ignore it because most strings
694 // are less than 32 chars and in most cases it'll be different characters on first comparison. Then simpler
695 // code without additional operations wins.
696
697 int tmp3 = str1.length() * <size of str1 characters>; // data size in bytes
698 tmp3 = tmp3 + DATA_OFFSET - DOUBLE_WORD_SIZE_BYTE; // offset of last 8 data bytes (last octet)
699 while (tmp3 >= DATA_OFFSET) {
700 if (<load-8-bytes-at>(str1 + tmp3) != <load-8-bytes-at>(str2 + tmp3)) return false;
701 tmp3 -= 8;
702 }
703 // less than 8 bytes left to load and check. possibly 0.
704 if (tmp3 == DATA_OFFSET - DOUBLE_WORD_SIZE_BYTE) return true; // 0 bytes left
705 // 1..7 bytes left. Read whole octet (8 bytes) including few bytes from object header. Shift off header bytes
706 tmp1 = <load-8-bytes-at>(str1 + tmp3);
707 tmp2 = <load-8-bytes-at>(str2 + tmp3);
708 tmp3 = tmp3 - DATA_OFFSET; // <useful bytes> - 8 (== -<bytes to shift off>)
709 // calculate amount of bits to shift off. Note that for negative numbers shift result is undefined behavior
710 // for some languages like c/c++, but it's still fine for h/w logical shift on assembly level. We can use it.
711 tmp3 = - (tmp3 << 3);
712 if ((tmp1 >> tmp3) != (tmp2 >> tmp3)) return false;
713 return true;
714 */
715
716 ASSERT(dst.IsScalar());
717
718 ScopedTmpRegU64 tmp1_scoped(this);
719 ScopedTmpRegU64 tmp2_scoped(this);
720 ScopedTmpRegU64 tmp3_scoped(this);
721
722 auto tmp1_u32 = VixlReg(Reg(tmp1_scoped.GetReg().GetId(), INT32_TYPE)); // 32-bit alias for tmp1
723 auto tmp2_u32 = VixlReg(Reg(tmp2_scoped.GetReg().GetId(), INT32_TYPE)); // 32-bit alias for tmp2
724
725 auto tmp1 = VixlReg(tmp1_scoped.GetReg());
726 auto tmp2 = VixlReg(tmp2_scoped.GetReg());
727 auto tmp3 = VixlReg(tmp3_scoped.GetReg());
728
729 auto label_false = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
730 auto label_cset = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
731
732 // compare to itself case
733 GetMasm()->Cmp(VixlReg(str1), VixlReg(str2));
734 GetMasm()->B(label_cset, vixl::aarch64::Condition::eq);
735
736 EncodeLdr(tmp1_scoped.GetReg().As(INT32_TYPE), false, MemRef(str1, LENGTH_OFFSET));
737 EncodeLdr(tmp2_scoped.GetReg().As(INT32_TYPE), false, MemRef(str2, LENGTH_OFFSET));
738
739 // compare length and potentially, compressed-string status
740 GetMasm()->Cmp(tmp1_u32, tmp2_u32);
741 GetMasm()->B(label_cset, vixl::aarch64::Condition::ne);
742
743 // compare data. Assume result is "true" unless different bytes found
744 if (COMPRESSION) {
745 // branchless byte length calculation
746 GetMasm()->Lsr(tmp1_u32, tmp1_u32, 1); // string length
747 GetMasm()->And(tmp2_u32, tmp2_u32, 1); // compressed-string bit. If 1 then not compressed.
748 GetMasm()->Lsl(tmp3, tmp1, tmp2); // if not compressed, then shift left by 1 bit
749 }
750 EncodeStringEqualsMainLoop(dst, str1, str2, tmp1_scoped, tmp2_scoped, tmp3_scoped, label_false, label_cset,
751 DATA_OFFSET);
752 }
753
EncodeStringEqualsMainLoop(Reg dst,Reg str1,Reg str2,Reg tmp1_scoped,Reg tmp2_scoped,Reg tmp3_scoped,vixl::aarch64::Label * label_false,vixl::aarch64::Label * label_cset,const uint32_t DATA_OFFSET)754 void Aarch64Encoder::EncodeStringEqualsMainLoop(Reg dst, Reg str1, Reg str2, Reg tmp1_scoped, Reg tmp2_scoped,
755 Reg tmp3_scoped, vixl::aarch64::Label *label_false,
756 vixl::aarch64::Label *label_cset, const uint32_t DATA_OFFSET)
757 {
758 auto tmp1 = VixlReg(tmp1_scoped);
759 auto tmp2 = VixlReg(tmp2_scoped);
760 auto tmp3 = VixlReg(tmp3_scoped);
761
762 auto label_loop_begin = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
763 auto label_end = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
764 auto label_loop = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
765 // Now tmp3 is byte-counter. Use it as offset register as well.
766 GetMasm()->Add(tmp3, tmp3, DATA_OFFSET - DOUBLE_WORD_SIZE_BYTE);
767 GetMasm()->B(label_loop_begin);
768 GetMasm()->Bind(label_false);
769 EncodeMov(dst, Imm(0));
770 GetMasm()->B(label_end);
771 // case: >=8 bytes
772 GetMasm()->Bind(label_loop);
773
774 auto str1_last_word_mem = MemRef(str1, tmp3_scoped, 0);
775 auto str2_last_word_mem = MemRef(str2, tmp3_scoped, 0);
776
777 {
778 EncodeLdr(tmp1_scoped, false, str1_last_word_mem);
779 EncodeLdr(tmp2_scoped, false, str2_last_word_mem);
780 GetMasm()->Cmp(tmp1, tmp2);
781 GetMasm()->B(label_cset, vixl::aarch64::Condition::ne);
782 GetMasm()->Sub(tmp3, tmp3, DOUBLE_WORD_SIZE_BYTE);
783 GetMasm()->Bind(label_loop_begin);
784 GetMasm()->Cmp(tmp3, DATA_OFFSET);
785 GetMasm()->B(label_loop, vixl::aarch64::Condition::ge);
786 }
787
788 // case: 0..7 bytes left (tmp3 is DATA_OFFSET + -8..0)
789 GetMasm()->Cmp(tmp3, DATA_OFFSET - DOUBLE_WORD_SIZE_BYTE);
790 GetMasm()->B(label_cset, vixl::aarch64::Condition::eq);
791 EncodeLdr(tmp1_scoped, false, str1_last_word_mem);
792 EncodeLdr(tmp2_scoped, false, str2_last_word_mem);
793 // 1..7 bytes left to check. tmp3 is DATA_OFFSET + -7..-1
794 GetMasm()->Sub(tmp3, tmp3, DATA_OFFSET);
795
796 auto zero = VixlReg(GetRegfile()->GetZeroReg(), DOUBLE_WORD_SIZE);
797 // tmp3 is now -(amount_of_bytes_to_shift_off). Convert it to bits via single instruction
798 GetMasm()->Sub(tmp3, zero, vixl::aarch64::Operand(tmp3, vixl::aarch64::Shift::LSL, 3));
799 GetMasm()->Lsr(tmp1, tmp1, tmp3);
800 GetMasm()->Lsr(tmp2, tmp2, tmp3);
801 GetMasm()->Cmp(tmp1, tmp2);
802 GetMasm()->Bind(label_cset);
803 GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::eq);
804 GetMasm()->Bind(label_end);
805 }
806
EncodeCrc32Update(Reg dst,Reg crc_reg,Reg val_reg)807 void Aarch64Encoder::EncodeCrc32Update(Reg dst, Reg crc_reg, Reg val_reg)
808 {
809 auto tmp =
810 dst.GetId() != crc_reg.GetId() && dst.GetId() != val_reg.GetId() ? dst : ScopedTmpReg(this, dst.GetType());
811 GetMasm()->Mvn(VixlReg(tmp), VixlReg(crc_reg));
812 GetMasm()->Crc32b(VixlReg(tmp), VixlReg(tmp), VixlReg(val_reg));
813 GetMasm()->Mvn(VixlReg(dst), VixlReg(tmp));
814 }
815
816 /**
817 * Helper function for generating String::indexOf intrinsic: case of Latin1 (8-bit) character search
818 *
819 * Inputs: str - pointer to first character in string
820 * character - character to search
821 * idx: original start index
822 * tmp: address of 1st string character
823 * tmp1: length field value (potentially with compression bit).
824 * tmp2: MAX(idx, 0) if idx is not zero register. Anything otherwise.
825 * tmp3: temporary register to use
826 * label_found: label to jump when match found.
827 * Label contract requirement 1: leave calculated result in tmp1.
828 * label_not_found: label to jump when no match found.
829 * Assumptions: starting search index is less than string length (tmp1)
830 */
IndexOfHandleLatin1Case(Reg str,Reg character,Reg idx,Reg tmp,const bool COMPRESSION,const uint32_t DATA_OFFSET,const vixl::aarch64::Register & tmp1,const vixl::aarch64::Register & tmp2,const vixl::aarch64::Register & tmp3,vixl::aarch64::Label * label_found,vixl::aarch64::Label * label_not_found)831 void Aarch64Encoder::IndexOfHandleLatin1Case(Reg str, Reg character, Reg idx, Reg tmp, const bool COMPRESSION,
832 const uint32_t DATA_OFFSET, const vixl::aarch64::Register &tmp1,
833 const vixl::aarch64::Register &tmp2, const vixl::aarch64::Register &tmp3,
834 vixl::aarch64::Label *label_found, vixl::aarch64::Label *label_not_found)
835 {
836 // vixl register aliases
837 auto character_w = VixlReg(character).W();
838 auto character_x = VixlReg(character).X();
839 auto tmp0_x = VixlReg(tmp).X();
840
841 // more vixl aliases
842 auto lsl = vixl::aarch64::Shift::LSL;
843 auto lsr = vixl::aarch64::Shift::LSR;
844
845 bool idx_is_zero = (idx.GetId() == GetRegfile()->GetZeroReg().GetId());
846 bool character_is_zero = (character.GetId() == GetRegfile()->GetZeroReg().GetId());
847
848 // calculate address of first byte after string
849 if (COMPRESSION) {
850 GetMasm()->Add(tmp0_x, tmp0_x, vixl::aarch64::Operand(tmp1.X(), lsr, 1));
851 if (idx_is_zero) {
852 GetMasm()->Neg(tmp2.X(), vixl::aarch64::Operand(tmp1.X(), lsr, 1));
853 } else {
854 GetMasm()->Sub(tmp2.X(), tmp2.X(), vixl::aarch64::Operand(tmp1.X(), lsr, 1));
855 }
856 } else {
857 if (idx_is_zero) {
858 GetMasm()->Neg(tmp2.X(), tmp1.X());
859 } else {
860 GetMasm()->Sub(tmp2.X(), tmp2.X(), tmp1.X());
861 }
862 }
863 GetMasm()->Cmp(tmp2.X(), -DOUBLE_WORD_SIZE_BYTE);
864
865 auto label_small_loop = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
866 GetMasm()->B(label_small_loop, vixl::aarch64::Condition::gt);
867 // clone character to the size of register (i.e. 8 x 8-bit characters)
868 if (!character_is_zero) {
869 GetMasm()->Orr(character_w, character_w, vixl::aarch64::Operand(character_w, lsl, BYTE_SIZE));
870 GetMasm()->Orr(character_w, character_w, vixl::aarch64::Operand(character_w, lsl, HALF_SIZE));
871 GetMasm()->Orr(character_x, character_x, vixl::aarch64::Operand(character_x, lsl, WORD_SIZE));
872 }
873 IndexOfHandleLatin1CaseMainLoop(str, character, tmp, DATA_OFFSET, tmp1, tmp2, tmp3, label_found, label_not_found,
874 label_small_loop);
875 }
876
877 // constants for the indexOf implementation
878 constexpr int32_t MAX_8BIT_CHAR = 0xFF;
879 constexpr int32_t LOG2_BITS_PER_BYTE = 3;
880 constexpr uint32_t CLEAR_BIT_MASK = -2;
881 constexpr int32_t MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
882 constexpr int32_t MAX_SUPPLEMENTARY_CODE_POINT = 0X10FFFF;
883 constexpr uint64_t LATIN1_MASK = 0x7f7f7f7f7f7f7f7f;
884 constexpr uint64_t LATIN1_MASK2 = 0x0101010101010101;
885
IndexOfHandleLatin1CaseMainLoop(Reg str,Reg character,Reg tmp,const uint32_t DATA_OFFSET,const vixl::aarch64::Register & tmp1,const vixl::aarch64::Register & tmp2,const vixl::aarch64::Register & tmp3,vixl::aarch64::Label * label_found,vixl::aarch64::Label * label_not_found,vixl::aarch64::Label * label_small_loop)886 void Aarch64Encoder::IndexOfHandleLatin1CaseMainLoop(
887 Reg str, Reg character, Reg tmp, const uint32_t DATA_OFFSET, const vixl::aarch64::Register &tmp1,
888 const vixl::aarch64::Register &tmp2, const vixl::aarch64::Register &tmp3, vixl::aarch64::Label *label_found,
889 vixl::aarch64::Label *label_not_found, vixl::aarch64::Label *label_small_loop)
890 {
891 auto character_w = VixlReg(character).W();
892 bool character_is_zero = (character.GetId() == GetRegfile()->GetZeroReg().GetId());
893
894 auto label_loop = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
895 auto label_has_zero = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
896 auto label_small_match = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
897 auto label_not_found_restore_char = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
898
899 auto mem_tmp_tmp2_x = vixl::aarch64::MemOperand(VixlReg(tmp).X(), tmp2.X());
900
901 GetMasm()->Bind(label_loop);
902 {
903 GetMasm()->Ldr(tmp1.X(), mem_tmp_tmp2_x);
904 GetMasm()->Mov(tmp3.X(), LATIN1_MASK2); // can (re)init during ldr to save 1 reg
905 GetMasm()->Eor(tmp1.X(), tmp1.X(), VixlReg(character).X());
906 GetMasm()->Sub(tmp3.X(), tmp1.X(), tmp3.X());
907 GetMasm()->Orr(tmp1.X(), tmp1.X(), LATIN1_MASK);
908 GetMasm()->Bics(tmp1.X(), tmp3.X(), tmp1.X());
909 GetMasm()->B(label_has_zero, vixl::aarch64::Condition::ne);
910 GetMasm()->Add(tmp2.X(), tmp2.X(), DOUBLE_WORD_SIZE_BYTE);
911 GetMasm()->Cmp(tmp2.X(), -DOUBLE_WORD_SIZE_BYTE); // has enough bytes left to read whole register?
912 GetMasm()->B(label_loop, vixl::aarch64::Condition::lt); // yes. time to loop
913 }
914 GetMasm()->Cbz(tmp2.X(), character_is_zero ? label_not_found : label_not_found_restore_char); // done
915 GetMasm()->Mov(tmp2.X(), -DOUBLE_WORD_SIZE_BYTE); // setup data to read last 8 bytes. One more loop
916 GetMasm()->B(label_loop);
917 GetMasm()->Bind(label_small_loop);
918 {
919 GetMasm()->Ldrb(tmp1.W(), mem_tmp_tmp2_x);
920 GetMasm()->Cmp(tmp1.W(), character_w);
921 GetMasm()->B(label_small_match, vixl::aarch64::Condition::eq);
922 GetMasm()->Adds(tmp2.X(), tmp2.X(), BYTE_SIZE / BITS_PER_BYTE);
923 GetMasm()->Cbnz(tmp2.X(), label_small_loop);
924 GetMasm()->B(label_not_found);
925 }
926 GetMasm()->Bind(label_has_zero);
927 GetMasm()->Rev(tmp1.X(), tmp1.X());
928 if (!character_is_zero) {
929 GetMasm()->And(character_w, character_w, MAX_8BIT_CHAR);
930 }
931 GetMasm()->Clz(tmp1.X(), tmp1.X()); // difference bit index in current octet
932 GetMasm()->Add(tmp2.X(), tmp2.X(), vixl::aarch64::Operand(tmp1.X(), vixl::aarch64::Shift::ASR, LOG2_BITS_PER_BYTE));
933 GetMasm()->Bind(label_small_match);
934 // string length in bytes is: tmp - str - DATA_OFFSET
935 GetMasm()->Add(tmp2.X(), tmp2.X(), VixlReg(tmp).X());
936 GetMasm()->Sub(tmp2.X(), tmp2.X(), VixlReg(str).X());
937 GetMasm()->Sub(tmp2.X(), tmp2.X(), DATA_OFFSET);
938 GetMasm()->B(label_found);
939 GetMasm()->Bind(label_not_found_restore_char);
940 if (!character_is_zero) {
941 GetMasm()->And(character_w, character_w, MAX_8BIT_CHAR);
942 }
943 GetMasm()->B(label_not_found);
944 }
945
946 constexpr uint32_t UTF16_IDX2OFFSET_SHIFT = 1;
947
948 /**
949 * Helper function for generating String::indexOf intrinsic: case of normal utf-16 character search
950 *
951 * Inputs: str - pointer to first character in string
952 * character - character to search
953 * idx: original start index
954 * tmp: address of 1st string character
955 * tmp1: length field value (potentially with compression bit).
956 * tmp2: MAX(idx, 0) if idx is not zero register. Anything otherwise.
957 * tmp3: temporary register to use
958 * label_found: label to jump when match found.
959 * Label contract requirement 1: leave calculated result in tmp1.
960 * label_not_found: label to jump when no match found.
961 * Assumptions: starting search index is less than string length (tmp1)
962 */
IndexOfHandleUtf16NormalCase(Reg str,Reg character,Reg idx,Reg tmp,const bool COMPRESSION,const uint32_t DATA_OFFSET,const vixl::aarch64::Register & tmp1,const vixl::aarch64::Register & tmp2,const vixl::aarch64::Register & tmp3,vixl::aarch64::Label * label_found,vixl::aarch64::Label * label_not_found)963 void Aarch64Encoder::IndexOfHandleUtf16NormalCase(Reg str, Reg character, Reg idx, Reg tmp, const bool COMPRESSION,
964 const uint32_t DATA_OFFSET, const vixl::aarch64::Register &tmp1,
965 const vixl::aarch64::Register &tmp2,
966 const vixl::aarch64::Register &tmp3,
967 vixl::aarch64::Label *label_found,
968 vixl::aarch64::Label *label_not_found)
969 {
970 // vixl register aliases
971 auto character_w = VixlReg(character).W();
972 auto character_x = VixlReg(character).X();
973 auto tmp0_x = VixlReg(tmp).X();
974
975 // more vixl aliases
976 auto lsl = vixl::aarch64::Shift::LSL;
977
978 // local labels
979 auto label_small_loop = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
980
981 bool idx_is_zero = (idx.GetId() == GetRegfile()->GetZeroReg().GetId());
982 bool character_is_zero = (character.GetId() == GetRegfile()->GetZeroReg().GetId());
983
984 if (COMPRESSION) {
985 GetMasm()->And(tmp1.W(), tmp1.W(), CLEAR_BIT_MASK); // clear lowest bit to get string length in bytes
986 } else {
987 GetMasm()->Lsl(tmp1.W(), tmp1.W(), 1); // string length in bytes for non-compressed case
988 }
989 // amount of bytes to scan in worst case
990 GetMasm()->Add(tmp0_x, tmp0_x, tmp1.X()); // calculate address of first byte after string
991 if (idx_is_zero) {
992 GetMasm()->Neg(tmp2.X(), tmp1.X());
993 } else {
994 GetMasm()->Sub(tmp2.X(), tmp1.X(), vixl::aarch64::Operand(tmp2.X(), lsl, UTF16_IDX2OFFSET_SHIFT));
995 GetMasm()->Neg(tmp2.X(), tmp2.X());
996 }
997 GetMasm()->Cmp(tmp2.X(), -DOUBLE_WORD_SIZE_BYTE);
998 GetMasm()->B(label_small_loop, vixl::aarch64::Condition::gt);
999 // clone character to the size of register (i.e. 4 x 16-bit characters)
1000 if (!character_is_zero) {
1001 GetMasm()->Orr(character_w, character_w, vixl::aarch64::Operand(character_w, lsl, HALF_SIZE));
1002 GetMasm()->Orr(character_x, character_x, vixl::aarch64::Operand(character_x, lsl, WORD_SIZE));
1003 }
1004 IndexOfHandleUtf16NormalCaseMainLoop(str, character, tmp, DATA_OFFSET, tmp1, tmp2, tmp3, label_found,
1005 label_not_found, label_small_loop);
1006 }
1007
1008 constexpr uint64_t UTF16_MASK = 0x7fff7fff7fff7fff;
1009 constexpr uint64_t UTF16_MASK2 = 0x0001000100010001;
1010 constexpr int32_t MAX_UTF16_CHAR = 0xFFFF;
1011
IndexOfHandleUtf16NormalCaseMainLoop(Reg str,Reg character,Reg tmp,const uint32_t DATA_OFFSET,const vixl::aarch64::Register & tmp1,const vixl::aarch64::Register & tmp2,const vixl::aarch64::Register & tmp3,vixl::aarch64::Label * label_found,vixl::aarch64::Label * label_not_found,vixl::aarch64::Label * label_small_loop)1012 void Aarch64Encoder::IndexOfHandleUtf16NormalCaseMainLoop(
1013 Reg str, Reg character, Reg tmp, const uint32_t DATA_OFFSET, const vixl::aarch64::Register &tmp1,
1014 const vixl::aarch64::Register &tmp2, const vixl::aarch64::Register &tmp3, vixl::aarch64::Label *label_found,
1015 vixl::aarch64::Label *label_not_found, vixl::aarch64::Label *label_small_loop)
1016 {
1017 auto tmp0_x = VixlReg(tmp).X();
1018 auto character_w = VixlReg(character).W();
1019 bool character_is_zero = (character.GetId() == GetRegfile()->GetZeroReg().GetId());
1020
1021 auto label_loop = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1022 auto label_has_zero = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1023 auto label_small_match = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1024 auto label_not_found_restore_char = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1025
1026 GetMasm()->Bind(label_loop);
1027 {
1028 GetMasm()->Ldr(tmp1.X(), vixl::aarch64::MemOperand(tmp0_x, tmp2.X()));
1029 GetMasm()->Mov(tmp3.X(), UTF16_MASK2); // can (re)init during ldr to save 1 reg
1030 GetMasm()->Eor(tmp1.X(), tmp1.X(), VixlReg(character).X());
1031 GetMasm()->Sub(tmp3.X(), tmp1.X(), tmp3.X());
1032 GetMasm()->Orr(tmp1.X(), tmp1.X(), UTF16_MASK);
1033 GetMasm()->Bics(tmp1.X(), tmp3.X(), tmp1.X());
1034 GetMasm()->B(label_has_zero, vixl::aarch64::Condition::ne);
1035 GetMasm()->Add(tmp2.X(), tmp2.X(), DOUBLE_WORD_SIZE_BYTE);
1036 GetMasm()->Cmp(tmp2.X(), -DOUBLE_WORD_SIZE_BYTE); // has enough bytes left to read whole register?
1037 GetMasm()->B(label_loop, vixl::aarch64::Condition::lt); // yes. time to loop
1038 }
1039 GetMasm()->Cbz(tmp2.X(), character_is_zero ? label_not_found : label_not_found_restore_char); // done
1040 GetMasm()->Mov(tmp2.X(), -DOUBLE_WORD_SIZE_BYTE); // setup data to read last 8 bytes. One more loop
1041 GetMasm()->B(label_loop);
1042 GetMasm()->Bind(label_small_loop);
1043 {
1044 GetMasm()->Ldrh(tmp1.W(), vixl::aarch64::MemOperand(tmp0_x, tmp2.X()));
1045 GetMasm()->Cmp(tmp1.W(), character_w);
1046 GetMasm()->B(label_small_match, vixl::aarch64::Condition::eq);
1047 GetMasm()->Adds(tmp2.X(), tmp2.X(), HALF_SIZE / BITS_PER_BYTE);
1048 GetMasm()->Cbnz(tmp2.X(), label_small_loop);
1049 GetMasm()->B(label_not_found);
1050 }
1051 GetMasm()->Bind(label_has_zero);
1052 GetMasm()->Rev(tmp1.X(), tmp1.X());
1053 if (!character_is_zero) {
1054 GetMasm()->And(character_w, character_w, MAX_UTF16_CHAR);
1055 }
1056 GetMasm()->Clz(tmp1.X(), tmp1.X()); // difference bit index in current octet
1057 GetMasm()->Add(tmp2.X(), tmp2.X(), vixl::aarch64::Operand(tmp1.X(), vixl::aarch64::Shift::ASR, LOG2_BITS_PER_BYTE));
1058 GetMasm()->Bind(label_small_match);
1059 // string length in bytes is: tmp - str - DATA_OFFSET
1060 GetMasm()->Add(tmp2.X(), tmp2.X(), tmp0_x);
1061 GetMasm()->Sub(tmp2.X(), tmp2.X(), VixlReg(str).X());
1062 GetMasm()->Sub(tmp2.X(), tmp2.X(), DATA_OFFSET);
1063 GetMasm()->Lsr(tmp2.X(), tmp2.X(), UTF16_IDX2OFFSET_SHIFT);
1064 GetMasm()->B(label_found);
1065 GetMasm()->Bind(label_not_found_restore_char);
1066 if (!character_is_zero) {
1067 GetMasm()->And(character_w, character_w, MAX_UTF16_CHAR);
1068 }
1069 GetMasm()->B(label_not_found);
1070 }
1071
1072 /**
1073 * Helper function for generating String::indexOf intrinsic: case of surrogate character search
1074 *
1075 * Inputs: str - pointer to first character in string
1076 * character - character to search
1077 * idx: original start index
1078 * tmp: address of 1st string character
1079 * tmp1: length field value (potentially with compression bit).
1080 * tmp2: MAX(idx, 0) if idx is not zero register. Anything otherwise.
1081 * tmp3: temporary register to use
1082 * label_found: label to jump when match found.
1083 * Label contract requirement 1: leave calculated result in tmp1.
1084 * label_not_found: label to jump when no match found.
1085 * Assumptions: starting search index is less than string length (tmp1)
1086 */
IndexOfHandleSurrogateCase(Reg str,Reg character,Reg idx,Reg tmp,const bool COMPRESSION,const uint32_t DATA_OFFSET,const vixl::aarch64::Register & tmp1,const vixl::aarch64::Register & tmp2,const vixl::aarch64::Register & tmp3,vixl::aarch64::Label * label_found,vixl::aarch64::Label * label_not_found)1087 void Aarch64Encoder::IndexOfHandleSurrogateCase(Reg str, Reg character, Reg idx, Reg tmp, const bool COMPRESSION,
1088 const uint32_t DATA_OFFSET, const vixl::aarch64::Register &tmp1,
1089 const vixl::aarch64::Register &tmp2,
1090 const vixl::aarch64::Register &tmp3, vixl::aarch64::Label *label_found,
1091 vixl::aarch64::Label *label_not_found)
1092 {
1093 // local constants
1094 constexpr uint32_t MIN_HIGH_SURROGATE = 0xD800;
1095 constexpr uint32_t MIN_LOW_SURROGATE = 0xDC00;
1096 constexpr uint32_t SURROGATE_LOW_BITS = 10;
1097
1098 // vixl register aliases
1099 auto character_w = VixlReg(character).W();
1100 auto str_x = VixlReg(str).X();
1101 auto tmp0_x = VixlReg(tmp).X();
1102
1103 // more vixl aliases
1104 auto lsl = vixl::aarch64::Shift::LSL;
1105 auto lsr = vixl::aarch64::Shift::LSR;
1106
1107 // local labels
1108 auto label_sur_loop = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1109 auto label_match = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1110
1111 bool idx_is_zero = (idx.GetId() == GetRegfile()->GetZeroReg().GetId());
1112
1113 if (COMPRESSION) {
1114 GetMasm()->And(tmp1.W(), tmp1.W(), CLEAR_BIT_MASK); // clear lowest bit to get string length in bytes
1115 } else {
1116 GetMasm()->Lsl(tmp1.W(), tmp1.W(), 1); // string length in bytes for non-compressed case
1117 }
1118 GetMasm()->Add(tmp0_x, tmp0_x, tmp1.X()); // calculate address of first byte after string
1119 GetMasm()->Sub(tmp0_x, tmp0_x, HALF_SIZE / BITS_PER_BYTE); // don't scan last UTF-16 entry
1120 // amount of bytes to scan in worst case
1121 if (idx_is_zero) {
1122 GetMasm()->Neg(tmp2.X(), tmp1.X());
1123 } else {
1124 GetMasm()->Sub(tmp2.X(), tmp1.X(), vixl::aarch64::Operand(tmp2.X(), lsl, UTF16_IDX2OFFSET_SHIFT));
1125 GetMasm()->Neg(tmp2.X(), tmp2.X());
1126 }
1127 GetMasm()->Add(tmp2.X(), tmp2.X(), HALF_SIZE / BITS_PER_BYTE);
1128 GetMasm()->Cbz(tmp2.X(), label_not_found);
1129 GetMasm()->Sub(tmp1.W(), character_w, MIN_SUPPLEMENTARY_CODE_POINT); // shifted immediate version
1130 GetMasm()->Mov(tmp3.W(), MIN_HIGH_SURROGATE);
1131 GetMasm()->Add(tmp1.W(), tmp3.W(), vixl::aarch64::Operand(tmp1.W(), lsr, SURROGATE_LOW_BITS)); // high surrogate
1132 // low surrogate calculation below
1133 GetMasm()->Movk(tmp1.X(), MIN_LOW_SURROGATE, HALF_SIZE);
1134 // copy lowest 10 bits into (low surrogate)'s lowest 10 bits
1135 GetMasm()->Bfm(tmp1.W(), character_w, HALF_SIZE, SURROGATE_LOW_BITS - 1);
1136 GetMasm()->Bind(label_sur_loop);
1137 GetMasm()->Ldr(tmp3.W(), vixl::aarch64::MemOperand(tmp0_x, tmp2.X()));
1138 GetMasm()->Cmp(tmp3.W(), tmp1.W());
1139 GetMasm()->B(label_match, vixl::aarch64::Condition::eq);
1140 GetMasm()->Adds(tmp2.X(), tmp2.X(), HALF_SIZE / BITS_PER_BYTE);
1141 GetMasm()->Cbnz(tmp2.X(), label_sur_loop);
1142 GetMasm()->B(label_not_found);
1143 GetMasm()->Bind(label_match);
1144 // string length in bytes is: tmp - str - DATA_OFFSET
1145 GetMasm()->Add(tmp2.X(), tmp2.X(), tmp0_x);
1146 GetMasm()->Sub(tmp2.X(), tmp2.X(), str_x);
1147 GetMasm()->Sub(tmp2.X(), tmp2.X(), DATA_OFFSET);
1148 GetMasm()->Lsr(tmp2.X(), tmp2.X(), UTF16_IDX2OFFSET_SHIFT);
1149 GetMasm()->B(label_found);
1150 }
1151
EncodeStringIndexOfAfter(Reg dst,Reg str,Reg character,Reg idx,Reg tmp,bool COMPRESSION,uint32_t LENGTH_OFFSET,uint32_t DATA_OFFSET,int32_t CHAR_CONST_VALUE)1152 void Aarch64Encoder::EncodeStringIndexOfAfter(Reg dst, Reg str, Reg character, Reg idx, Reg tmp, bool COMPRESSION,
1153 uint32_t LENGTH_OFFSET, uint32_t DATA_OFFSET, int32_t CHAR_CONST_VALUE)
1154 {
1155 // NullCheck must check str register before StringIndexOfAfter.
1156 // If str is zero register, execution mustn't go to this instruction.
1157 auto zero_reg_id = GetRegfile()->GetZeroReg().GetId();
1158 if (str.GetId() == zero_reg_id) {
1159 EncodeAbort();
1160 return;
1161 }
1162
1163 auto zero = VixlReg(GetRegfile()->GetZeroReg(), DOUBLE_WORD_SIZE);
1164 ScopedTmpRegU64 tmp1_scoped(this);
1165 ScopedTmpRegU64 tmp2_scoped(this);
1166 ScopedTmpRegU64 tmp3_scoped(this);
1167 auto tmp1 = VixlReg(tmp1_scoped.GetReg());
1168 auto tmp2 = VixlReg(tmp2_scoped.GetReg());
1169 auto tmp3 = VixlReg(tmp3_scoped.GetReg());
1170
1171 // vixl register aliases
1172 bool idx_is_zero = (idx.GetId() == zero_reg_id);
1173
1174 /* Pseudo code:
1175 if (idx < 0) idx = 0;
1176
1177 if (idx >= length) {
1178 return -1;
1179 }
1180
1181 if (!<character_is_utf16_surrogate_pair>) { // main case
1182 if (<string_is_utf16>) {
1183 <search char in utf-16 string>; // IndexOfHandleUtf16NormalCase
1184 } else { // 8-bit string case
1185 if (<character_is_utf16>) {
1186 return -1;
1187 }
1188 <search 8-bit char in 8-bit string>; // IndexOfHandleLatin1
1189 }
1190 } else { // surrogate pair case
1191 if (!<string_is_utf16>) {
1192 return -1;
1193 }
1194 <per-character surrogate pair search>; // IndexOfHandleSurrogateCase
1195 }
1196 */
1197
1198 if (!idx_is_zero) {
1199 auto idx_w = VixlReg(idx).W();
1200 GetMasm()->Cmp(idx_w, zero.W());
1201 GetMasm()->Csel(tmp2.W(), idx_w, zero.W(), vixl::aarch64::Condition::gt); // max(idx, 0)
1202 }
1203
1204 GetMasm()->Ldr(tmp1.W(), vixl::aarch64::MemOperand(VixlReg(str).X(),
1205 LENGTH_OFFSET)); // string length with potential compression bit
1206 GetMasm()->Cmp(idx_is_zero ? zero.W() : tmp2.W(),
1207 COMPRESSION ? vixl::aarch64::Operand(tmp1.W(), vixl::aarch64::Shift::LSR, 1) : tmp1.W());
1208
1209 auto label_not_found = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1210 GetMasm()->B(label_not_found, vixl::aarch64::Condition::ge);
1211
1212 // check if character is larger than upper bound of UTF-16
1213 GetMasm()->Mov(tmp3.X(), MAX_SUPPLEMENTARY_CODE_POINT);
1214 GetMasm()->Cmp(VixlReg(character).X(), tmp3);
1215 GetMasm()->B(label_not_found, vixl::aarch64::Condition::gt);
1216
1217 // memo: compression: 0 = compressed(i.e. 8 bits), 1 = uncompressed(i.e. utf16)
1218 EncodeStringIndexOfAfterMainCase(dst, str, character, idx, tmp, tmp1, tmp2, tmp3, COMPRESSION, DATA_OFFSET,
1219 CHAR_CONST_VALUE, label_not_found);
1220
1221 // local constants
1222 constexpr int32_t RESULT_NOT_FOUND = -1;
1223 auto label_done = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1224
1225 GetMasm()->B(label_done);
1226 GetMasm()->Bind(label_not_found);
1227 GetMasm()->Mov(VixlReg(dst).W(), RESULT_NOT_FOUND);
1228 GetMasm()->Bind(label_done);
1229 }
1230
HandleChar(int32_t ch,const vixl::aarch64::Register & tmp,vixl::aarch64::Label * label_not_found,vixl::aarch64::Label * label_uncompressed_string)1231 void Aarch64Encoder::HandleChar(int32_t ch, const vixl::aarch64::Register &tmp, vixl::aarch64::Label *label_not_found,
1232 vixl::aarch64::Label *label_uncompressed_string)
1233 {
1234 if (ch > MAX_8BIT_CHAR) {
1235 GetMasm()->Tbz(tmp.W(), 0,
1236 label_not_found); // no need to search 16-bit character in compressed string
1237 } else {
1238 GetMasm()->Tbnz(tmp.W(), 0,
1239 label_uncompressed_string); // go to utf16 case if string is uncompressed
1240 }
1241 }
1242
EncodeStringIndexOfAfterMainCase(Reg dst,Reg str,Reg character,Reg idx,Reg tmp,const vixl::aarch64::Register & tmp1,const vixl::aarch64::Register & tmp2,const vixl::aarch64::Register & tmp3,const bool COMPRESSION,const uint32_t DATA_OFFSET,const int32_t CHAR_CONST_VALUE,vixl::aarch64::Label * label_not_found)1243 void Aarch64Encoder::EncodeStringIndexOfAfterMainCase(Reg dst, Reg str, Reg character, Reg idx, Reg tmp,
1244 const vixl::aarch64::Register &tmp1,
1245 const vixl::aarch64::Register &tmp2,
1246 const vixl::aarch64::Register &tmp3, const bool COMPRESSION,
1247 const uint32_t DATA_OFFSET, const int32_t CHAR_CONST_VALUE,
1248 vixl::aarch64::Label *label_not_found)
1249 {
1250 constexpr int32_t CHAR_CONST_UNKNOWN = -1;
1251 auto label_surrogate = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1252 auto label_found = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1253 auto label_uncompressed_string = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1254
1255 GetMasm()->Add(VixlReg(tmp).X(), VixlReg(str).X(), DATA_OFFSET);
1256
1257 if (CHAR_CONST_VALUE < MIN_SUPPLEMENTARY_CODE_POINT) {
1258 // case of non-surrogate constant char or non-constant char
1259 if (CHAR_CONST_VALUE == CHAR_CONST_UNKNOWN) { // run time check for surrogate pair
1260 GetMasm()->Cmp(VixlReg(character).W(),
1261 MIN_SUPPLEMENTARY_CODE_POINT); // shifted immediate form of Cmp (i.e. 0x10 << 12)
1262 GetMasm()->B(label_surrogate, vixl::aarch64::Condition::ge);
1263 }
1264 if (COMPRESSION) {
1265 if (CHAR_CONST_VALUE != CHAR_CONST_UNKNOWN) {
1266 HandleChar(CHAR_CONST_VALUE, tmp1, label_not_found, label_uncompressed_string);
1267 } else {
1268 GetMasm()->Tbnz(tmp1.W(), 0, label_uncompressed_string);
1269 GetMasm()->Cmp(VixlReg(character).W(), MAX_8BIT_CHAR);
1270 GetMasm()->B(label_not_found,
1271 vixl::aarch64::Condition::gt); // do no search 16-bit char in compressed string
1272 }
1273 if (CHAR_CONST_VALUE <= MAX_8BIT_CHAR) { // i.e. character is 8-bit constant or unknown
1274 IndexOfHandleLatin1Case(str, character, idx, tmp, COMPRESSION, DATA_OFFSET, tmp1, tmp2, tmp3,
1275 label_found, label_not_found);
1276 }
1277 GetMasm()->Bind(label_uncompressed_string);
1278 }
1279 IndexOfHandleUtf16NormalCase(str, character, idx, tmp, COMPRESSION, DATA_OFFSET, tmp1, tmp2, tmp3, label_found,
1280 label_not_found);
1281 }
1282
1283 if (CHAR_CONST_VALUE >= MIN_SUPPLEMENTARY_CODE_POINT || CHAR_CONST_VALUE == CHAR_CONST_UNKNOWN) {
1284 GetMasm()->Bind(label_surrogate);
1285 if (COMPRESSION) {
1286 GetMasm()->Tbz(tmp1.W(), 0, label_not_found); // no need to search 16-bit character in compressed string
1287 }
1288 IndexOfHandleSurrogateCase(str, character, idx, tmp, COMPRESSION, DATA_OFFSET, tmp1, tmp2, tmp3, label_found,
1289 label_not_found);
1290 }
1291 // various exit handling below
1292 GetMasm()->Bind(label_found);
1293 GetMasm()->Mov(VixlReg(dst).W(), tmp2.W());
1294 }
1295
1296 /* return the power of 2 for the size of the type */
EncodeGetTypeSize(Reg size,Reg type)1297 void Aarch64Encoder::EncodeGetTypeSize(Reg size, Reg type)
1298 {
1299 auto sreg = VixlReg(type);
1300 auto dreg = VixlReg(size);
1301 constexpr uint8_t I16 = 0x5;
1302 constexpr uint8_t I32 = 0x7;
1303 constexpr uint8_t F64 = 0xa;
1304 constexpr uint8_t REF = 0xd;
1305 constexpr uint8_t SMALLREF = panda::OBJECT_POINTER_SIZE < sizeof(uint64_t) ? 1 : 0;
1306 auto end = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1307
1308 GetMasm()->Mov(dreg, VixlImm(0));
1309 GetMasm()->Cmp(sreg, VixlImm(I16));
1310 GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1311 GetMasm()->Cmp(sreg, VixlImm(I32));
1312 GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1313 GetMasm()->Cmp(sreg, VixlImm(F64));
1314 GetMasm()->Cinc(dreg, dreg, vixl::aarch64::Condition::ge);
1315 GetMasm()->Cmp(sreg, VixlImm(REF));
1316 GetMasm()->B(end, vixl::aarch64::Condition::ne);
1317 GetMasm()->Sub(dreg, dreg, VixlImm(SMALLREF));
1318 GetMasm()->Bind(end);
1319 }
1320
EncodeReverseBits(Reg dst,Reg src)1321 void Aarch64Encoder::EncodeReverseBits(Reg dst, Reg src)
1322 {
1323 auto rzero = GetRegfile()->GetZeroReg();
1324 if (rzero.GetId() == src.GetId()) {
1325 EncodeMov(dst, Imm(0));
1326 return;
1327 }
1328 ASSERT(src.GetSize() == WORD_SIZE || src.GetSize() == DOUBLE_WORD_SIZE);
1329 ASSERT(src.GetSize() == dst.GetSize());
1330
1331 GetMasm()->Rbit(VixlReg(dst), VixlReg(src));
1332 }
1333
EncodeCompressedStringCharAt(Reg dst,Reg str,Reg idx,Reg length,Reg tmp,size_t data_offset,uint32_t shift)1334 void Aarch64Encoder::EncodeCompressedStringCharAt(Reg dst, Reg str, Reg idx, Reg length, Reg tmp, size_t data_offset,
1335 uint32_t shift)
1336 {
1337 ASSERT(dst.GetSize() == HALF_SIZE);
1338
1339 auto label_not_compressed = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1340 auto label_char_loaded = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1341 auto vixl_tmp = VixlReg(tmp, DOUBLE_WORD_SIZE);
1342 auto vixl_dst = VixlReg(dst);
1343
1344 GetMasm()->Tbnz(VixlReg(length), 0, label_not_compressed);
1345 EncodeAdd(tmp, str, idx);
1346 GetMasm()->ldrb(vixl_dst, MemOperand(vixl_tmp, data_offset));
1347 GetMasm()->B(label_char_loaded);
1348 GetMasm()->Bind(label_not_compressed);
1349 EncodeAdd(tmp, str, Shift(idx, shift));
1350 GetMasm()->ldrh(vixl_dst, MemOperand(vixl_tmp, data_offset));
1351 GetMasm()->Bind(label_char_loaded);
1352 }
1353
EncodeCompressedStringCharAtI(Reg dst,Reg str,Reg length,size_t data_offset,uint32_t index,uint32_t shift)1354 void Aarch64Encoder::EncodeCompressedStringCharAtI(Reg dst, Reg str, Reg length, size_t data_offset, uint32_t index,
1355 uint32_t shift)
1356 {
1357 ASSERT(dst.GetSize() == HALF_SIZE);
1358
1359 auto label_not_compressed = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1360 auto label_char_loaded = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
1361 auto vixl_str = VixlReg(str);
1362 auto vixl_dst = VixlReg(dst);
1363
1364 auto rzero = GetRegfile()->GetZeroReg().GetId();
1365 if (str.GetId() == rzero) {
1366 return;
1367 }
1368 GetMasm()->Tbnz(VixlReg(length), 0, label_not_compressed);
1369 GetMasm()->Ldrb(vixl_dst, MemOperand(vixl_str, data_offset + index));
1370 GetMasm()->B(label_char_loaded);
1371 GetMasm()->Bind(label_not_compressed);
1372 GetMasm()->Ldrh(vixl_dst, MemOperand(vixl_str, data_offset + (index << shift)));
1373 GetMasm()->Bind(label_char_loaded);
1374 }
1375
1376 /* Unsafe builtins implementation */
EncodeCompareAndSwap(Reg dst,Reg obj,Reg offset,Reg val,Reg newval)1377 void Aarch64Encoder::EncodeCompareAndSwap(Reg dst, Reg obj, Reg offset, Reg val, Reg newval)
1378 {
1379 /* Modeled according to the following logic:
1380 .L2:
1381 ldaxr cur, [addr]
1382 cmp cur, old
1383 bne .L3
1384 stlxr res, new, [addr]
1385 cbnz res, .L2
1386 .L3:
1387 cset w0, eq
1388 */
1389 ScopedTmpReg addr(this, true); /* LR is used */
1390 ScopedTmpReg cur(this, val.GetType());
1391 ScopedTmpReg res(this, val.GetType());
1392 auto loop = CreateLabel();
1393 auto exit = CreateLabel();
1394
1395 /* ldaxr wants [reg]-form of memref (no offset or disp) */
1396 EncodeAdd(addr, obj, offset);
1397
1398 BindLabel(loop);
1399 EncodeLdrExclusive(cur, addr, true);
1400 EncodeJump(exit, cur, val, Condition::NE);
1401 EncodeStrExclusive(res, newval, addr, true);
1402 EncodeJump(loop, res, Imm(0), Condition::NE);
1403 BindLabel(exit);
1404
1405 GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::eq);
1406 }
1407
EncodeUnsafeGetAndSet(Reg dst,Reg obj,Reg offset,Reg val)1408 void Aarch64Encoder::EncodeUnsafeGetAndSet(Reg dst, Reg obj, Reg offset, Reg val)
1409 {
1410 auto cur = ScopedTmpReg(this, val.GetType());
1411 auto last = ScopedTmpReg(this, val.GetType());
1412 auto addr = ScopedTmpReg(this, true); /* LR is used */
1413 auto mem = MemRef(addr);
1414 auto restart = CreateLabel();
1415 auto retry_ldaxr = CreateLabel();
1416
1417 /* ldaxr wants [reg]-form of memref (no offset or disp) */
1418 EncodeAdd(addr, obj, offset);
1419
1420     /* Since GetAndSet is defined as a non-faulting operation we
1421      * have to cover two possible faulty cases:
1422      *  1. stlxr failed, so we have to retry the ldaxr
1423      *  2. the value we got via ldaxr was not the value we initially
1424      *  loaded, so we have to start from the very beginning */
1425 BindLabel(restart);
1426 EncodeLdrAcquire(last, false, mem);
1427
1428 BindLabel(retry_ldaxr);
1429 EncodeLdrExclusive(cur, addr, true);
1430 EncodeJump(restart, cur, last, Condition::NE);
1431 EncodeStrExclusive(dst, val, addr, true);
1432 EncodeJump(retry_ldaxr, dst, Imm(0), Condition::NE);
1433
1434 EncodeMov(dst, cur);
1435 }
1436
EncodeUnsafeGetAndAdd(Reg dst,Reg obj,Reg offset,Reg val,Reg tmp)1437 void Aarch64Encoder::EncodeUnsafeGetAndAdd(Reg dst, Reg obj, Reg offset, Reg val, Reg tmp)
1438 {
1439 ScopedTmpReg cur(this, val.GetType());
1440 ScopedTmpReg last(this, val.GetType());
1441 auto newval = Reg(tmp.GetId(), val.GetType());
1442
1443 auto restart = CreateLabel();
1444 auto retry_ldaxr = CreateLabel();
1445
1446     /* addr_reg aliases obj; the obj register will be restored before exit */
1447 auto addr = Reg(obj.GetId(), INT64_TYPE);
1448
1449 /* ldaxr wants [reg]-form of memref (no offset or disp) */
1450 auto mem = MemRef(addr);
1451 EncodeAdd(addr, obj, offset);
1452
1453     /* Since GetAndAdd is defined as a non-faulting operation we
1454      * have to cover two possible faulty cases:
1455      *  1. stlxr failed, so we have to retry the ldaxr
1456      *  2. the value we got via ldaxr was not the value we initially
1457      *  loaded, so we have to start from the very beginning */
1458 BindLabel(restart);
1459 EncodeLdrAcquire(last, false, mem);
1460 EncodeAdd(newval, last, val);
1461
1462 BindLabel(retry_ldaxr);
1463 EncodeLdrExclusive(cur, addr, true);
1464 EncodeJump(restart, cur, last, Condition::NE);
1465 EncodeStrExclusive(dst, newval, addr, true);
1466 EncodeJump(retry_ldaxr, dst, Imm(0), Condition::NE);
1467
1468 EncodeSub(obj, addr, offset); /* restore the original value */
1469 EncodeMov(dst, cur);
1470 }
1471
EncodeMemoryBarrier(MemoryOrder::Order order)1472 void Aarch64Encoder::EncodeMemoryBarrier(MemoryOrder::Order order)
1473 {
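    // The orders map onto AArch64 DMB variants in the inner-shareable domain,
    // roughly: Acquire -> "dmb ishld", Release -> "dmb ishst", Full -> "dmb ish".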
1474 switch (order) {
1475 case MemoryOrder::Acquire: {
1476 GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierReads);
1477 break;
1478 }
1479 case MemoryOrder::Release: {
1480 GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierWrites);
1481 break;
1482 }
1483 case MemoryOrder::Full: {
1484 GetMasm()->Dmb(vixl::aarch64::InnerShareable, vixl::aarch64::BarrierAll);
1485 break;
1486 }
1487 default:
1488 break;
1489 }
1490 }
1491
EncodeNot(Reg dst,Reg src)1492 void Aarch64Encoder::EncodeNot(Reg dst, Reg src)
1493 {
1494 GetMasm()->Mvn(VixlReg(dst), VixlReg(src));
1495 }
1496
EncodeCastFloat(Reg dst,bool dst_signed,Reg src,bool src_signed)1497 void Aarch64Encoder::EncodeCastFloat(Reg dst, bool dst_signed, Reg src, bool src_signed)
1498 {
1499     // We DON'T support casts from float32/64 to int8/16 and bool, because such a cast is not defined in other
1500     // languages or by the architecture, so we do not know what the behavior should be.
1501     // There is, however, an implementation in another function: "EncodeCastFloatWithSmallDst". Call it from
1502     // "EncodeCast" instead of "EncodeCastFloat". It works as follows: cast from float32/64 to int32, move the sign
1503     // bit from int32 into the dst type, then extend the number from the dst type back to int32 (a requirement of
1504     // the ISA). All work happens in the dst register.
1505 ASSERT(dst.GetSize() >= WORD_SIZE);
1506
1507 if (src.IsFloat() && dst.IsScalar()) {
1508 if (dst_signed) {
1509 if (!IsJsNumberCast()) {
1510 GetMasm()->Fcvtzs(VixlReg(dst), VixlVReg(src));
1511 } else {
1512 CHECK_EQ(src.GetSize(), BITS_PER_UINT64);
1513 vixl::CPUFeaturesScope scope(GetMasm(), vixl::CPUFeatures::kFP, vixl::CPUFeatures::kJSCVT);
1514 GetMasm()->Fjcvtzs(VixlReg(dst.As(INT32_TYPE)), VixlVReg(src));
1515 }
1516 return;
1517 }
1518 GetMasm()->Fcvtzu(VixlReg(dst), VixlVReg(src));
1519 return;
1520 }
1521 if (src.IsScalar() && dst.IsFloat()) {
1522 if (src_signed) {
1523 GetMasm()->Scvtf(VixlVReg(dst), VixlReg(src));
1524 } else {
1525 GetMasm()->Ucvtf(VixlVReg(dst), VixlReg(src));
1526 }
1527 return;
1528 }
1529 if (src.IsFloat() && dst.IsFloat()) {
1530 if (src.GetSize() != dst.GetSize()) {
1531 GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
1532 return;
1533 }
1534 GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
1535 return;
1536 }
1537 UNREACHABLE();
1538 }
1539
EncodeCastFloatWithSmallDst(Reg dst,bool dst_signed,Reg src,bool src_signed)1540 void Aarch64Encoder::EncodeCastFloatWithSmallDst(Reg dst, bool dst_signed, Reg src, bool src_signed)
1541 {
1542     // A bool dst type is not supported!
1543
1544 if (src.IsFloat() && dst.IsScalar()) {
1545 if (dst_signed) {
1546 GetMasm()->Fcvtzs(VixlReg(dst), VixlVReg(src));
1547 if (dst.GetSize() < WORD_SIZE) {
1548 constexpr uint32_t TEST_BIT = (1U << (static_cast<uint32_t>(WORD_SIZE) - 1));
1549 ScopedTmpReg tmp_reg1(this, dst.GetType());
1550 auto tmp1 = VixlReg(tmp_reg1);
1551 ScopedTmpReg tmp_reg2(this, dst.GetType());
1552 auto tmp2 = VixlReg(tmp_reg2);
1553
1554 // NOLINTNEXTLINE(hicpp-signed-bitwise)
1555 int32_t set_bit = (dst.GetSize() == BYTE_SIZE) ? (1UL << (BYTE_SIZE - 1)) : (1UL << (HALF_SIZE - 1));
1556 int32_t rem_bit = set_bit - 1;
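                // What the three instructions below do (informally): Ands tests the int32 sign bit;
                // tmp1 is the value with the dst-type sign bit (set_bit) forced on, tmp2 is the value
                // truncated to the low dst-type bits (rem_bit). Csel picks tmp2 when the result was
                // non-negative and tmp1 otherwise, and the EncodeCastScalar call then sign-extends
                // the small value back to 32 bits.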
1557 GetMasm()->Ands(tmp1, VixlReg(dst), TEST_BIT);
1558
1559 GetMasm()->Orr(tmp1, VixlReg(dst), set_bit);
1560 GetMasm()->And(tmp2, VixlReg(dst), rem_bit);
1561                 // Select the result: tmp2 if the zero flag is set, tmp1 otherwise
1562 GetMasm()->Csel(VixlReg(dst), tmp2, tmp1, vixl::aarch64::eq);
1563 EncodeCastScalar(Reg(dst.GetId(), INT32_TYPE), dst_signed, dst, dst_signed);
1564 }
1565 return;
1566 }
1567 GetMasm()->Fcvtzu(VixlReg(dst), VixlVReg(src));
1568 if (dst.GetSize() < WORD_SIZE) {
1569 EncodeCastScalar(Reg(dst.GetId(), INT32_TYPE), dst_signed, dst, dst_signed);
1570 }
1571 return;
1572 }
1573 if (src.IsScalar() && dst.IsFloat()) {
1574 if (src_signed) {
1575 GetMasm()->Scvtf(VixlVReg(dst), VixlReg(src));
1576 } else {
1577 GetMasm()->Ucvtf(VixlVReg(dst), VixlReg(src));
1578 }
1579 return;
1580 }
1581 if (src.IsFloat() && dst.IsFloat()) {
1582 if (src.GetSize() != dst.GetSize()) {
1583 GetMasm()->Fcvt(VixlVReg(dst), VixlVReg(src));
1584 return;
1585 }
1586 GetMasm()->Fmov(VixlVReg(dst), VixlVReg(src));
1587 return;
1588 }
1589 UNREACHABLE();
1590 }
1591
EncodeCastSigned(Reg dst,Reg src)1592 void Aarch64Encoder::EncodeCastSigned(Reg dst, Reg src)
1593 {
1594 size_t src_size = src.GetSize();
1595 size_t dst_size = dst.GetSize();
1596 auto src_r = Reg(src.GetId(), dst.GetType());
1597     // Clamp to the destination size if needed, then sign-extend
1598 if (src_size > dst_size) {
1599 src_size = dst_size;
1600 }
1601 switch (src_size) {
1602 case BYTE_SIZE:
1603 GetMasm()->Sxtb(VixlReg(dst), VixlReg(src_r));
1604 break;
1605 case HALF_SIZE:
1606 GetMasm()->Sxth(VixlReg(dst), VixlReg(src_r));
1607 break;
1608 case WORD_SIZE:
1609 GetMasm()->Sxtw(VixlReg(dst), VixlReg(src_r));
1610 break;
1611 case DOUBLE_WORD_SIZE:
1612 GetMasm()->Mov(VixlReg(dst), VixlReg(src_r));
1613 break;
1614 default:
1615 SetFalseResult();
1616 break;
1617 }
1618 }
1619
EncodeCastUnsigned(Reg dst,Reg src)1620 void Aarch64Encoder::EncodeCastUnsigned(Reg dst, Reg src)
1621 {
1622 size_t src_size = src.GetSize();
1623 size_t dst_size = dst.GetSize();
1624 auto src_r = Reg(src.GetId(), dst.GetType());
1625 if (src_size > dst_size && dst_size < WORD_SIZE) {
1626         // We need to truncate the value when the destination is narrower than 32 bits, as required by the ISA.
1627 int64_t cut_value = (1ULL << dst_size) - 1;
1628 GetMasm()->And(VixlReg(dst), VixlReg(src), VixlImm(cut_value));
1629 return;
1630 }
1631     // Otherwise zero-extend
1632 switch (src_size) {
1633 case BYTE_SIZE:
1634 GetMasm()->Uxtb(VixlReg(dst), VixlReg(src_r));
1635 return;
1636 case HALF_SIZE:
1637 GetMasm()->Uxth(VixlReg(dst), VixlReg(src_r));
1638 return;
1639 case WORD_SIZE:
1640 GetMasm()->Uxtw(VixlReg(dst), VixlReg(src_r));
1641 return;
1642 case DOUBLE_WORD_SIZE:
1643 GetMasm()->Mov(VixlReg(dst), VixlReg(src_r));
1644 return;
1645 default:
1646 SetFalseResult();
1647 return;
1648 }
1649 }
1650
EncodeCastScalar(Reg dst,bool dst_signed,Reg src,bool src_signed)1651 void Aarch64Encoder::EncodeCastScalar(Reg dst, bool dst_signed, Reg src, bool src_signed)
1652 {
1653 size_t src_size = src.GetSize();
1654 size_t dst_size = dst.GetSize();
1655     // In our ISA the minimal type is 32-bit, so any type narrower than 32 bits
1656     // has to be extended to 32 bits. Hence a single cast may require two steps
1657     // (for example, i8->u16 is performed as i8->u16 followed by u16->u32).
1658 if (dst_size < WORD_SIZE) {
1659 if (src_size > dst_size) {
1660 if (dst_signed) {
1661 EncodeCastSigned(dst, src);
1662 } else {
1663 EncodeCastUnsigned(dst, src);
1664 }
1665 return;
1666 }
1667 if (src_size == dst_size) {
1668 GetMasm()->Mov(VixlReg(dst), VixlReg(src));
1669 if (!(src_signed || dst_signed) || (src_signed && dst_signed)) {
1670 return;
1671 }
1672 if (dst_signed) {
1673 EncodeCastSigned(Reg(dst.GetId(), INT32_TYPE), dst);
1674 } else {
1675 EncodeCastUnsigned(Reg(dst.GetId(), INT32_TYPE), dst);
1676 }
1677 return;
1678 }
1679 if (src_signed) {
1680 EncodeCastSigned(dst, src);
1681 if (!dst_signed) {
1682 EncodeCastUnsigned(Reg(dst.GetId(), INT32_TYPE), dst);
1683 }
1684 } else {
1685 EncodeCastUnsigned(dst, src);
1686 if (dst_signed) {
1687 EncodeCastSigned(Reg(dst.GetId(), INT32_TYPE), dst);
1688 }
1689 }
1690 } else {
1691 if (src_size == dst_size) {
1692 GetMasm()->Mov(VixlReg(dst), VixlReg(src));
1693 return;
1694 }
1695 if (src_signed) {
1696 EncodeCastSigned(dst, src);
1697 } else {
1698 EncodeCastUnsigned(dst, src);
1699 }
1700 }
1701 }
1702
EncodeCast(Reg dst,bool dst_signed,Reg src,bool src_signed)1703 void Aarch64Encoder::EncodeCast(Reg dst, bool dst_signed, Reg src, bool src_signed)
1704 {
1705 if (src.IsFloat() || dst.IsFloat()) {
1706 EncodeCastFloat(dst, dst_signed, src, src_signed);
1707 return;
1708 }
1709
1710 ASSERT(src.IsScalar() && dst.IsScalar());
1711 auto rzero = GetRegfile()->GetZeroReg().GetId();
1712 if (src.GetId() == rzero) {
1713 ASSERT(dst.GetId() != rzero);
1714 EncodeMov(dst, Imm(0));
1715 return;
1716 }
1717 // Scalar part
1718 EncodeCastScalar(dst, dst_signed, src, src_signed);
1719 }
1720
EncodeCastToBool(Reg dst,Reg src)1721 void Aarch64Encoder::EncodeCastToBool(Reg dst, Reg src)
1722 {
1723     // The ISA says that we only support the casts:
1724     // i32tou1, i64tou1, u32tou1, u64tou1
1725 ASSERT(src.IsScalar());
1726 ASSERT(dst.IsScalar());
1727
1728 GetMasm()->Cmp(VixlReg(src), VixlImm(0));
1729     // In our ISA the minimal type is 32-bit, so bool is produced as a 32-bit value
1730 GetMasm()->Cset(VixlReg(Reg(dst.GetId(), INT32_TYPE)), vixl::aarch64::Condition::ne);
1731 }
1732
EncodeAdd(Reg dst,Reg src0,Shift src1)1733 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src0, Shift src1)
1734 {
1735 if (dst.IsFloat()) {
1736 UNREACHABLE();
1737 }
1738 ASSERT(src0.GetSize() <= dst.GetSize());
1739 if (src0.GetSize() < dst.GetSize()) {
1740 auto src0_reg = Reg(src0.GetId(), dst.GetType());
1741 auto src1_reg = Reg(src1.GetBase().GetId(), dst.GetType());
1742 GetMasm()->Add(VixlReg(dst), VixlReg(src0_reg), VixlShift(Shift(src1_reg, src1.GetType(), src1.GetScale())));
1743 return;
1744 }
1745 GetMasm()->Add(VixlReg(dst), VixlReg(src0), VixlShift(src1));
1746 }
1747
EncodeAdd(Reg dst,Reg src0,Reg src1)1748 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src0, Reg src1)
1749 {
1750 if (dst.IsFloat()) {
1751 GetMasm()->Fadd(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1752 return;
1753 }
1754
1755 /* if any of the operands has 64-bits size,
1756 * forcibly do the 64-bits wide operation */
1757 if ((src0.GetSize() | src1.GetSize() | dst.GetSize()) >= DOUBLE_WORD_SIZE) {
1758 GetMasm()->Add(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1759 } else {
1760 /* Otherwise do 32-bits operation as any lesser
1761 * sizes have to be upcasted to 32-bits anyway */
1762 GetMasm()->Add(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1763 }
1764 }
1765
EncodeSub(Reg dst,Reg src0,Shift src1)1766 void Aarch64Encoder::EncodeSub(Reg dst, Reg src0, Shift src1)
1767 {
1768 ASSERT(dst.IsScalar());
1769 GetMasm()->Sub(VixlReg(dst), VixlReg(src0), VixlShift(src1));
1770 }
1771
EncodeSub(Reg dst,Reg src0,Reg src1)1772 void Aarch64Encoder::EncodeSub(Reg dst, Reg src0, Reg src1)
1773 {
1774 if (dst.IsFloat()) {
1775 GetMasm()->Fsub(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1776 return;
1777 }
1778
1779 /* if any of the operands has 64-bits size,
1780 * forcibly do the 64-bits wide operation */
1781 if ((src0.GetSize() | src1.GetSize() | dst.GetSize()) >= DOUBLE_WORD_SIZE) {
1782 GetMasm()->Sub(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1783 } else {
1784 /* Otherwise do 32-bits operation as any lesser
1785 * sizes have to be upcasted to 32-bits anyway */
1786 GetMasm()->Sub(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1787 }
1788 }
1789
EncodeMul(Reg dst,Reg src0,Reg src1)1790 void Aarch64Encoder::EncodeMul(Reg dst, Reg src0, Reg src1)
1791 {
1792 if (dst.IsFloat()) {
1793 GetMasm()->Fmul(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1794 return;
1795 }
1796 auto rzero = GetRegfile()->GetZeroReg().GetId();
1797 if (src0.GetId() == rzero || src1.GetId() == rzero) {
1798 EncodeMov(dst, Imm(0));
1799 return;
1800 }
1801 GetMasm()->Mul(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1802 }
1803
EncodeAddOverflow(compiler::LabelHolder::LabelId id,Reg dst,Reg src0,Reg src1,Condition cc)1804 void Aarch64Encoder::EncodeAddOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1805 {
1806 ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1807 ASSERT(cc == Condition::VS || cc == Condition::VC);
1808 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1809 GetMasm()->Adds(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1810 } else {
1811 /* Otherwise do 32-bits operation as any lesser
1812 * sizes have to be upcasted to 32-bits anyway */
1813 GetMasm()->Adds(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1814 }
1815 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
1816 GetMasm()->B(label, Convert(cc));
1817 }
1818
EncodeSubOverflow(compiler::LabelHolder::LabelId id,Reg dst,Reg src0,Reg src1,Condition cc)1819 void Aarch64Encoder::EncodeSubOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1820 {
1821 ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1822 ASSERT(cc == Condition::VS || cc == Condition::VC);
1823 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1824 GetMasm()->Subs(VixlReg(dst).X(), VixlReg(src0).X(), VixlReg(src1).X());
1825 } else {
1826 /* Otherwise do 32-bits operation as any lesser
1827 * sizes have to be upcasted to 32-bits anyway */
1828 GetMasm()->Subs(VixlReg(dst).W(), VixlReg(src0).W(), VixlReg(src1).W());
1829 }
1830 auto label = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(id);
1831 GetMasm()->B(label, Convert(cc));
1832 }
1833
EncodeDiv(Reg dst,bool dst_signed,Reg src0,Reg src1)1834 void Aarch64Encoder::EncodeDiv(Reg dst, bool dst_signed, Reg src0, Reg src1)
1835 {
1836 if (dst.IsFloat()) {
1837 GetMasm()->Fdiv(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1838 return;
1839 }
1840
1841 auto rzero = GetRegfile()->GetZeroReg().GetId();
1842 if (src1.GetId() == rzero || src0.GetId() == rzero) {
1843 ScopedTmpReg tmp_reg(this, src1.GetType());
1844 EncodeMov(tmp_reg, Imm(0));
1845 // Denominator is zero-reg
1846 if (src1.GetId() == rzero) {
1847 // Encode Abort
1848 GetMasm()->Udiv(VixlReg(dst), VixlReg(tmp_reg), VixlReg(tmp_reg));
1849 return;
1850 }
1851
1852 // But src1 still may be zero
1853 if (src1.GetId() != src0.GetId()) {
1854 if (dst_signed) {
1855 GetMasm()->Sdiv(VixlReg(dst), VixlReg(tmp_reg), VixlReg(src1));
1856 } else {
1857 GetMasm()->Udiv(VixlReg(dst), VixlReg(tmp_reg), VixlReg(src1));
1858 }
1859 return;
1860 }
1861 UNREACHABLE();
1862 }
1863 if (dst_signed) {
1864 GetMasm()->Sdiv(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1865 } else {
1866 GetMasm()->Udiv(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1867 }
1868 }
1869
EncodeMod(Reg dst,bool dst_signed,Reg src0,Reg src1)1870 void Aarch64Encoder::EncodeMod(Reg dst, bool dst_signed, Reg src0, Reg src1)
1871 {
1872 if (dst.IsScalar()) {
1873 auto rzero = GetRegfile()->GetZeroReg().GetId();
1874 if (src1.GetId() == rzero || src0.GetId() == rzero) {
1875 ScopedTmpReg tmp_reg(this, src1.GetType());
1876 EncodeMov(tmp_reg, Imm(0));
1877 // Denominator is zero-reg
1878 if (src1.GetId() == rzero) {
1879 // Encode Abort
1880 GetMasm()->Udiv(VixlReg(dst), VixlReg(tmp_reg), VixlReg(tmp_reg));
1881 return;
1882 }
1883
1884 if (src1.GetId() == src0.GetId()) {
1885 SetFalseResult();
1886 return;
1887 }
1888 // But src1 still may be zero
1889 ScopedTmpRegU64 tmp_reg_ud(this);
1890 if (dst.GetSize() < DOUBLE_WORD_SIZE) {
1891 tmp_reg_ud.ChangeType(INT32_TYPE);
1892 }
1893 auto tmp = VixlReg(tmp_reg_ud);
1894 if (!dst_signed) {
1895 GetMasm()->Udiv(tmp, VixlReg(tmp_reg), VixlReg(src1));
1896 GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(tmp_reg));
1897 return;
1898 }
1899 GetMasm()->Sdiv(tmp, VixlReg(tmp_reg), VixlReg(src1));
1900 GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(tmp_reg));
1901 return;
1902 }
1903
1904 ScopedTmpRegU64 tmp_reg(this);
1905 if (dst.GetSize() < DOUBLE_WORD_SIZE) {
1906 tmp_reg.ChangeType(INT32_TYPE);
1907 }
1908 auto tmp = VixlReg(tmp_reg);
1909
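        // The remainder is computed as dst = src0 - (src0 / src1) * src1, roughly:
        //     udiv/sdiv tmp, src0, src1
        //     msub      dst, tmp, src1, src0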
1910 if (!dst_signed) {
1911 GetMasm()->Udiv(tmp, VixlReg(src0), VixlReg(src1));
1912 GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(src0));
1913 return;
1914 }
1915 GetMasm()->Sdiv(tmp, VixlReg(src0), VixlReg(src1));
1916 GetMasm()->Msub(VixlReg(dst), tmp, VixlReg(src1), VixlReg(src0));
1917 return;
1918 }
1919
1920 EncodeFMod(dst, src0, src1);
1921 }
1922
EncodeFMod(Reg dst,Reg src0,Reg src1)1923 void Aarch64Encoder::EncodeFMod(Reg dst, Reg src0, Reg src1)
1924 {
1925 ASSERT(dst.IsFloat());
1926
1927 if (dst.GetType() == FLOAT32_TYPE) {
1928 using fp = float (*)(float, float);
1929 MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<fp>(fmodf)));
1930 } else {
1931 using fp = double (*)(double, double);
1932 MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<fp>(fmod)));
1933 }
1934 }
1935
EncodeMin(Reg dst,bool dst_signed,Reg src0,Reg src1)1936 void Aarch64Encoder::EncodeMin(Reg dst, bool dst_signed, Reg src0, Reg src1)
1937 {
1938 if (dst.IsFloat()) {
1939 GetMasm()->Fmin(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1940 return;
1941 }
1942 if (dst_signed) {
1943 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
1944 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::lt);
1945 return;
1946 }
1947 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
1948 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::ls);
1949 }
1950
EncodeMax(Reg dst,bool dst_signed,Reg src0,Reg src1)1951 void Aarch64Encoder::EncodeMax(Reg dst, bool dst_signed, Reg src0, Reg src1)
1952 {
1953 if (dst.IsFloat()) {
1954 GetMasm()->Fmax(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
1955 return;
1956 }
1957 if (dst_signed) {
1958 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
1959 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::gt);
1960 return;
1961 }
1962 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
1963 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), vixl::aarch64::Condition::hi);
1964 }
1965
EncodeShl(Reg dst,Reg src0,Reg src1)1966 void Aarch64Encoder::EncodeShl(Reg dst, Reg src0, Reg src1)
1967 {
1968 auto rzero = GetRegfile()->GetZeroReg().GetId();
1969 ASSERT(dst.GetId() != rzero);
1970 if (src0.GetId() == rzero) {
1971 EncodeMov(dst, Imm(0));
1972 return;
1973 }
1974 if (src1.GetId() == rzero) {
1975 EncodeMov(dst, src0);
1976 }
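    // For sub-word operands the hardware would mask the shift amount by 31 (W-register
    // semantics), so the count is masked down to dst.GetSize() - 1 here instead;
    // note that this clobbers src1.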
1977 if (dst.GetSize() < WORD_SIZE) {
1978 GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
1979 }
1980 GetMasm()->Lsl(VixlReg(dst), VixlReg(src0), VixlReg(src1));
1981 }
1982
EncodeShr(Reg dst,Reg src0,Reg src1)1983 void Aarch64Encoder::EncodeShr(Reg dst, Reg src0, Reg src1)
1984 {
1985 auto rzero = GetRegfile()->GetZeroReg().GetId();
1986 ASSERT(dst.GetId() != rzero);
1987 if (src0.GetId() == rzero) {
1988 EncodeMov(dst, Imm(0));
1989 return;
1990 }
1991 if (src1.GetId() == rzero) {
1992 EncodeMov(dst, src0);
1993 }
1994
1995 if (dst.GetSize() < WORD_SIZE) {
1996 GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
1997 }
1998
1999 GetMasm()->Lsr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2000 }
2001
EncodeAShr(Reg dst,Reg src0,Reg src1)2002 void Aarch64Encoder::EncodeAShr(Reg dst, Reg src0, Reg src1)
2003 {
2004 auto rzero = GetRegfile()->GetZeroReg().GetId();
2005 ASSERT(dst.GetId() != rzero);
2006 if (src0.GetId() == rzero) {
2007 EncodeMov(dst, Imm(0));
2008 return;
2009 }
2010 if (src1.GetId() == rzero) {
2011 EncodeMov(dst, src0);
2012 }
2013
2014 if (dst.GetSize() < WORD_SIZE) {
2015 GetMasm()->And(VixlReg(src1), VixlReg(src1), VixlImm(dst.GetSize() - 1));
2016 }
2017 GetMasm()->Asr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2018 }
2019
EncodeAnd(Reg dst,Reg src0,Reg src1)2020 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src0, Reg src1)
2021 {
2022 GetMasm()->And(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2023 }
2024
EncodeAnd(Reg dst,Reg src0,Shift src1)2025 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src0, Shift src1)
2026 {
2027 GetMasm()->And(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2028 }
2029
EncodeOr(Reg dst,Reg src0,Reg src1)2030 void Aarch64Encoder::EncodeOr(Reg dst, Reg src0, Reg src1)
2031 {
2032 GetMasm()->Orr(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2033 }
2034
EncodeOr(Reg dst,Reg src0,Shift src1)2035 void Aarch64Encoder::EncodeOr(Reg dst, Reg src0, Shift src1)
2036 {
2037 GetMasm()->Orr(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2038 }
2039
EncodeXor(Reg dst,Reg src0,Reg src1)2040 void Aarch64Encoder::EncodeXor(Reg dst, Reg src0, Reg src1)
2041 {
2042 GetMasm()->Eor(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2043 }
2044
EncodeXor(Reg dst,Reg src0,Shift src1)2045 void Aarch64Encoder::EncodeXor(Reg dst, Reg src0, Shift src1)
2046 {
2047 GetMasm()->Eor(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2048 }
2049
EncodeAdd(Reg dst,Reg src,Imm imm)2050 void Aarch64Encoder::EncodeAdd(Reg dst, Reg src, Imm imm)
2051 {
2052 ASSERT(dst.IsScalar() && "UNIMPLEMENTED");
2053 ASSERT(dst.GetSize() >= src.GetSize());
2054 if (dst.GetSize() != src.GetSize()) {
2055 auto src_reg = Reg(src.GetId(), dst.GetType());
2056 GetMasm()->Add(VixlReg(dst), VixlReg(src_reg), VixlImm(imm));
2057 return;
2058 }
2059 GetMasm()->Add(VixlReg(dst), VixlReg(src), VixlImm(imm));
2060 }
2061
EncodeSub(Reg dst,Reg src,Imm imm)2062 void Aarch64Encoder::EncodeSub(Reg dst, Reg src, Imm imm)
2063 {
2064 ASSERT(dst.IsScalar() && "UNIMPLEMENTED");
2065 GetMasm()->Sub(VixlReg(dst), VixlReg(src), VixlImm(imm));
2066 }
2067
EncodeShl(Reg dst,Reg src,Imm imm)2068 void Aarch64Encoder::EncodeShl(Reg dst, Reg src, Imm imm)
2069 {
2070 ASSERT(dst.IsScalar() && "Invalid operand type");
2071 auto rzero = GetRegfile()->GetZeroReg().GetId();
2072 ASSERT(dst.GetId() != rzero);
2073 if (src.GetId() == rzero) {
2074 EncodeMov(dst, Imm(0));
2075 return;
2076 }
2077
2078 GetMasm()->Lsl(VixlReg(dst), VixlReg(src), GetIntValue(imm));
2079 }
2080
EncodeShr(Reg dst,Reg src,Imm imm)2081 void Aarch64Encoder::EncodeShr(Reg dst, Reg src, Imm imm)
2082 {
2083 int64_t imm_value = static_cast<uint64_t>(GetIntValue(imm)) & (dst.GetSize() - 1);
2084
2085 ASSERT(dst.IsScalar() && "Invalid operand type");
2086 auto rzero = GetRegfile()->GetZeroReg().GetId();
2087 ASSERT(dst.GetId() != rzero);
2088 if (src.GetId() == rzero) {
2089 EncodeMov(dst, Imm(0));
2090 return;
2091 }
2092
2093 GetMasm()->Lsr(VixlReg(dst), VixlReg(src), imm_value);
2094 }
2095
EncodeAShr(Reg dst,Reg src,Imm imm)2096 void Aarch64Encoder::EncodeAShr(Reg dst, Reg src, Imm imm)
2097 {
2098 ASSERT(dst.IsScalar() && "Invalid operand type");
2099 GetMasm()->Asr(VixlReg(dst), VixlReg(src), GetIntValue(imm));
2100 }
2101
EncodeAnd(Reg dst,Reg src,Imm imm)2102 void Aarch64Encoder::EncodeAnd(Reg dst, Reg src, Imm imm)
2103 {
2104 ASSERT(dst.IsScalar() && "Invalid operand type");
2105 GetMasm()->And(VixlReg(dst), VixlReg(src), VixlImm(imm));
2106 }
2107
EncodeOr(Reg dst,Reg src,Imm imm)2108 void Aarch64Encoder::EncodeOr(Reg dst, Reg src, Imm imm)
2109 {
2110 ASSERT(dst.IsScalar() && "Invalid operand type");
2111 GetMasm()->Orr(VixlReg(dst), VixlReg(src), VixlImm(imm));
2112 }
2113
EncodeXor(Reg dst,Reg src,Imm imm)2114 void Aarch64Encoder::EncodeXor(Reg dst, Reg src, Imm imm)
2115 {
2116 ASSERT(dst.IsScalar() && "Invalid operand type");
2117 GetMasm()->Eor(VixlReg(dst), VixlReg(src), VixlImm(imm));
2118 }
2119
EncodeMov(Reg dst,Imm src)2120 void Aarch64Encoder::EncodeMov(Reg dst, Imm src)
2121 {
2122 if (dst.IsFloat()) {
2123 if (dst.GetSize() == WORD_SIZE) {
2124 GetMasm()->Fmov(VixlVReg(dst), src.GetValue<float>());
2125 } else {
2126 GetMasm()->Fmov(VixlVReg(dst), src.GetValue<double>());
2127 }
2128 return;
2129 }
2130 GetMasm()->Mov(VixlReg(dst), VixlImm(src));
2131 }
2132
EncodeLdr(Reg dst,bool dst_signed,MemRef mem)2133 void Aarch64Encoder::EncodeLdr(Reg dst, bool dst_signed, MemRef mem)
2134 {
2135 auto rzero = GetRegfile()->GetZeroReg().GetId();
2136
2137 if (!ConvertMem(mem).IsValid() || (dst.GetId() == rzero && dst.IsScalar())) {
2138         // Try to use dst as the zero register (to avoid creating a temp reg)
2139 // Check: dst not vector, dst not index, dst not rzero
2140 [[maybe_unused]] auto base_reg = mem.GetBase();
2141 auto index_reg = mem.GetIndex();
2142
2143 // Invalid == base is rzero or invalid
2144 ASSERT(base_reg.GetId() == rzero || !base_reg.IsValid());
2145         // check whether the dst register can be used
2146 if (dst.IsScalar() && dst.IsValid() && // not float
2147 (index_reg.GetId() != dst.GetId()) && // not index
2148 (dst.GetId() != rzero)) { // not rzero
2149             // dst can be used in place of rzero
2150 EncodeMov(dst, Imm(0));
2151
2152 auto fix_mem = MemRef(dst, index_reg, mem.GetScale(), mem.GetDisp());
2153 ASSERT(ConvertMem(fix_mem).IsValid());
2154 EncodeLdr(dst, dst_signed, fix_mem);
2155 } else {
2156 // Use tmp-reg
2157 ScopedTmpReg tmp_reg(this);
2158 EncodeMov(tmp_reg, Imm(0));
2159
2160 auto fix_mem = MemRef(tmp_reg, index_reg, mem.GetScale(), mem.GetDisp());
2161 ASSERT(ConvertMem(fix_mem).IsValid());
2162 // Used for zero-dst
2163 EncodeLdr(tmp_reg, dst_signed, fix_mem);
2164 }
2165 return;
2166 }
2167 ASSERT(ConvertMem(mem).IsValid());
2168 if (dst.IsFloat()) {
2169 GetMasm()->Ldr(VixlVReg(dst), ConvertMem(mem));
2170 return;
2171 }
2172 if (dst_signed) {
2173 if (dst.GetSize() == BYTE_SIZE) {
2174 GetMasm()->Ldrsb(VixlReg(dst, DOUBLE_WORD_SIZE), ConvertMem(mem));
2175 return;
2176 }
2177 if (dst.GetSize() == HALF_SIZE) {
2178 GetMasm()->Ldrsh(VixlReg(dst), ConvertMem(mem));
2179 return;
2180 }
2181 } else {
2182 if (dst.GetSize() == BYTE_SIZE) {
2183 GetMasm()->Ldrb(VixlReg(dst, WORD_SIZE), ConvertMem(mem));
2184 return;
2185 }
2186 if (dst.GetSize() == HALF_SIZE) {
2187 GetMasm()->Ldrh(VixlReg(dst), ConvertMem(mem));
2188 return;
2189 }
2190 }
2191 GetMasm()->Ldr(VixlReg(dst), ConvertMem(mem));
2192 }
2193
EncodeLdrAcquireInvalid(Reg dst,bool dst_signed,MemRef mem)2194 void Aarch64Encoder::EncodeLdrAcquireInvalid(Reg dst, bool dst_signed, MemRef mem)
2195 {
2196     // Try to use dst as the zero register (to avoid creating a temp reg)
2197 // Check: dst not vector, dst not index, dst not rzero
2198 [[maybe_unused]] auto base_reg = mem.GetBase();
2199 auto rzero = GetRegfile()->GetZeroReg().GetId();
2200
2201 auto index_reg = mem.GetIndex();
2202
2203 // Invalid == base is rzero or invalid
2204 ASSERT(base_reg.GetId() == rzero || !base_reg.IsValid());
2205     // check whether the dst register can be used
2206 if (dst.IsScalar() && dst.IsValid() && // not float
2207 (index_reg.GetId() != dst.GetId()) && // not index
2208 (dst.GetId() != rzero)) { // not rzero
2209         // dst can be used in place of rzero
2210 EncodeMov(dst, Imm(0));
2211
2212 auto fix_mem = MemRef(dst, index_reg, mem.GetScale(), mem.GetDisp());
2213 ASSERT(ConvertMem(fix_mem).IsValid());
2214 EncodeLdrAcquire(dst, dst_signed, fix_mem);
2215 } else {
2216 // Use tmp-reg
2217 ScopedTmpReg tmp_reg(this);
2218 EncodeMov(tmp_reg, Imm(0));
2219
2220 auto fix_mem = MemRef(tmp_reg, index_reg, mem.GetScale(), mem.GetDisp());
2221 ASSERT(ConvertMem(fix_mem).IsValid());
2222 // Used for zero-dst
2223 EncodeLdrAcquire(tmp_reg, dst_signed, fix_mem);
2224 }
2225 }
2226
EncodeLdrAcquireScalar(Reg dst,bool dst_signed,MemRef mem)2227 void Aarch64Encoder::EncodeLdrAcquireScalar(Reg dst, bool dst_signed, MemRef mem)
2228 {
2229 #ifndef NDEBUG
2230 CheckAlignment(mem, dst.GetSize());
2231 #endif // NDEBUG
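    // There is no sign-extending form of Ldar, so signed loads are emitted as the
    // zero-extending Ldar/Ldarb/Ldarh followed by an explicit Sxtb/Sxth/Sxtw.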
2232 if (dst_signed) {
2233 if (dst.GetSize() == BYTE_SIZE) {
2234 GetMasm()->Ldarb(VixlReg(dst), ConvertMem(mem));
2235 GetMasm()->Sxtb(VixlReg(dst), VixlReg(dst));
2236 return;
2237 }
2238 if (dst.GetSize() == HALF_SIZE) {
2239 GetMasm()->Ldarh(VixlReg(dst), ConvertMem(mem));
2240 GetMasm()->Sxth(VixlReg(dst), VixlReg(dst));
2241 return;
2242 }
2243 if (dst.GetSize() == WORD_SIZE) {
2244 GetMasm()->Ldar(VixlReg(dst), ConvertMem(mem));
2245 GetMasm()->Sxtw(VixlReg(dst), VixlReg(dst));
2246 return;
2247 }
2248 } else {
2249 if (dst.GetSize() == BYTE_SIZE) {
2250 GetMasm()->Ldarb(VixlReg(dst, WORD_SIZE), ConvertMem(mem));
2251 return;
2252 }
2253 if (dst.GetSize() == HALF_SIZE) {
2254 GetMasm()->Ldarh(VixlReg(dst), ConvertMem(mem));
2255 return;
2256 }
2257 }
2258 GetMasm()->Ldar(VixlReg(dst), ConvertMem(mem));
2259 }
2260
CheckAlignment(MemRef mem,size_t size)2261 void Aarch64Encoder::CheckAlignment(MemRef mem, size_t size)
2262 {
2263 ASSERT(size == WORD_SIZE || size == BYTE_SIZE || size == HALF_SIZE || size == DOUBLE_WORD_SIZE);
2264 if (size == BYTE_SIZE) {
2265 return;
2266 }
2267 size_t alignment_mask = (size >> 3U) - 1;
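    // `size` is in bits, so (size >> 3) is the access width in bytes and alignment_mask
    // covers the low address bits that must be zero (e.g. 0x7 for a 64-bit access).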
2268 ASSERT(!mem.HasIndex() && !mem.HasScale());
2269 if (mem.HasDisp()) {
2270         // Checking base + offset together would require an additional temp register.
2271         // The case where the base and the offset are individually misaligned but their sum is aligned is very rare,
2272         // so the base and the offset are checked for alignment separately.
2273 [[maybe_unused]] size_t offset = mem.GetDisp();
2274 ASSERT((offset & alignment_mask) == 0);
2275 }
2276 auto base_reg = mem.GetBase();
2277 auto end = CreateLabel();
2278 EncodeJumpTest(end, base_reg, Imm(alignment_mask), Condition::TST_EQ);
2279 EncodeAbort();
2280 BindLabel(end);
2281 }
2282
EncodeLdrAcquire(Reg dst,bool dst_signed,MemRef mem)2283 void Aarch64Encoder::EncodeLdrAcquire(Reg dst, bool dst_signed, MemRef mem)
2284 {
2285 ASSERT(!mem.HasIndex() && !mem.HasScale());
2286 auto rzero = GetRegfile()->GetZeroReg().GetId();
2287 if (!ConvertMem(mem).IsValid() || (dst.GetId() == rzero && dst.IsScalar())) {
2288 EncodeLdrAcquireInvalid(dst, dst_signed, mem);
2289 return;
2290 }
2291
2292 if (dst.IsFloat()) {
2293 ScopedTmpRegU64 tmp_reg(this);
2294 auto mem_ldar = mem;
2295 if (mem.HasDisp()) {
2296 if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2297 EncodeAdd(tmp_reg, mem.GetBase(), Imm(mem.GetDisp()));
2298 } else {
2299 EncodeMov(tmp_reg, Imm(mem.GetDisp()));
2300 EncodeAdd(tmp_reg, mem.GetBase(), tmp_reg);
2301 }
2302 mem_ldar = MemRef(tmp_reg);
2303 }
2304 #ifndef NDEBUG
2305 CheckAlignment(mem_ldar, dst.GetSize());
2306 #endif // NDEBUG
2307 auto tmp = VixlReg(tmp_reg, dst.GetSize());
2308 GetMasm()->Ldar(tmp, ConvertMem(mem_ldar));
2309 GetMasm()->Fmov(VixlVReg(dst), tmp);
2310 return;
2311 }
2312
2313 if (!mem.HasDisp()) {
2314 EncodeLdrAcquireScalar(dst, dst_signed, mem);
2315 return;
2316 }
2317
2318 Reg dst_64(dst.GetId(), INT64_TYPE);
2319 if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2320 EncodeAdd(dst_64, mem.GetBase(), Imm(mem.GetDisp()));
2321 } else {
2322 EncodeMov(dst_64, Imm(mem.GetDisp()));
2323 EncodeAdd(dst_64, mem.GetBase(), dst_64);
2324 }
2325 EncodeLdrAcquireScalar(dst, dst_signed, MemRef(dst_64));
2326 }
2327
EncodeStr(Reg src,MemRef mem)2328 void Aarch64Encoder::EncodeStr(Reg src, MemRef mem)
2329 {
2330 if (!ConvertMem(mem).IsValid()) {
2331 auto index_reg = mem.GetIndex();
2332 auto rzero = GetRegfile()->GetZeroReg().GetId();
2333 // Invalid == base is rzero or invalid
2334 ASSERT(mem.GetBase().GetId() == rzero || !mem.GetBase().IsValid());
2335 // Use tmp-reg
2336 ScopedTmpReg tmp_reg(this);
2337 EncodeMov(tmp_reg, Imm(0));
2338
2339 auto fix_mem = MemRef(tmp_reg, index_reg, mem.GetScale(), mem.GetDisp());
2340 ASSERT(ConvertMem(fix_mem).IsValid());
2341 if (src.GetId() != rzero) {
2342 EncodeStr(src, fix_mem);
2343 } else {
2344 EncodeStr(tmp_reg, fix_mem);
2345 }
2346 return;
2347 }
2348 ASSERT(ConvertMem(mem).IsValid());
2349 if (src.IsFloat()) {
2350 GetMasm()->Str(VixlVReg(src), ConvertMem(mem));
2351 return;
2352 }
2353 if (src.GetSize() == BYTE_SIZE) {
2354 GetMasm()->Strb(VixlReg(src), ConvertMem(mem));
2355 return;
2356 }
2357 if (src.GetSize() == HALF_SIZE) {
2358 GetMasm()->Strh(VixlReg(src), ConvertMem(mem));
2359 return;
2360 }
2361 GetMasm()->Str(VixlReg(src), ConvertMem(mem));
2362 }
2363
EncodeStrRelease(Reg src,MemRef mem)2364 void Aarch64Encoder::EncodeStrRelease(Reg src, MemRef mem)
2365 {
2366 ASSERT(!mem.HasScale());
2367
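    // Stlr only accepts a plain [base] addressing mode, so any displacement and/or index
    // is folded into a temporary base first (conceptually: base = mem.base + disp + index).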
2368 ScopedTmpRegLazy base(this);
2369 MemRef fixed_mem;
2370 bool mem_was_fixed = false;
2371 if (mem.HasDisp()) {
2372 if (vixl::aarch64::Assembler::IsImmAddSub(mem.GetDisp())) {
2373 base.AcquireIfInvalid();
2374 EncodeAdd(base, mem.GetBase(), Imm(mem.GetDisp()));
2375 } else {
2376 base.AcquireIfInvalid();
2377 EncodeMov(base, Imm(mem.GetDisp()));
2378 EncodeAdd(base, mem.GetBase(), base);
2379 }
2380 mem_was_fixed = true;
2381 }
2382 if (mem.HasIndex()) {
2383 base.AcquireIfInvalid();
2384 EncodeAdd(base, mem_was_fixed ? base : mem.GetBase(), mem.GetIndex());
2385 mem_was_fixed = true;
2386 }
2387
2388 if (mem_was_fixed) {
2389 fixed_mem = MemRef(base);
2390 } else {
2391 fixed_mem = mem;
2392 }
2393
2394 #ifndef NDEBUG
2395 CheckAlignment(mem, src.GetSize());
2396 #endif // NDEBUG
2397 if (src.IsFloat()) {
2398 ScopedTmpRegU64 tmp_reg(this);
2399 auto tmp = VixlReg(tmp_reg, src.GetSize());
2400 GetMasm()->Fmov(tmp, VixlVReg(src));
2401 GetMasm()->Stlr(tmp, ConvertMem(fixed_mem));
2402 return;
2403 }
2404 if (src.GetSize() == BYTE_SIZE) {
2405 GetMasm()->Stlrb(VixlReg(src), ConvertMem(fixed_mem));
2406 return;
2407 }
2408 if (src.GetSize() == HALF_SIZE) {
2409 GetMasm()->Stlrh(VixlReg(src), ConvertMem(fixed_mem));
2410 return;
2411 }
2412 GetMasm()->Stlr(VixlReg(src), ConvertMem(fixed_mem));
2413 }
2414
EncodeLdrExclusive(Reg dst,Reg addr,bool acquire)2415 void Aarch64Encoder::EncodeLdrExclusive(Reg dst, Reg addr, bool acquire)
2416 {
2417 ASSERT(dst.IsScalar());
2418 auto dst_reg = VixlReg(dst);
2419 auto mem_cvt = ConvertMem(MemRef(addr));
2420 #ifndef NDEBUG
2421 CheckAlignment(MemRef(addr), dst.GetSize());
2422 #endif // NDEBUG
2423 if (dst.GetSize() == BYTE_SIZE) {
2424 if (acquire) {
2425 GetMasm()->Ldaxrb(dst_reg, mem_cvt);
2426 return;
2427 }
2428 GetMasm()->Ldxrb(dst_reg, mem_cvt);
2429 return;
2430 }
2431 if (dst.GetSize() == HALF_SIZE) {
2432 if (acquire) {
2433 GetMasm()->Ldaxrh(dst_reg, mem_cvt);
2434 return;
2435 }
2436 GetMasm()->Ldxrh(dst_reg, mem_cvt);
2437 return;
2438 }
2439 if (acquire) {
2440 GetMasm()->Ldaxr(dst_reg, mem_cvt);
2441 return;
2442 }
2443 GetMasm()->Ldxr(dst_reg, mem_cvt);
2444 }
2445
EncodeStrExclusive(Reg dst,Reg src,Reg addr,bool release)2446 void Aarch64Encoder::EncodeStrExclusive(Reg dst, Reg src, Reg addr, bool release)
2447 {
2448 ASSERT(dst.IsScalar() && src.IsScalar());
2449
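    // The status register written by Stxr/Stlxr should not alias the stored value or the
    // address register, so in that case the status goes into a temporary and is copied
    // into dst afterwards.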
2450 bool copy_dst = dst.GetId() == src.GetId() || dst.GetId() == addr.GetId();
2451 ScopedTmpReg tmp(this);
2452 auto src_reg = VixlReg(src);
2453 auto mem_cvt = ConvertMem(MemRef(addr));
2454 auto dst_reg = copy_dst ? VixlReg(tmp) : VixlReg(dst);
2455 #ifndef NDEBUG
2456 CheckAlignment(MemRef(addr), src.GetSize());
2457 #endif // NDEBUG
2458
2459 if (src.GetSize() == BYTE_SIZE) {
2460 if (release) {
2461 GetMasm()->Stlxrb(dst_reg, src_reg, mem_cvt);
2462 } else {
2463 GetMasm()->Stxrb(dst_reg, src_reg, mem_cvt);
2464 }
2465 } else if (src.GetSize() == HALF_SIZE) {
2466 if (release) {
2467 GetMasm()->Stlxrh(dst_reg, src_reg, mem_cvt);
2468 } else {
2469 GetMasm()->Stxrh(dst_reg, src_reg, mem_cvt);
2470 }
2471 } else {
2472 if (release) {
2473 GetMasm()->Stlxr(dst_reg, src_reg, mem_cvt);
2474 } else {
2475 GetMasm()->Stxr(dst_reg, src_reg, mem_cvt);
2476 }
2477 }
2478 if (copy_dst) {
2479 EncodeMov(dst, tmp);
2480 }
2481 }
2482
EncodeStrz(Reg src,MemRef mem)2483 void Aarch64Encoder::EncodeStrz(Reg src, MemRef mem)
2484 {
2485 if (!ConvertMem(mem).IsValid()) {
2486 EncodeStr(src, mem);
2487 return;
2488 }
2489 ASSERT(ConvertMem(mem).IsValid());
2490     // The upper half of the register must be zeroed by default
2491 if (src.IsFloat()) {
2492 EncodeStr(src.As(FLOAT64_TYPE), mem);
2493 return;
2494 }
2495 if (src.GetSize() < WORD_SIZE) {
2496 EncodeCast(src, false, src.As(INT64_TYPE), false);
2497 }
2498 GetMasm()->Str(VixlReg(src.As(INT64_TYPE)), ConvertMem(mem));
2499 }
2500
EncodeSti(Imm src,MemRef mem)2501 void Aarch64Encoder::EncodeSti(Imm src, MemRef mem)
2502 {
2503 if (!ConvertMem(mem).IsValid()) {
2504 auto rzero = GetRegfile()->GetZeroReg();
2505 EncodeStr(rzero, mem);
2506 return;
2507 }
2508
2509 if (src.GetType().IsFloat()) {
2510 if (src.GetSize() == WORD_SIZE) {
2511 ScopedTmpRegF32 tmp_reg(this);
2512 GetMasm()->Fmov(VixlVReg(tmp_reg).S(), src.GetValue<float>());
2513 EncodeStr(tmp_reg, mem);
2514 } else {
2515 ScopedTmpRegF64 tmp_reg(this);
2516 GetMasm()->Fmov(VixlVReg(tmp_reg).D(), src.GetValue<double>());
2517 EncodeStr(tmp_reg, mem);
2518 }
2519 return;
2520 }
2521
2522 ScopedTmpRegU64 tmp_reg(this);
2523 auto tmp = VixlReg(tmp_reg);
2524 GetMasm()->Mov(tmp, VixlImm(src));
2525 if (src.GetSize() == BYTE_SIZE) {
2526 GetMasm()->Strb(tmp, ConvertMem(mem));
2527 return;
2528 }
2529 if (src.GetSize() == HALF_SIZE) {
2530 GetMasm()->Strh(tmp, ConvertMem(mem));
2531 return;
2532 }
2533 GetMasm()->Str(tmp, ConvertMem(mem));
2534 }
2535
EncodeMemCopy(MemRef mem_from,MemRef mem_to,size_t size)2536 void Aarch64Encoder::EncodeMemCopy(MemRef mem_from, MemRef mem_to, size_t size)
2537 {
2538 if (!ConvertMem(mem_from).IsValid() || !ConvertMem(mem_to).IsValid()) {
2539 auto rzero = GetRegfile()->GetZeroReg();
2540 if (!ConvertMem(mem_from).IsValid()) {
2541 // Encode one load - will fix inside
2542 EncodeLdr(rzero, false, mem_from);
2543 } else {
2544 ASSERT(!ConvertMem(mem_to).IsValid());
2545 // Encode one store - will fix inside
2546 EncodeStr(rzero, mem_to);
2547 }
2548 return;
2549 }
2550 ASSERT(ConvertMem(mem_from).IsValid());
2551 ASSERT(ConvertMem(mem_to).IsValid());
2552 ScopedTmpRegU64 tmp_reg(this);
2553 auto tmp = VixlReg(tmp_reg, std::min(size, static_cast<size_t>(DOUBLE_WORD_SIZE)));
2554 if (size == BYTE_SIZE) {
2555 GetMasm()->Ldrb(tmp, ConvertMem(mem_from));
2556 GetMasm()->Strb(tmp, ConvertMem(mem_to));
2557 } else if (size == HALF_SIZE) {
2558 GetMasm()->Ldrh(tmp, ConvertMem(mem_from));
2559 GetMasm()->Strh(tmp, ConvertMem(mem_to));
2560 } else {
2561 ASSERT(size == WORD_SIZE || size == DOUBLE_WORD_SIZE);
2562 GetMasm()->Ldr(tmp, ConvertMem(mem_from));
2563 GetMasm()->Str(tmp, ConvertMem(mem_to));
2564 }
2565 }
2566
EncodeMemCopyz(MemRef mem_from,MemRef mem_to,size_t size)2567 void Aarch64Encoder::EncodeMemCopyz(MemRef mem_from, MemRef mem_to, size_t size)
2568 {
2569 if (!ConvertMem(mem_from).IsValid() || !ConvertMem(mem_to).IsValid()) {
2570 auto rzero = GetRegfile()->GetZeroReg();
2571 if (!ConvertMem(mem_from).IsValid()) {
2572 // Encode one load - will fix inside
2573 EncodeLdr(rzero, false, mem_from);
2574 } else {
2575 ASSERT(!ConvertMem(mem_to).IsValid());
2576 // Encode one store - will fix inside
2577 EncodeStr(rzero, mem_to);
2578 }
2579 return;
2580 }
2581 ASSERT(ConvertMem(mem_from).IsValid());
2582 ASSERT(ConvertMem(mem_to).IsValid());
2583 ScopedTmpRegU64 tmp_reg(this);
2584 auto tmp = VixlReg(tmp_reg, std::min(size, static_cast<size_t>(DOUBLE_WORD_SIZE)));
2585 auto zero = VixlReg(GetRegfile()->GetZeroReg(), WORD_SIZE);
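    // For sizes up to a word, the loaded value and the zero register are stored as a pair (Stp),
    // which also clears the upper part of the 64-bit destination slot; a full double word is
    // stored with a plain Str.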
2586 if (size == BYTE_SIZE) {
2587 GetMasm()->Ldrb(tmp, ConvertMem(mem_from));
2588 GetMasm()->Stp(tmp, zero, ConvertMem(mem_to));
2589 } else if (size == HALF_SIZE) {
2590 GetMasm()->Ldrh(tmp, ConvertMem(mem_from));
2591 GetMasm()->Stp(tmp, zero, ConvertMem(mem_to));
2592 } else {
2593 ASSERT(size == WORD_SIZE || size == DOUBLE_WORD_SIZE);
2594 GetMasm()->Ldr(tmp, ConvertMem(mem_from));
2595 if (size == WORD_SIZE) {
2596 GetMasm()->Stp(tmp, zero, ConvertMem(mem_to));
2597 } else {
2598 GetMasm()->Str(tmp, ConvertMem(mem_to));
2599 }
2600 }
2601 }
2602
EncodeCompare(Reg dst,Reg src0,Reg src1,Condition cc)2603 void Aarch64Encoder::EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc)
2604 {
2605 ASSERT(src0.IsFloat() == src1.IsFloat());
2606 if (src0.IsFloat()) {
2607 GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
2608 } else {
2609 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2610 }
2611 GetMasm()->Cset(VixlReg(dst), Convert(cc));
2612 }
2613
EncodeCompareTest(Reg dst,Reg src0,Reg src1,Condition cc)2614 void Aarch64Encoder::EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc)
2615 {
2616 ASSERT(src0.IsScalar() && src1.IsScalar());
2617
2618 GetMasm()->Tst(VixlReg(src0), VixlReg(src1));
2619 GetMasm()->Cset(VixlReg(dst), ConvertTest(cc));
2620 }
2621
EncodeCmp(Reg dst,Reg src0,Reg src1,Condition cc)2622 void Aarch64Encoder::EncodeCmp(Reg dst, Reg src0, Reg src1, Condition cc)
2623 {
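    // Three-way compare: Cset yields 1 when the operands differ (0 when equal) and Cneg
    // negates it when the "less" condition cc holds, so dst ends up in {-1, 0, 1}.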
2624 if (src0.IsFloat()) {
2625 ASSERT(src1.IsFloat());
2626 ASSERT(cc == Condition::MI || cc == Condition::LT);
2627 GetMasm()->Fcmp(VixlVReg(src0), VixlVReg(src1));
2628 } else {
2629 ASSERT(src0.IsScalar() && src1.IsScalar());
2630 ASSERT(cc == Condition::LO || cc == Condition::LT);
2631 GetMasm()->Cmp(VixlReg(src0), VixlReg(src1));
2632 }
2633 GetMasm()->Cset(VixlReg(dst), vixl::aarch64::Condition::ne);
2634 GetMasm()->Cneg(VixlReg(Promote(dst)), VixlReg(Promote(dst)), Convert(cc));
2635 }
2636
EncodeSelect(Reg dst,Reg src0,Reg src1,Reg src2,Reg src3,Condition cc)2637 void Aarch64Encoder::EncodeSelect(Reg dst, Reg src0, Reg src1, Reg src2, Reg src3, Condition cc)
2638 {
2639 ASSERT(!src0.IsFloat() && !src1.IsFloat());
2640 if (src2.IsScalar()) {
2641 GetMasm()->Cmp(VixlReg(src2), VixlReg(src3));
2642 } else {
2643 GetMasm()->Fcmp(VixlVReg(src2), VixlVReg(src3));
2644 }
2645 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), Convert(cc));
2646 }
2647
EncodeSelect(Reg dst,Reg src0,Reg src1,Reg src2,Imm imm,Condition cc)2648 void Aarch64Encoder::EncodeSelect(Reg dst, Reg src0, Reg src1, Reg src2, Imm imm, Condition cc)
2649 {
2650 ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());
2651 GetMasm()->Cmp(VixlReg(src2), VixlImm(imm));
2652 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), Convert(cc));
2653 }
2654
EncodeSelectTest(Reg dst,Reg src0,Reg src1,Reg src2,Reg src3,Condition cc)2655 void Aarch64Encoder::EncodeSelectTest(Reg dst, Reg src0, Reg src1, Reg src2, Reg src3, Condition cc)
2656 {
2657 ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat() && !src3.IsFloat());
2658 GetMasm()->Tst(VixlReg(src2), VixlReg(src3));
2659 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), ConvertTest(cc));
2660 }
2661
EncodeSelectTest(Reg dst,Reg src0,Reg src1,Reg src2,Imm imm,Condition cc)2662 void Aarch64Encoder::EncodeSelectTest(Reg dst, Reg src0, Reg src1, Reg src2, Imm imm, Condition cc)
2663 {
2664 ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());
2665 ASSERT(CanEncodeImmLogical(GetIntValue(imm), imm.GetSize() > WORD_SIZE ? DOUBLE_WORD_SIZE : WORD_SIZE));
2666 GetMasm()->Tst(VixlReg(src2), VixlImm(imm));
2667 GetMasm()->Csel(VixlReg(dst), VixlReg(src0), VixlReg(src1), ConvertTest(cc));
2668 }
2669
EncodeLdp(Reg dst0,Reg dst1,bool dst_signed,MemRef mem)2670 void Aarch64Encoder::EncodeLdp(Reg dst0, Reg dst1, bool dst_signed, MemRef mem)
2671 {
2672 ASSERT(dst0.IsFloat() == dst1.IsFloat());
2673 ASSERT(dst0.GetSize() == dst1.GetSize());
2674 if (!ConvertMem(mem).IsValid()) {
2675 // Encode one Ldr - will fix inside
2676 EncodeLdr(dst0, dst_signed, mem);
2677 return;
2678 }
2679
2680 if (dst0.IsFloat()) {
2681 GetMasm()->Ldp(VixlVReg(dst0), VixlVReg(dst1), ConvertMem(mem));
2682 return;
2683 }
2684 if (dst_signed && dst0.GetSize() == WORD_SIZE) {
2685 GetMasm()->Ldpsw(VixlReg(dst0, DOUBLE_WORD_SIZE), VixlReg(dst1, DOUBLE_WORD_SIZE), ConvertMem(mem));
2686 return;
2687 }
2688 GetMasm()->Ldp(VixlReg(dst0), VixlReg(dst1), ConvertMem(mem));
2689 }
2690
EncodeStp(Reg src0,Reg src1,MemRef mem)2691 void Aarch64Encoder::EncodeStp(Reg src0, Reg src1, MemRef mem)
2692 {
2693 ASSERT(src0.IsFloat() == src1.IsFloat());
2694 ASSERT(src0.GetSize() == src1.GetSize());
2695 if (!ConvertMem(mem).IsValid()) {
2696 // Encode one Str - will fix inside
2697 EncodeStr(src0, mem);
2698 return;
2699 }
2700
2701 if (src0.IsFloat()) {
2702 GetMasm()->Stp(VixlVReg(src0), VixlVReg(src1), ConvertMem(mem));
2703 return;
2704 }
2705 GetMasm()->Stp(VixlReg(src0), VixlReg(src1), ConvertMem(mem));
2706 }
2707
EncodeMAdd(Reg dst,Reg src0,Reg src1,Reg src2)2708 void Aarch64Encoder::EncodeMAdd(Reg dst, Reg src0, Reg src1, Reg src2)
2709 {
2710 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize() && dst.GetSize() == src2.GetSize());
2711 ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar() && dst.IsScalar() == src2.IsScalar());
2712
2713 ASSERT(!GetRegfile()->IsZeroReg(dst));
2714
2715 if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
2716 EncodeMov(dst, src2);
2717 return;
2718 }
2719
2720 if (GetRegfile()->IsZeroReg(src2)) {
2721 EncodeMul(dst, src0, src1);
2722 return;
2723 }
2724
2725 if (dst.IsScalar()) {
2726 GetMasm()->Madd(VixlReg(dst), VixlReg(src0), VixlReg(src1), VixlReg(src2));
2727 } else {
2728 GetMasm()->Fmadd(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), VixlVReg(src2));
2729 }
2730 }
2731
EncodeMSub(Reg dst,Reg src0,Reg src1,Reg src2)2732 void Aarch64Encoder::EncodeMSub(Reg dst, Reg src0, Reg src1, Reg src2)
2733 {
2734 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize() && dst.GetSize() == src2.GetSize());
2735 ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar() && dst.IsScalar() == src2.IsScalar());
2736
2737 ASSERT(!GetRegfile()->IsZeroReg(dst));
2738
2739 if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
2740 EncodeMov(dst, src2);
2741 return;
2742 }
2743
2744 if (GetRegfile()->IsZeroReg(src2)) {
2745 EncodeMNeg(dst, src0, src1);
2746 return;
2747 }
2748
2749 if (dst.IsScalar()) {
2750 GetMasm()->Msub(VixlReg(dst), VixlReg(src0), VixlReg(src1), VixlReg(src2));
2751 } else {
2752 GetMasm()->Fmsub(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1), VixlVReg(src2));
2753 }
2754 }
2755
EncodeMNeg(Reg dst,Reg src0,Reg src1)2756 void Aarch64Encoder::EncodeMNeg(Reg dst, Reg src0, Reg src1)
2757 {
2758 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
2759 ASSERT(dst.IsScalar() == src0.IsScalar() && dst.IsScalar() == src1.IsScalar());
2760
2761 ASSERT(!GetRegfile()->IsZeroReg(dst));
2762
2763 if (GetRegfile()->IsZeroReg(src0) || GetRegfile()->IsZeroReg(src1)) {
2764 EncodeMov(dst, Imm(0U));
2765 return;
2766 }
2767
2768 if (dst.IsScalar()) {
2769 GetMasm()->Mneg(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2770 } else {
2771 GetMasm()->Fnmul(VixlVReg(dst), VixlVReg(src0), VixlVReg(src1));
2772 }
2773 }
2774
EncodeOrNot(Reg dst,Reg src0,Reg src1)2775 void Aarch64Encoder::EncodeOrNot(Reg dst, Reg src0, Reg src1)
2776 {
2777 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
2778 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
2779 GetMasm()->Orn(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2780 }
2781
EncodeOrNot(Reg dst,Reg src0,Shift src1)2782 void Aarch64Encoder::EncodeOrNot(Reg dst, Reg src0, Shift src1)
2783 {
2784 ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
2785 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
2786 GetMasm()->Orn(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2787 }
2788
EncodeExtractBits(Reg dst,Reg src0,Imm imm1,Imm imm2)2789 void Aarch64Encoder::EncodeExtractBits(Reg dst, Reg src0, Imm imm1, Imm imm2)
2790 {
2791 GetMasm()->Ubfx(VixlReg(dst), VixlReg(src0), GetIntValue(imm1), GetIntValue(imm2));
2792 }
2793
EncodeAndNot(Reg dst,Reg src0,Reg src1)2794 void Aarch64Encoder::EncodeAndNot(Reg dst, Reg src0, Reg src1)
2795 {
2796 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
2797 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
2798 GetMasm()->Bic(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2799 }
2800
EncodeAndNot(Reg dst,Reg src0,Shift src1)2801 void Aarch64Encoder::EncodeAndNot(Reg dst, Reg src0, Shift src1)
2802 {
2803 ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
2804 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
2805 GetMasm()->Bic(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2806 }
2807
EncodeXorNot(Reg dst,Reg src0,Reg src1)2808 void Aarch64Encoder::EncodeXorNot(Reg dst, Reg src0, Reg src1)
2809 {
2810 ASSERT(dst.GetSize() == src1.GetSize() && dst.GetSize() == src0.GetSize());
2811 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.IsScalar());
2812 GetMasm()->Eon(VixlReg(dst), VixlReg(src0), VixlReg(src1));
2813 }
2814
EncodeXorNot(Reg dst,Reg src0,Shift src1)2815 void Aarch64Encoder::EncodeXorNot(Reg dst, Reg src0, Shift src1)
2816 {
2817 ASSERT(dst.GetSize() == src0.GetSize() && dst.GetSize() == src1.GetBase().GetSize());
2818 ASSERT(dst.IsScalar() && src0.IsScalar() && src1.GetBase().IsScalar());
2819 GetMasm()->Eon(VixlReg(dst), VixlReg(src0), VixlShift(src1));
2820 }
2821
EncodeNeg(Reg dst,Shift src)2822 void Aarch64Encoder::EncodeNeg(Reg dst, Shift src)
2823 {
2824 ASSERT(dst.GetSize() == src.GetBase().GetSize());
2825 ASSERT(dst.IsScalar() && src.GetBase().IsScalar());
2826 GetMasm()->Neg(VixlReg(dst), VixlShift(src));
2827 }
2828
EncodeStackOverflowCheck(ssize_t offset)2829 void Aarch64Encoder::EncodeStackOverflowCheck(ssize_t offset)
2830 {
2831 ScopedTmpReg tmp(this);
2832 EncodeAdd(tmp, GetTarget().GetStackReg(), Imm(offset));
2833 EncodeLdr(tmp, false, MemRef(tmp));
2834 }
2835
CanEncodeImmAddSubCmp(int64_t imm,uint32_t size,bool signed_compare)2836 bool Aarch64Encoder::CanEncodeImmAddSubCmp(int64_t imm, [[maybe_unused]] uint32_t size,
2837 [[maybe_unused]] bool signed_compare)
2838 {
2839 if (imm == INT64_MIN) {
2840 return false;
2841 }
2842 if (imm < 0) {
2843 imm = -imm;
2844 }
2845 return vixl::aarch64::Assembler::IsImmAddSub(imm);
2846 }
2847
CanEncodeImmLogical(uint64_t imm,uint32_t size)2848 bool Aarch64Encoder::CanEncodeImmLogical(uint64_t imm, uint32_t size)
2849 {
2850 return vixl::aarch64::Assembler::IsImmLogical(imm, size);
2851 }
2852
2853 /*
2854 * From aarch64 instruction set
2855 *
2856 * ========================================================
2857 * Syntax
2858 *
2859 * LDR Wt, [Xn|SP, Rm{, extend {amount}}] ; 32-bit general registers
2860 *
2861 * LDR Xt, [Xn|SP, Rm{, extend {amount}}] ; 64-bit general registers
2862 *
2863 * amount
2864 * Is the index shift amount, optional and defaulting to #0 when extend is not LSL:
2865 *
2866 * 32-bit general registers
2867 * Can be one of #0 or #2.
2868 *
2869 * 64-bit general registers
2870 * Can be one of #0 or #3.
2871 * ========================================================
2872 * Syntax
2873 *
2874 * LDRH Wt, [Xn|SP, Rm{, extend {amount}}]
2875 *
2876 * amount
2877 * Is the index shift amount, optional and defaulting to #0 when extend is not LSL, and can be either #0 or #1.
2878 * ========================================================
2879 *
2880 * Scale can be 0 or 1 for half load, 2 for word load, 3 for double word load
2881  * Scale can be 0 or 1 for half load, 2 for word load, 3 for double word load */
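// For example (with `size` given in bits): a 32-bit load accesses size >> 3 == 4 bytes,
// so scale 2 is encodable because (1U << 2) == 4, while scale 1 is not; scale 0 is always allowed.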
CanEncodeScale(uint64_t imm,uint32_t size)2882 bool Aarch64Encoder::CanEncodeScale(uint64_t imm, uint32_t size)
2883 {
2884 return (imm == 0) || ((1U << imm) == (size >> 3U));
2885 }
2886
CanEncodeShiftedOperand(ShiftOpcode opcode,ShiftType shift_type)2887 bool Aarch64Encoder::CanEncodeShiftedOperand(ShiftOpcode opcode, ShiftType shift_type)
2888 {
2889 switch (opcode) {
2890 case ShiftOpcode::NEG_SR:
2891 case ShiftOpcode::ADD_SR:
2892 case ShiftOpcode::SUB_SR:
2893 return shift_type == ShiftType::LSL || shift_type == ShiftType::LSR || shift_type == ShiftType::ASR;
2894 case ShiftOpcode::AND_SR:
2895 case ShiftOpcode::OR_SR:
2896 case ShiftOpcode::XOR_SR:
2897 case ShiftOpcode::AND_NOT_SR:
2898 case ShiftOpcode::OR_NOT_SR:
2899 case ShiftOpcode::XOR_NOT_SR:
2900 return shift_type != ShiftType::INVALID_SHIFT;
2901 default:
2902 return false;
2903 }
2904 }
2905
AcquireScratchRegister(TypeInfo type)2906 Reg Aarch64Encoder::AcquireScratchRegister(TypeInfo type)
2907 {
2908 ASSERT(GetMasm()->GetCurrentScratchRegisterScope() == nullptr);
2909 auto reg = type.IsFloat() ? GetMasm()->GetScratchVRegisterList()->PopLowestIndex()
2910 : GetMasm()->GetScratchRegisterList()->PopLowestIndex();
2911 ASSERT(reg.IsValid());
2912 return Reg(reg.GetCode(), type);
2913 }
2914
AcquireScratchRegister(Reg reg)2915 void Aarch64Encoder::AcquireScratchRegister(Reg reg)
2916 {
2917 ASSERT(GetMasm()->GetCurrentScratchRegisterScope() == nullptr);
2918 if (reg == GetTarget().GetLinkReg()) {
2919 ASSERT_PRINT(!lr_acquired_, "Trying to acquire LR, which hasn't been released before");
2920 lr_acquired_ = true;
2921 return;
2922 }
2923 auto type = reg.GetType();
2924 auto reg_id = reg.GetId();
2925
2926 if (type.IsFloat()) {
2927 ASSERT(GetMasm()->GetScratchVRegisterList()->IncludesAliasOf(VixlVReg(reg)));
2928 GetMasm()->GetScratchVRegisterList()->Remove(reg_id);
2929 } else {
2930 ASSERT(GetMasm()->GetScratchRegisterList()->IncludesAliasOf(VixlReg(reg)));
2931 GetMasm()->GetScratchRegisterList()->Remove(reg_id);
2932 }
2933 }
2934
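// Return a scratch register acquired above: clear the LR flag or put the register back into
// the corresponding vixl scratch list.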
2935 void Aarch64Encoder::ReleaseScratchRegister(Reg reg)
2936 {
2937 if (reg == GetTarget().GetLinkReg()) {
2938 ASSERT_PRINT(lr_acquired_, "Trying to release LR, which hasn't been acquired before");
2939 lr_acquired_ = false;
2940 } else if (reg.IsFloat()) {
2941 GetMasm()->GetScratchVRegisterList()->Combine(reg.GetId());
2942 } else if (reg.GetId() != GetTarget().GetLinkReg().GetId()) {
2943 GetMasm()->GetScratchRegisterList()->Combine(reg.GetId());
2944 }
2945 }
2946
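// A scratch register counts as released when it is present in the corresponding vixl scratch
// list (for LR: when the lr_acquired_ flag is clear).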
2947 bool Aarch64Encoder::IsScratchRegisterReleased(Reg reg)
2948 {
2949 if (reg == GetTarget().GetLinkReg()) {
2950 return !lr_acquired_;
2951 }
2952 if (reg.IsFloat()) {
2953 return GetMasm()->GetScratchVRegisterList()->IncludesAliasOf(VixlVReg(reg));
2954 }
2955 return GetMasm()->GetScratchRegisterList()->IncludesAliasOf(VixlReg(reg));
2956 }
2957
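// Call a floating-point runtime helper: move the two FP arguments into s0/s1 (or d0/d1), as the
// AAPCS64 calling convention requires, perform the call and copy the result from s0/d0 into dst.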
2958 void Aarch64Encoder::MakeLibCall(Reg dst, Reg src0, Reg src1, const void *entry_point)
2959 {
2960 if (!dst.IsFloat()) {
2961 SetFalseResult();
2962 return;
2963 }
2964 if (dst.GetType() == FLOAT32_TYPE) {
2965 if (!src0.IsFloat() || !src1.IsFloat()) {
2966 SetFalseResult();
2967 return;
2968 }
2969
2970 if (src0.GetId() != vixl::aarch64::s0.GetCode() || src1.GetId() != vixl::aarch64::s1.GetCode()) {
2971 ScopedTmpRegF32 tmp(this);
2972 GetMasm()->Fmov(VixlVReg(tmp), VixlVReg(src1));
2973 GetMasm()->Fmov(vixl::aarch64::s0, VixlVReg(src0));
2974 GetMasm()->Fmov(vixl::aarch64::s1, VixlVReg(tmp));
2975 }
2976
2977 MakeCall(entry_point);
2978
2979 if (dst.GetId() != vixl::aarch64::s0.GetCode()) {
2980 GetMasm()->Fmov(VixlVReg(dst), vixl::aarch64::s0);
2981 }
2982 } else if (dst.GetType() == FLOAT64_TYPE) {
2983 if (!src0.IsFloat() || !src1.IsFloat()) {
2984 SetFalseResult();
2985 return;
2986 }
2987
2988 if (src0.GetId() != vixl::aarch64::d0.GetCode() || src1.GetId() != vixl::aarch64::d1.GetCode()) {
2989 ScopedTmpRegF64 tmp(this);
2990 GetMasm()->Fmov(VixlVReg(tmp), VixlVReg(src1));
2991
2992 GetMasm()->Fmov(vixl::aarch64::d0, VixlVReg(src0));
2993 GetMasm()->Fmov(vixl::aarch64::d1, VixlVReg(tmp));
2994 }
2995
2996 MakeCall(entry_point);
2997
2998 if (dst.GetId() != vixl::aarch64::d0.GetCode()) {
2999 GetMasm()->Fmov(VixlVReg(dst), vixl::aarch64::d0);
3000 }
3001 } else {
3002 UNREACHABLE();
3003 }
3004 }
3005
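// Spill (is_store) or fill the registers in 'registers' to/from stack slots relative to SP.
// If the largest offset does not fit the STP/LDP immediate form, the base address is first
// materialized in a temporary (LR), so the pairing loop can always use short offsets.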
3006 template <bool is_store>
3007 void Aarch64Encoder::LoadStoreRegisters(RegMask registers, ssize_t slot, size_t start_reg, bool is_fp)
3008 {
3009 if (registers.none()) {
3010 return;
3011 }
3012 int32_t last_reg = registers.size() - 1;
3013 for (; last_reg >= 0; --last_reg) {
3014 if (registers.test(last_reg)) {
3015 break;
3016 }
3017 }
3018 // Construct single add for big offset
3019 size_t sp_offset = 0;
3020 auto last_offset = (slot + last_reg - start_reg) * DOUBLE_WORD_SIZE_BYTE;
3021
3022 if (!vixl::aarch64::Assembler::IsImmLSPair(last_offset, vixl::aarch64::kXRegSizeInBytesLog2)) {
3023 ScopedTmpReg lr_reg(this, true);
3024 auto tmp = VixlReg(lr_reg);
3025 sp_offset = slot * DOUBLE_WORD_SIZE_BYTE;
3026 slot = 0;
3027 if (vixl::aarch64::Assembler::IsImmAddSub(sp_offset)) {
3028 GetMasm()->Add(tmp, vixl::aarch64::sp, VixlImm(sp_offset));
3029 } else {
3030 GetMasm()->Mov(tmp, VixlImm(sp_offset));
3031 GetMasm()->Add(tmp, vixl::aarch64::sp, tmp);
3032 }
3033 LoadStoreRegistersLoop<is_store>(registers, slot, start_reg, is_fp, tmp);
3034 } else {
3035 LoadStoreRegistersLoop<is_store>(registers, slot, start_reg, is_fp, vixl::aarch64::sp);
3036 }
3037 }
3038
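// Overload driven by an explicit slot mask: 'registers' selects which registers to save or
// restore, while a non-empty 'mask' describes the frame's slot layout and advances the slot
// index. Registers occupying adjacent slots are combined into STP/LDP pairs, the rest use
// single STR/LDR accesses.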
3039 template <bool is_store>
3040 void Aarch64Encoder::LoadStoreRegisters(RegMask registers, bool is_fp, int32_t slot, Reg base, RegMask mask)
3041 {
3042 if (registers.none()) {
3043 return;
3044 }
3045
3046 int32_t max_offset = (slot + helpers::ToSigned(registers.GetMaxRegister())) * DOUBLE_WORD_SIZE_BYTE;
3047 int32_t min_offset = (slot + helpers::ToSigned(registers.GetMinRegister())) * DOUBLE_WORD_SIZE_BYTE;
3048
3049 ScopedTmpRegLazy tmp_reg(this, true);
3050 // Construct single add for big offset
3051 if (!vixl::aarch64::Assembler::IsImmLSPair(min_offset, vixl::aarch64::kXRegSizeInBytesLog2) ||
3052 !vixl::aarch64::Assembler::IsImmLSPair(max_offset, vixl::aarch64::kXRegSizeInBytesLog2)) {
3053 tmp_reg.Acquire();
3054 auto lr_reg = VixlReg(tmp_reg);
3055 ssize_t sp_offset = slot * DOUBLE_WORD_SIZE_BYTE;
3056 if (vixl::aarch64::Assembler::IsImmAddSub(sp_offset)) {
3057 GetMasm()->Add(lr_reg, VixlReg(base), VixlImm(sp_offset));
3058 } else {
3059 GetMasm()->Mov(lr_reg, VixlImm(sp_offset));
3060 GetMasm()->Add(lr_reg, VixlReg(base), lr_reg);
3061 }
3062 // Adjust new values for slot and base register
3063 slot = 0;
3064 base = tmp_reg;
3065 }
3066
3067 auto base_reg = VixlReg(base);
3068 bool has_mask = mask.any();
3069 int32_t index = has_mask ? static_cast<int32_t>(mask.GetMinRegister()) : 0;
3070 int32_t last_index = -1;
3071 ssize_t last_id = -1;
3072
3073 slot -= index;
3074 for (ssize_t id = index; id < helpers::ToSigned(registers.size()); id++) {
3075 if (has_mask) {
3076 if (!mask.test(id)) {
3077 continue;
3078 }
3079 index++;
3080 }
3081 if (!registers.test(id)) {
3082 continue;
3083 }
3084 if (!has_mask) {
3085 index++;
3086 }
3087 if (last_id != -1) {
3088 auto reg =
3089 CPURegister(id, vixl::aarch64::kXRegSize, is_fp ? CPURegister::kVRegister : CPURegister::kRegister);
3090 auto last_reg = CPURegister(last_id, vixl::aarch64::kXRegSize,
3091 is_fp ? CPURegister::kVRegister : CPURegister::kRegister);
3092 if (!has_mask || last_id + 1 == id) {
3093 static constexpr ssize_t OFFSET = 2;
3094 if constexpr (is_store) { // NOLINT
3095 GetMasm()->Stp(last_reg, reg,
3096 MemOperand(base_reg, (slot + index - OFFSET) * DOUBLE_WORD_SIZE_BYTE));
3097 } else { // NOLINT
3098 GetMasm()->Ldp(last_reg, reg,
3099 MemOperand(base_reg, (slot + index - OFFSET) * DOUBLE_WORD_SIZE_BYTE));
3100 }
3101 last_id = -1;
3102 } else {
3103 if constexpr (is_store) { // NOLINT
3104 GetMasm()->Str(last_reg, MemOperand(base_reg, (slot + last_index - 1) * DOUBLE_WORD_SIZE_BYTE));
3105 } else { // NOLINT
3106 GetMasm()->Ldr(last_reg, MemOperand(base_reg, (slot + last_index - 1) * DOUBLE_WORD_SIZE_BYTE));
3107 }
3108 last_id = id;
3109 last_index = index;
3110 }
3111 } else {
3112 last_id = id;
3113 last_index = index;
3114 }
3115 }
3116 if (last_id != -1) {
3117 auto last_reg =
3118 CPURegister(last_id, vixl::aarch64::kXRegSize, is_fp ? CPURegister::kVRegister : CPURegister::kRegister);
3119 if constexpr (is_store) { // NOLINT
3120 GetMasm()->Str(last_reg, MemOperand(base_reg, (slot + last_index - 1) * DOUBLE_WORD_SIZE_BYTE));
3121 } else { // NOLINT
3122 GetMasm()->Ldr(last_reg, MemOperand(base_reg, (slot + last_index - 1) * DOUBLE_WORD_SIZE_BYTE));
3123 }
3124 }
3125 }
3126
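// Walk the register mask, emitting STP/LDP for consecutively numbered register pairs and
// falling back to single STR/LDR otherwise; offsets are computed relative to 'start_reg'.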
3127 template <bool is_store>
3128 void Aarch64Encoder::LoadStoreRegistersLoop(RegMask registers, ssize_t slot, size_t start_reg, bool is_fp,
3129 const vixl::aarch64::Register &base_reg)
3130 {
3131 size_t i = 0;
3132 const auto GET_NEXT_REG = [&registers, &i, is_fp]() {
3133 for (; i < registers.size(); i++) {
3134 if (registers.test(i)) {
3135 return CPURegister(i++, vixl::aarch64::kXRegSize,
3136 is_fp ? CPURegister::kVRegister : CPURegister::kRegister);
3137 }
3138 }
3139 return CPURegister();
3140 };
3141
3142 for (CPURegister next_reg = GET_NEXT_REG(); next_reg.IsValid();) {
3143 const CPURegister CURR_REG = next_reg;
3144 next_reg = GET_NEXT_REG();
3145 if (next_reg.IsValid() && (next_reg.GetCode() - 1 == CURR_REG.GetCode())) {
3146 if constexpr (is_store) { // NOLINT
3147 GetMasm()->Stp(CURR_REG, next_reg,
3148 MemOperand(base_reg, (slot + CURR_REG.GetCode() - start_reg) * DOUBLE_WORD_SIZE_BYTE));
3149 } else { // NOLINT
3150 GetMasm()->Ldp(CURR_REG, next_reg,
3151 MemOperand(base_reg, (slot + CURR_REG.GetCode() - start_reg) * DOUBLE_WORD_SIZE_BYTE));
3152 }
3153 next_reg = GET_NEXT_REG();
3154 } else {
3155 if constexpr (is_store) { // NOLINT
3156 GetMasm()->Str(CURR_REG,
3157 MemOperand(base_reg, (slot + CURR_REG.GetCode() - start_reg) * DOUBLE_WORD_SIZE_BYTE));
3158 } else { // NOLINT
3159 GetMasm()->Ldr(CURR_REG,
3160 MemOperand(base_reg, (slot + CURR_REG.GetCode() - start_reg) * DOUBLE_WORD_SIZE_BYTE));
3161 }
3162 }
3163 }
3164 }
3165
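// Push the requested registers in pairs with pre-indexed STP. A trailing unpaired register is
// pushed with STR; when 'align' is set it still consumes a full 16-byte slot so that SP keeps
// its 16-byte alignment.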
3166 void Aarch64Encoder::PushRegisters(RegMask registers, bool is_fp, bool align)
3167 {
3168 static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTE;
3169 Register last_reg = INVALID_REG;
3170 for (size_t i = 0; i < registers.size(); i++) {
3171 if (registers[i]) {
3172 if (last_reg == INVALID_REG) {
3173 last_reg = i;
3174 continue;
3175 }
3176 if (is_fp) {
3177 GetMasm()->stp(vixl::aarch64::VRegister(last_reg, DOUBLE_WORD_SIZE),
3178 vixl::aarch64::VRegister(i, DOUBLE_WORD_SIZE),
3179 MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
3180 } else {
3181 GetMasm()->stp(vixl::aarch64::Register(last_reg, DOUBLE_WORD_SIZE),
3182 vixl::aarch64::Register(i, DOUBLE_WORD_SIZE),
3183 MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
3184 }
3185 last_reg = INVALID_REG;
3186 }
3187 }
3188 if (last_reg != INVALID_REG) {
3189 if (is_fp) {
3190 GetMasm()->str(vixl::aarch64::VRegister(last_reg, DOUBLE_WORD_SIZE),
3191 MemOperand(vixl::aarch64::sp, align ? -PAIR_OFFSET : -DOUBLE_WORD_SIZE_BYTE,
3192 vixl::aarch64::AddrMode::PreIndex));
3193 } else {
3194 GetMasm()->str(vixl::aarch64::Register(last_reg, DOUBLE_WORD_SIZE),
3195 MemOperand(vixl::aarch64::sp, align ? -PAIR_OFFSET : -DOUBLE_WORD_SIZE_BYTE,
3196 vixl::aarch64::AddrMode::PreIndex));
3197 }
3198 }
3199 }
3200
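// Mirror of PushRegisters: with an odd register count the highest-numbered register was pushed
// last and sits on top, so it is popped first; the remaining registers are then popped pairwise
// with post-indexed LDP.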
3201 void Aarch64Encoder::PopRegisters(RegMask registers, bool is_fp, bool align)
3202 {
3203 static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTE;
3204 Register last_reg = INVALID_REG;
3205 if ((registers.count() & 1U) != 0) {
3206 last_reg = registers.GetMaxRegister();
3207 if (is_fp) {
3208 GetMasm()->ldr(vixl::aarch64::VRegister(last_reg, DOUBLE_WORD_SIZE),
3209 MemOperand(vixl::aarch64::sp, align ? PAIR_OFFSET : DOUBLE_WORD_SIZE_BYTE,
3210 vixl::aarch64::AddrMode::PostIndex));
3211 } else {
3212 GetMasm()->ldr(vixl::aarch64::Register(last_reg, DOUBLE_WORD_SIZE),
3213 MemOperand(vixl::aarch64::sp, align ? PAIR_OFFSET : DOUBLE_WORD_SIZE_BYTE,
3214 vixl::aarch64::AddrMode::PostIndex));
3215 }
3216 registers.reset(last_reg);
3217 }
3218 last_reg = INVALID_REG;
3219 for (ssize_t i = registers.size() - 1; i >= 0; i--) {
3220 if (registers[i]) {
3221 if (last_reg == INVALID_REG) {
3222 last_reg = i;
3223 continue;
3224 }
3225 if (is_fp) {
3226 GetMasm()->ldp(vixl::aarch64::VRegister(i, DOUBLE_WORD_SIZE),
3227 vixl::aarch64::VRegister(last_reg, DOUBLE_WORD_SIZE),
3228 MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
3229 } else {
3230 GetMasm()->ldp(vixl::aarch64::Register(i, DOUBLE_WORD_SIZE),
3231 vixl::aarch64::Register(last_reg, DOUBLE_WORD_SIZE),
3232 MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
3233 }
3234 last_reg = INVALID_REG;
3235 }
3236 }
3237 }
3238
3239 #ifndef PANDA_MINIMAL_VIXL
3240 auto &Aarch64Encoder::GetDecoder() const
3241 {
3242 if (decoder_ == nullptr) {
3243 decoder_ = GetAllocator()->New<vixl::aarch64::Decoder>(GetAllocator());
3244 }
3245 return *decoder_;
3246 }
3247 #endif
3248
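// Disassemble the single instruction at 'pc' in the masm buffer and print it to 'stream',
// prefixed with its code offset when code_offset >= 0. Returns the pc of the next instruction.
// In PANDA_MINIMAL_VIXL builds only the pc advance remains.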
3249 size_t Aarch64Encoder::DisasmInstr([[maybe_unused]] std::ostream &stream, size_t pc,
3250 [[maybe_unused]] ssize_t code_offset) const
3251 {
3252 #ifndef PANDA_MINIMAL_VIXL
3253 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
3254 std::array<char, vixl::aarch64::Disassembler::GetDefaultBufferSize()> buf;
3255 vixl::aarch64::Disassembler disasm(std::data(buf), std::size(buf));
3256
3257 auto &decoder {GetDecoder()};
3258 vixl::aarch64::Decoder::ScopedVisitors sv(decoder, {&disasm});
3259 auto instr = GetMasm()->GetBuffer()->GetOffsetAddress<vixl::aarch64::Instruction *>(pc);
3260
3261 auto buffer_start = GetMasm()->GetBuffer()->GetOffsetAddress<uintptr_t>(0);
3262 decoder.Decode(instr);
3263 if (code_offset < 0) {
3264 stream << disasm.GetOutput();
3265 } else {
3266 stream << std::setw(0x4) << std::right << std::setfill('0') << std::hex
3267 << reinterpret_cast<uintptr_t>(instr) - buffer_start + code_offset << ": " << disasm.GetOutput()
3268 << std::setfill(' ') << std::dec;
3269 }
3270
3271 #endif
3272 return pc + vixl::aarch64::kInstructionSize;
3273 }
3274 } // namespace panda::compiler::aarch64
3275