1 /*
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 /*
16 Encoder (implementation of math and mem Low-level emitters)
17 */
18
19 #include <iomanip>
20
21 #include "libpandabase/utils/utils.h"
22 #include "compiler/optimizer/code_generator/relocations.h"
23 #include "compiler/optimizer/code_generator/fast_divisor.h"
24 #include "operands.h"
25 #include "scoped_tmp_reg.h"
26 #include "target/amd64/target.h"
27
28 #include "lib_helpers.inl"
29
30 #include "Zydis/Zydis.h"
31
32 #ifndef PANDA_TARGET_MACOS
33 #include "elf.h"
34 #endif // PANDA_TARGET_MACOS
35
36 namespace ark::compiler::amd64 {
37
/// Map an IR integer condition onto the corresponding asmjit x86 condition code.
static auto ArchCcInt(Condition cc)
{
    switch (cc) {
        case Condition::EQ:
            return asmjit::x86::Condition::Code::kEqual;
        case Condition::NE:
            return asmjit::x86::Condition::Code::kNotEqual;
        // LT/GT/LE/GE are the signed comparisons.
        case Condition::LT:
            return asmjit::x86::Condition::Code::kSignedLT;
        case Condition::GT:
            return asmjit::x86::Condition::Code::kSignedGT;
        case Condition::LE:
            return asmjit::x86::Condition::Code::kSignedLE;
        case Condition::GE:
            return asmjit::x86::Condition::Code::kSignedGE;
        // LO/LS/HI/HS are the unsigned counterparts.
        case Condition::LO:
            return asmjit::x86::Condition::Code::kUnsignedLT;
        case Condition::LS:
            return asmjit::x86::Condition::Code::kUnsignedLE;
        case Condition::HI:
            return asmjit::x86::Condition::Code::kUnsignedGT;
        case Condition::HS:
            return asmjit::x86::Condition::Code::kUnsignedGE;
        // NOTE(igorban) : Remove them
        case Condition::MI:
            return asmjit::x86::Condition::Code::kNegative;
        case Condition::PL:
            return asmjit::x86::Condition::Code::kPositive;
        case Condition::VS:
            return asmjit::x86::Condition::Code::kOverflow;
        case Condition::VC:
            return asmjit::x86::Condition::Code::kNotOverflow;
        case Condition::AL:
        case Condition::NV:
        default:
            UNREACHABLE();
            // Unreachable; the return only silences the missing-return warning.
            return asmjit::x86::Condition::Code::kEqual;
    }
}
/// Map an IR condition onto an asmjit condition code for floating-point compares.
/// comiss/comisd set CF/ZF like an unsigned integer compare, so the ordered
/// relational conditions (LT/GT/LE/GE) map to the *unsigned* codes here.
static auto ArchCcFloat(Condition cc)
{
    switch (cc) {
        case Condition::EQ:
            return asmjit::x86::Condition::Code::kEqual;
        case Condition::NE:
            return asmjit::x86::Condition::Code::kNotEqual;
        case Condition::LT:
            return asmjit::x86::Condition::Code::kUnsignedLT;
        case Condition::GT:
            return asmjit::x86::Condition::Code::kUnsignedGT;
        case Condition::LE:
            return asmjit::x86::Condition::Code::kUnsignedLE;
        case Condition::GE:
            return asmjit::x86::Condition::Code::kUnsignedGE;
        case Condition::LO:
            return asmjit::x86::Condition::Code::kUnsignedLT;
        case Condition::LS:
            return asmjit::x86::Condition::Code::kUnsignedLE;
        case Condition::HI:
            return asmjit::x86::Condition::Code::kUnsignedGT;
        case Condition::HS:
            return asmjit::x86::Condition::Code::kUnsignedGE;
        // NOTE(igorban) : Remove them
        case Condition::MI:
            return asmjit::x86::Condition::Code::kNegative;
        case Condition::PL:
            return asmjit::x86::Condition::Code::kPositive;
        case Condition::VS:
            return asmjit::x86::Condition::Code::kOverflow;
        case Condition::VC:
            return asmjit::x86::Condition::Code::kNotOverflow;
        case Condition::AL:
        case Condition::NV:
        default:
            UNREACHABLE();
            // Unreachable; the return only silences the missing-return warning.
            return asmjit::x86::Condition::Code::kEqual;
    }
}
116 /// Converters
ArchCc(Condition cc,bool isFloat=false)117 static asmjit::x86::Condition::Code ArchCc(Condition cc, bool isFloat = false)
118 {
119 return isFloat ? ArchCcFloat(cc) : ArchCcInt(cc);
120 }
121
ArchCcTest(Condition cc)122 static asmjit::x86::Condition::Code ArchCcTest(Condition cc)
123 {
124 ASSERT(cc == Condition::TST_EQ || cc == Condition::TST_NE);
125 return cc == Condition::TST_EQ ? asmjit::x86::Condition::Code::kEqual : asmjit::x86::Condition::Code::kNotEqual;
126 }
127
CcMatchesNan(Condition cc)128 static bool CcMatchesNan(Condition cc)
129 {
130 switch (cc) {
131 case Condition::NE:
132 case Condition::LT:
133 case Condition::LE:
134 case Condition::HI:
135 case Condition::HS:
136 return true;
137
138 default:
139 return false;
140 }
141 }
142
143 /// Converters
/// Convert an abstract register into an asmjit GP register.
/// `size` (in bits) overrides the register's own size when non-zero, which lets
/// callers view the same register as rax/eax/ax/al as needed.
static asmjit::x86::Gp ArchReg(Reg reg, uint8_t size = 0)
{
    ASSERT(reg.IsValid());
    if (reg.IsScalar()) {
        size_t regSize = size == 0 ? reg.GetSize() : size;
        auto archId = ConvertRegNumber(reg.GetId());

        // Pick the asmjit register signature matching the requested width.
        asmjit::x86::Gp archReg;
        switch (regSize) {
            case DOUBLE_WORD_SIZE:
                archReg = asmjit::x86::Gp(asmjit::x86::Gpq::kSignature, archId);
                break;
            case WORD_SIZE:
                archReg = asmjit::x86::Gp(asmjit::x86::Gpd::kSignature, archId);
                break;
            case HALF_SIZE:
                archReg = asmjit::x86::Gp(asmjit::x86::Gpw::kSignature, archId);
                break;
            case BYTE_SIZE:
                // Low byte register (al, not ah).
                archReg = asmjit::x86::Gp(asmjit::x86::GpbLo::kSignature, archId);
                break;

            default:
                UNREACHABLE();
        }

        ASSERT(archReg.isValid());
        return archReg;
    }
    // The stack pointer is the only non-scalar register accepted here.
    if (reg.GetId() == ConvertRegNumber(asmjit::x86::rsp.id())) {
        return asmjit::x86::rsp;
    }

    // Invalid register type
    UNREACHABLE();
    return asmjit::x86::rax;
}
181
ArchVReg(Reg reg)182 static asmjit::x86::Xmm ArchVReg(Reg reg)
183 {
184 ASSERT(reg.IsValid() && reg.IsFloat());
185 auto archVreg = asmjit::x86::xmm(reg.GetId());
186 return archVreg;
187 }
188
ArchImm(Imm imm)189 static asmjit::Imm ArchImm(Imm imm)
190 {
191 ASSERT(imm.GetType() == INT64_TYPE);
192 return asmjit::imm(imm.GetAsInt());
193 }
194
ImmToUnsignedInt(Imm imm)195 static uint64_t ImmToUnsignedInt(Imm imm)
196 {
197 ASSERT(imm.GetType() == INT64_TYPE);
198 return uint64_t(imm.GetAsInt());
199 }
200
ImmFitsSize(int64_t imm,uint8_t size)201 static bool ImmFitsSize(int64_t imm, uint8_t size)
202 {
203 if (size == DOUBLE_WORD_SIZE) {
204 size = WORD_SIZE;
205 }
206
207 // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult)
208 int64_t max = (uint64_t(1) << (size - 1U)) - 1U;
209 int64_t min = ~uint64_t(max);
210 ASSERT(min < 0);
211 ASSERT(max > 0);
212
213 return imm >= min && imm <= max;
214 }
215
/// Create a new assembler label and return its sequential id (ids start at 0).
LabelHolder::LabelId Amd64LabelHolder::CreateLabel()
{
    ++id_;

    auto masm = (static_cast<Amd64Encoder *>(GetEncoder()))->GetMasm();
    auto label = masm->newLabel();

    // Labels are arena-allocated so they outlive this call.
    auto allocator = GetEncoder()->GetAllocator();
    labels_.push_back(allocator->New<LabelType>(std::move(label)));
    ASSERT(labels_.size() == id_);
    return id_ - 1;
}
228
/// Translate an abstract MemRef into an asmjit memory operand.
/// Supported shapes: [base + disp] and [base + index << scale]; any other
/// combination is rejected. Scales above 3 and narrow index registers cannot
/// be expressed directly and are fixed up later in Prepare().
ArchMem::ArchMem(MemRef mem)
{
    bool base = mem.HasBase();
    bool regoffset = mem.HasIndex();
    bool shift = mem.HasScale();
    bool offset = mem.HasDisp();

    if (base && !regoffset && !shift) {
        // Default memory - base + offset
        mem_ = asmjit::x86::ptr(ArchReg(mem.GetBase()), mem.GetDisp());
    } else if (base && regoffset && !offset) {
        auto baseSize = mem.GetBase().GetSize();
        auto indexSize = mem.GetIndex().GetSize();

        ASSERT(baseSize >= indexSize);
        ASSERT(indexSize >= WORD_SIZE);

        // A 32-bit index must be sign-extended to the base width before use.
        if (baseSize > indexSize) {
            needExtendIndex_ = true;
        }

        if (mem.GetScale() == 0) {
            mem_ = asmjit::x86::ptr(ArchReg(mem.GetBase()), ArchReg(mem.GetIndex(), baseSize));
        } else {
            auto scale = mem.GetScale();
            if (scale <= 3U) {
                // x86 addressing encodes scales 1/2/4/8 (shift 0..3) directly.
                mem_ = asmjit::x86::ptr(ArchReg(mem.GetBase()), ArchReg(mem.GetIndex(), baseSize), scale);
            } else {
                // Larger scale: remember it and shift the index register in Prepare().
                mem_ = asmjit::x86::ptr(ArchReg(mem.GetBase()), ArchReg(mem.GetIndex(), baseSize));
                bigShift_ = scale;
            }
        }
    } else {
        // Wrong memRef
        UNREACHABLE();
    }
}
266
/// Finalize the memory operand, emitting any fix-up instructions it needs:
/// an explicit shift for scales > 3 and a sign-extension for a 32-bit index.
/// NOTE(review): both fix-ups modify the index register in place - callers
/// presumably pass a temporary as the index; confirm at the call sites.
asmjit::x86::Mem ArchMem::Prepare(asmjit::x86::Assembler *masm)
{
    // Idempotent: fix-up code must only be emitted once.
    if (isPrepared_) {
        return mem_;
    }

    if (bigShift_ != 0) {
        ASSERT(!mem_.hasOffset() && mem_.hasIndex() && bigShift_ > 3U);
        masm->shl(mem_.indexReg().as<asmjit::x86::Gp>(), asmjit::imm(bigShift_));
    }

    if (needExtendIndex_) {
        ASSERT(mem_.hasIndex());
        // Sign-extend the 32-bit index into the full 64-bit register.
        auto qIndex = mem_.indexReg().as<asmjit::x86::Gp>();
        auto dIndex {qIndex};
        dIndex.setSignature(asmjit::x86::Gpd::kSignature);
        masm->movsxd(qIndex, dIndex);
    }

    isPrepared_ = true;
    return mem_;
}
289
/// Error handler that forwards asmjit failures to the owning encoder.
AsmJitErrorHandler::AsmJitErrorHandler(Encoder *encoder) : encoder_(encoder)
{
    ASSERT(encoder != nullptr);
}
294
/// asmjit callback: any emission error marks the whole encoding as failed;
/// the error details themselves are discarded.
void AsmJitErrorHandler::handleError([[maybe_unused]] asmjit::Error err, [[maybe_unused]] const char *message,
                                     [[maybe_unused]] asmjit::BaseEmitter *origin)
{
    encoder_->SetFalseResult();
}
300
CreateLabels(LabelId max)301 void Amd64LabelHolder::CreateLabels(LabelId max)
302 {
303 for (LabelId i = 0; i < max; ++i) {
304 CreateLabel();
305 }
306 }
307
/// Return the label created for `id`; the id must come from CreateLabel().
Amd64LabelHolder::LabelType *Amd64LabelHolder::GetLabel(LabelId id)
{
    ASSERT(labels_.size() > id);
    return labels_[id];
}
313
/// Number of labels created so far.
Amd64LabelHolder::LabelId Amd64LabelHolder::Size()
{
    return labels_.size();
}
318
BindLabel(LabelId id)319 void Amd64LabelHolder::BindLabel(LabelId id)
320 {
321 static_cast<Amd64Encoder *>(GetEncoder())->GetMasm()->bind(*labels_[id]);
322 }
323
Amd64Encoder(ArenaAllocator * allocator)324 Amd64Encoder::Amd64Encoder(ArenaAllocator *allocator) : Encoder(allocator, Arch::X86_64, false) {}
325
/// Destroy all helper objects explicitly. They were created with the arena
/// allocator's New<> (see InitMasm), which presumably does not run destructors
/// on arena reset - hence the manual destructor calls here.
Amd64Encoder::~Amd64Encoder()
{
    if (masm_ != nullptr) {
        masm_->~Assembler();
        masm_ = nullptr;
    }

    if (codeHolder_ != nullptr) {
        codeHolder_->~CodeHolder();
        codeHolder_ = nullptr;
    }

    if (errorHandler_ != nullptr) {
        errorHandler_->~ErrorHandler();
        errorHandler_ = nullptr;
    }

    if (labels_ != nullptr) {
        labels_->~Amd64LabelHolder();
        labels_ = nullptr;
    }
}
348
/// Accessor for the label holder; only valid after InitMasm() succeeded.
LabelHolder *Amd64Encoder::GetLabels() const
{
    ASSERT(labels_ != nullptr);
    return labels_;
}
354
/// The amd64 backend is always available.
bool Amd64Encoder::IsValid() const
{
    return true;
}
359
/// Target description for x86-64.
constexpr auto Amd64Encoder::GetTarget()
{
    return ark::compiler::Target(Arch::X86_64);
}
364
/// Lazily create the asmjit machinery (labels, code holder, assembler, error
/// handler). Returns false and sets a false result on any allocation failure.
/// Idempotent: a second call with masm_ already set does nothing.
bool Amd64Encoder::InitMasm()
{
    if (masm_ == nullptr) {
        labels_ = GetAllocator()->New<Amd64LabelHolder>(this);
        if (labels_ == nullptr) {
            SetFalseResult();
            return false;
        }

        asmjit::Environment env;
        env.setArch(asmjit::Environment::kArchX64);

        // The code holder reuses the encoder's arena allocator.
        codeHolder_ = GetAllocator()->New<asmjit::CodeHolder>(GetAllocator());
        if (codeHolder_ == nullptr) {
            SetFalseResult();
            return false;
        }
        codeHolder_->init(env, 0U);

        masm_ = GetAllocator()->New<asmjit::x86::Assembler>(codeHolder_);
        if (masm_ == nullptr) {
            SetFalseResult();
            return false;
        }

        // Enable strict validation.
        masm_->addValidationOptions(asmjit::BaseEmitter::kValidationOptionAssembler);
        // Emission errors are reported through SetFalseResult() rather than aborting.
        errorHandler_ = GetAllocator()->New<AsmJitErrorHandler>(this);
        if (errorHandler_ == nullptr) {
            SetFalseResult();
            return false;
        }
        masm_->setErrorHandler(errorHandler_);

        // Make sure that the compiler uses the same scratch registers as the assembler
        CHECK_EQ(compiler::arch_info::x86_64::TEMP_REGS, GetTarget().GetTempRegsMask());
        CHECK_EQ(compiler::arch_info::x86_64::TEMP_FP_REGS, GetTarget().GetTempVRegsMask());
    }
    return true;
}
405
/// Flatten the emitted sections, resolve pending links, and copy the final
/// machine code into an arena buffer relocated to that buffer's address.
/// NOTE(review): codeSize is read before flatten()/resolveUnresolvedLinks(),
/// and the Alloc() result is not checked for null - confirm both are safe here.
void Amd64Encoder::Finalize()
{
    auto code = GetMasm()->code();
    auto codeSize = code->codeSize();

    code->flatten();
    code->resolveUnresolvedLinks();

    auto codeBuffer = GetAllocator()->Alloc(codeSize);

    code->relocateToBase(reinterpret_cast<uintptr_t>(codeBuffer));
    code->copyFlattenedData(codeBuffer, codeSize, asmjit::CodeHolder::kCopyPadSectionBuffer);
}
419
EncodeJump(LabelHolder::LabelId id)420 void Amd64Encoder::EncodeJump(LabelHolder::LabelId id)
421 {
422 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
423 GetMasm()->jmp(*label);
424 }
425
/// Compare src0 with src1 and jump to `id` when `cc` holds.
/// Mixed-width scalar operands are first widened through a temporary.
/// Float compares take care of the unordered (NaN) case explicitly, since
/// comiss/comisd report it via the parity flag.
void Amd64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
{
    if (src0.IsScalar()) {
        if (src0.GetSize() == src1.GetSize()) {
            GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
        } else if (src0.GetSize() > src1.GetSize()) {
            // Widen src1 up to src0's size before comparing.
            ScopedTmpReg tmpReg(this, src0.GetType());
            EncodeCast(tmpReg, false, src1, false);
            GetMasm()->cmp(ArchReg(src0), ArchReg(tmpReg));
        } else {
            // Widen src0 up to src1's size before comparing.
            ScopedTmpReg tmpReg(this, src1.GetType());
            EncodeCast(tmpReg, false, src0, false);
            GetMasm()->cmp(ArchReg(tmpReg), ArchReg(src1));
        }
    } else if (src0.GetType() == FLOAT32_TYPE) {
        GetMasm()->comiss(ArchVReg(src0), ArchVReg(src1));
    } else {
        GetMasm()->comisd(ArchVReg(src0), ArchVReg(src1));
    }

    auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
    if (src0.IsScalar()) {
        GetMasm()->j(ArchCc(cc, src0.IsFloat()), *label);
        return;
    }

    if (CcMatchesNan(cc)) {
        // Unordered result must take the branch: PF set means NaN.
        GetMasm()->jp(*label);
        GetMasm()->j(ArchCc(cc, src0.IsFloat()), *label);
    } else {
        // Unordered result must NOT take the branch: skip over it on NaN.
        auto end = GetMasm()->newLabel();

        GetMasm()->jp(end);
        GetMasm()->j(ArchCc(cc, src0.IsFloat()), *label);
        GetMasm()->bind(end);
    }
}
463
EncodeJump(LabelHolder::LabelId id,Reg src,Imm imm,Condition cc)464 void Amd64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
465 {
466 ASSERT(src.IsScalar());
467
468 auto immVal = imm.GetAsInt();
469 if (immVal == 0) {
470 EncodeJump(id, src, cc);
471 return;
472 }
473
474 if (ImmFitsSize(immVal, src.GetSize())) {
475 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
476
477 GetMasm()->cmp(ArchReg(src), asmjit::imm(immVal));
478 GetMasm()->j(ArchCc(cc), *label);
479 } else {
480 ScopedTmpReg tmpReg(this, src.GetType());
481 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
482 EncodeJump(id, src, tmpReg, cc);
483 }
484 }
485
/// Bit-test src0 against src1 (x86 `test`, i.e. AND without writeback) and
/// jump to `id` on TST_EQ / TST_NE. Mixed widths are widened via a temporary.
void Amd64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
{
    ASSERT(src0.IsScalar());
    if (src0.GetSize() == src1.GetSize()) {
        GetMasm()->test(ArchReg(src0), ArchReg(src1));
    } else if (src0.GetSize() > src1.GetSize()) {
        // Widen src1 to src0's size first.
        ScopedTmpReg tmpReg(this, src0.GetType());
        EncodeCast(tmpReg, false, src1, false);
        GetMasm()->test(ArchReg(src0), ArchReg(tmpReg));
    } else {
        // Widen src0 to src1's size first.
        ScopedTmpReg tmpReg(this, src1.GetType());
        EncodeCast(tmpReg, false, src0, false);
        GetMasm()->test(ArchReg(tmpReg), ArchReg(src1));
    }

    auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
    GetMasm()->j(ArchCcTest(cc), *label);
}
504
EncodeJumpTest(LabelHolder::LabelId id,Reg src,Imm imm,Condition cc)505 void Amd64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
506 {
507 ASSERT(src.IsScalar());
508
509 auto immVal = imm.GetAsInt();
510 if (ImmFitsSize(immVal, src.GetSize())) {
511 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
512
513 GetMasm()->test(ArchReg(src), asmjit::imm(immVal));
514 GetMasm()->j(ArchCcTest(cc), *label);
515 } else {
516 ScopedTmpReg tmpReg(this, src.GetType());
517 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
518 EncodeJumpTest(id, src, tmpReg, cc);
519 }
520 }
521
/// Compare `src` against zero and jump to `id` when `cc` holds.
/// Floats are compared against a freshly zeroed XMM temporary.
void Amd64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Condition cc)
{
    if (src.IsScalar()) {
        auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);

        GetMasm()->cmp(ArchReg(src), asmjit::imm(0));
        GetMasm()->j(ArchCc(cc), *label);
        return;
    }

    // Zero the temporary (xor with itself) and delegate to the reg-reg form.
    ScopedTmpReg tmpReg(this, src.GetType());
    if (src.GetType() == FLOAT32_TYPE) {
        GetMasm()->xorps(ArchVReg(tmpReg), ArchVReg(tmpReg));
    } else {
        GetMasm()->xorpd(ArchVReg(tmpReg), ArchVReg(tmpReg));
    }
    EncodeJump(id, src, tmpReg, cc);
}
540
/// Indirect jump through the address held in `dst`.
void Amd64Encoder::EncodeJump(Reg dst)
{
    GetMasm()->jmp(ArchReg(dst));
}
545
/// Emit a `jmp rel32` with a zero displacement and describe it through the
/// relocation so the linker/loader can patch the target later.
void Amd64Encoder::EncodeJump(RelocationInfo *relocation)
{
#ifdef PANDA_TARGET_MACOS
    LOG(FATAL, COMPILER) << "Not supported in Macos build";
#else
    // 0xe9 = jmp rel32; the 4 displacement bytes are patched via the relocation.
    // NOLINTNEXTLINE(readability-magic-numbers)
    std::array<uint8_t, 5U> data = {0xe9, 0, 0, 0, 0};
    GetMasm()->embed(data.data(), data.size());

    // The rel32 field occupies the last 4 bytes of the instruction.
    constexpr int ADDEND = 4;
    relocation->offset = GetCursorOffset() - ADDEND;
    relocation->addend = -ADDEND;
    relocation->type = R_X86_64_PLT32;
#endif
}
561
EncodeBitTestAndBranch(LabelHolder::LabelId id,compiler::Reg reg,uint32_t bitPos,bool bitValue)562 void Amd64Encoder::EncodeBitTestAndBranch(LabelHolder::LabelId id, compiler::Reg reg, uint32_t bitPos, bool bitValue)
563 {
564 ASSERT(reg.IsScalar() && reg.GetSize() > bitPos);
565 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
566 if (reg.GetSize() == DOUBLE_WORD_SIZE) {
567 ScopedTmpRegU64 tmpReg(this);
568 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(static_cast<uint64_t>(1) << bitPos));
569 GetMasm()->test(ArchReg(reg), ArchReg(tmpReg));
570 } else {
571 GetMasm()->test(ArchReg(reg), asmjit::imm(1U << bitPos));
572 }
573 if (bitValue) {
574 GetMasm()->j(ArchCc(Condition::NE), *label);
575 } else {
576 GetMasm()->j(ArchCc(Condition::EQ), *label);
577 }
578 }
579
MakeCall(compiler::RelocationInfo * relocation)580 void Amd64Encoder::MakeCall([[maybe_unused]] compiler::RelocationInfo *relocation)
581 {
582 #ifdef PANDA_TARGET_MACOS
583 LOG(FATAL, COMPILER) << "Not supported in Macos build";
584 #else
585 // NOLINTNEXTLINE(readability-magic-numbers)
586 std::array<uint8_t, 5U> data = {0xe8, 0, 0, 0, 0};
587 GetMasm()->embed(data.data(), data.size());
588
589 relocation->offset = GetCursorOffset() - 4_I;
590 relocation->addend = -4_I;
591 relocation->type = R_X86_64_PLT32;
592 #endif
593 }
594
MakeCall(LabelHolder::LabelId id)595 void Amd64Encoder::MakeCall(LabelHolder::LabelId id)
596 {
597 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
598 GetMasm()->call(*label);
599 }
600
MakeCall(const void * entryPoint)601 void Amd64Encoder::MakeCall(const void *entryPoint)
602 {
603 ScopedTmpRegU64 tmpReg(this);
604 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(entryPoint));
605 GetMasm()->call(ArchReg(tmpReg));
606 }
607
/// Indirect call through the address held in `reg`.
void Amd64Encoder::MakeCall(Reg reg)
{
    GetMasm()->call(ArchReg(reg));
}
612
MakeCall(MemRef entryPoint)613 void Amd64Encoder::MakeCall(MemRef entryPoint)
614 {
615 ScopedTmpRegU64 tmpReg(this);
616 EncodeLdr(tmpReg, false, entryPoint);
617 GetMasm()->call(ArchReg(tmpReg));
618 }
619
620 template <typename Func>
EncodeRelativePcMov(Reg reg,intptr_t offset,Func encodeInstruction)621 void Amd64Encoder::EncodeRelativePcMov(Reg reg, intptr_t offset, Func encodeInstruction)
622 {
623 // NOLINTNEXTLINE(readability-identifier-naming)
624 auto pos = GetMasm()->offset();
625 encodeInstruction(reg, offset);
626 // NOLINTNEXTLINE(readability-identifier-naming)
627 offset -= (GetMasm()->offset() - pos);
628 // NOLINTNEXTLINE(readability-identifier-naming)
629 GetMasm()->setOffset(pos);
630 encodeInstruction(reg, offset);
631 }
632
/// AOT call: load the target address from a rip-relative slot at `offset`
/// (forced to the long encoding so the displacement width is stable), then
/// call through the temporary.
void Amd64Encoder::MakeCallAot(intptr_t offset)
{
    ScopedTmpRegU64 tmpReg(this);
    EncodeRelativePcMov(tmpReg, offset, [this](Reg reg, intptr_t offset) {
        GetMasm()->long_().mov(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
    });
    GetMasm()->call(ArchReg(tmpReg));
}
641
CanMakeCallByOffset(intptr_t offset)642 bool Amd64Encoder::CanMakeCallByOffset(intptr_t offset)
643 {
644 return offset == static_cast<intptr_t>(static_cast<int32_t>(offset));
645 }
646
/// Direct near call to `offset` relative to the current cursor position.
/// Callers are expected to have checked CanMakeCallByOffset() first.
void Amd64Encoder::MakeCallByOffset(intptr_t offset)
{
    GetMasm()->call(GetCursorOffset() + int32_t(offset));
}
651
MakeLoadAotTable(intptr_t offset,Reg reg)652 void Amd64Encoder::MakeLoadAotTable(intptr_t offset, Reg reg)
653 {
654 EncodeRelativePcMov(reg, offset, [this](Reg reg, intptr_t offset) {
655 GetMasm()->long_().mov(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
656 });
657 }
658
MakeLoadAotTableAddr(intptr_t offset,Reg addr,Reg val)659 void Amd64Encoder::MakeLoadAotTableAddr([[maybe_unused]] intptr_t offset, [[maybe_unused]] Reg addr,
660 [[maybe_unused]] Reg val)
661 {
662 EncodeRelativePcMov(addr, offset, [this](Reg reg, intptr_t offset) {
663 GetMasm()->long_().lea(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
664 });
665 GetMasm()->mov(ArchReg(val), asmjit::x86::ptr(ArchReg(addr)));
666 }
667
/// Emit a breakpoint trap (int3) to abort execution.
void Amd64Encoder::EncodeAbort()
{
    GetMasm()->int3();
}
672
/// Emit a function return.
void Amd64Encoder::EncodeReturn()
{
    GetMasm()->ret();
}
677
/// Multiply-by-immediate is not implemented for this backend; the failure is
/// reported through the encoder's result flag rather than by aborting.
void Amd64Encoder::EncodeMul([[maybe_unused]] Reg dst, [[maybe_unused]] Reg src, [[maybe_unused]] Imm imm)
{
    SetFalseResult();
}
682
/// Emit a single one-byte nop.
void Amd64Encoder::EncodeNop()
{
    GetMasm()->nop();
}
687
/// Register-to-register move covering all combinations:
/// GPR<->XMM bit moves (movd/movq), XMM<->XMM (movss/movsd), GPR<->GPR (mov),
/// and width-changing scalar moves, which are delegated to EncodeCast.
void Amd64Encoder::EncodeMov(Reg dst, Reg src)
{
    if (dst == src) {
        return;
    }

    // Cross-domain move: transfer the raw bits between GPR and XMM.
    if (dst.IsFloat() != src.IsFloat()) {
        ASSERT(src.GetSize() == dst.GetSize());
        if (dst.GetSize() == WORD_SIZE) {
            if (dst.IsFloat()) {
                GetMasm()->movd(ArchVReg(dst), ArchReg(src));
            } else {
                GetMasm()->movd(ArchReg(dst), ArchVReg(src));
            }
        } else {
            ASSERT(dst.GetSize() == DOUBLE_WORD_SIZE);
            if (dst.IsFloat()) {
                GetMasm()->movq(ArchVReg(dst), ArchReg(src));
            } else {
                GetMasm()->movq(ArchReg(dst), ArchVReg(src));
            }
        }
        return;
    }

    if (dst.IsFloat()) {
        ASSERT(src.IsFloat());
        if (dst.GetType() == FLOAT32_TYPE) {
            GetMasm()->movss(ArchVReg(dst), ArchVReg(src));
        } else {
            GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
        }
        return;
    }

    // 8/16-bit moves leave the upper register bits untouched, so clear the
    // full 32-bit register first to get a zero-extended result.
    if (dst.GetSize() < WORD_SIZE && dst.GetSize() == src.GetSize()) {
        GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
    }

    if (dst.GetSize() == src.GetSize()) {
        GetMasm()->mov(ArchReg(dst), ArchReg(src));
    } else {
        // Width change: delegate to the cast logic.
        EncodeCast(dst, false, src, false);
    }
}
733
EncodeNeg(Reg dst,Reg src)734 void Amd64Encoder::EncodeNeg(Reg dst, Reg src)
735 {
736 if (dst.IsScalar()) {
737 EncodeMov(dst, src);
738 GetMasm()->neg(ArchReg(dst));
739 return;
740 }
741
742 if (dst.GetType() == FLOAT32_TYPE) {
743 ScopedTmpRegF32 tmp(this);
744 CopyImmToXmm(tmp, -0.0F);
745
746 if (dst.GetId() != src.GetId()) {
747 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
748 }
749 GetMasm()->xorps(ArchVReg(dst), ArchVReg(tmp));
750 } else {
751 ScopedTmpRegF64 tmp(this);
752 CopyImmToXmm(tmp, -0.0);
753
754 if (dst.GetId() != src.GetId()) {
755 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
756 }
757 GetMasm()->xorps(ArchVReg(dst), ArchVReg(tmp));
758 }
759 }
760
/// Absolute value. Scalars use the branchless neg+cmov pattern where a
/// register is available; floats clear the sign bit by AND-ing with a mask.
void Amd64Encoder::EncodeAbs(Reg dst, Reg src)
{
    if (dst.IsScalar()) {
        // cmov is not encodable for 8/16-bit operands, so use at least 32 bits.
        auto size = std::max<uint8_t>(src.GetSize(), WORD_SIZE);

        if (dst.GetId() != src.GetId()) {
            // dst = -src; if that result is negative (src was positive),
            // cmovl restores the original positive value.
            GetMasm()->mov(ArchReg(dst), ArchReg(src));
            GetMasm()->neg(ArchReg(dst));
            GetMasm()->cmovl(ArchReg(dst, size), ArchReg(src, size));
        } else if (GetScratchRegistersCount() > 0) {
            // Same trick through a scratch register when dst aliases src.
            ScopedTmpReg tmpReg(this, dst.GetType());

            GetMasm()->mov(ArchReg(tmpReg), ArchReg(src));
            GetMasm()->neg(ArchReg(tmpReg));

            GetMasm()->cmovl(ArchReg(tmpReg, size), ArchReg(src, size));
            GetMasm()->mov(ArchReg(dst), ArchReg(tmpReg));
        } else {
            // No scratch register available: fall back to a branch.
            auto end = GetMasm()->newLabel();

            GetMasm()->test(ArchReg(dst), ArchReg(dst));
            GetMasm()->jns(end);

            GetMasm()->neg(ArchReg(dst));
            GetMasm()->bind(end);
        }
        return;
    }

    // Float abs: clear the sign bit with an all-ones-except-sign mask.
    if (dst.GetType() == FLOAT32_TYPE) {
        ScopedTmpRegF32 tmp(this);
        // NOLINTNEXTLINE(readability-magic-numbers)
        CopyImmToXmm(tmp, uint32_t(0x7fffffff));

        if (dst.GetId() != src.GetId()) {
            GetMasm()->movss(ArchVReg(dst), ArchVReg(src));
        }
        GetMasm()->andps(ArchVReg(dst), ArchVReg(tmp));
    } else {
        ScopedTmpRegF64 tmp(this);
        // NOLINTNEXTLINE(readability-magic-numbers)
        CopyImmToXmm(tmp, uint64_t(0x7fffffffffffffff));

        if (dst.GetId() != src.GetId()) {
            GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
        }
        GetMasm()->andps(ArchVReg(dst), ArchVReg(tmp));
    }
}
810
/// Bitwise NOT: copy src into dst, then invert all bits in place.
void Amd64Encoder::EncodeNot(Reg dst, Reg src)
{
    ASSERT(dst.IsScalar());

    EncodeMov(dst, src);
    GetMasm()->not_(ArchReg(dst));
}
818
/// Floating-point square root.
/// NOTE(review): the packed forms (sqrtps/sqrtpd) are used on scalar values;
/// the low-lane result matches sqrtss/sqrtsd, but the upper lanes are also
/// processed - confirm this is intentional (e.g. for encoding size).
void Amd64Encoder::EncodeSqrt(Reg dst, Reg src)
{
    ASSERT(dst.IsFloat());
    if (src.GetType() == FLOAT32_TYPE) {
        GetMasm()->sqrtps(ArchVReg(dst), ArchVReg(src));
    } else {
        GetMasm()->sqrtpd(ArchVReg(dst), ArchVReg(src));
    }
}
828
/// Convert float32/64 to a 32/64-bit integer with Java-style saturation:
/// NaN -> 0, below range -> min, above range -> max, otherwise truncate.
void Amd64Encoder::EncodeCastFloatToScalar(Reg dst, bool dstSigned, Reg src)
{
    // We DON'T support casts from float32/64 to int8/16 and bool, because this caste is not declared anywhere
    // in other languages and architecture, we do not know what the behavior should be.
    ASSERT(dst.GetSize() >= WORD_SIZE);
    auto end = GetMasm()->newLabel();

    // if src is NaN, then dst = 0
    EncodeCastFloatCheckNan(dst, src, end);

    // Range checks load the saturated value into dst and jump to `end` when
    // the input is out of range.
    if (dstSigned) {
        EncodeCastFloatSignCheckRange(dst, src, end);
    } else {
        EncodeCastFloatUnsignCheckRange(dst, src, end);
    }

    // In-range input: truncating conversion. cvttss2si/cvttsd2si only produce
    // signed results, so the unsigned 64-bit cases need dedicated helpers.
    if (src.GetType() == FLOAT32_TYPE) {
        if (dst.GetSize() == DOUBLE_WORD_SIZE) {
            EncodeCastFloat32ToUint64(dst, src);
        } else {
            GetMasm()->cvttss2si(ArchReg(dst, DOUBLE_WORD_SIZE), ArchVReg(src));
        }
    } else {
        if (dst.GetSize() == DOUBLE_WORD_SIZE) {
            EncodeCastFloat64ToUint64(dst, src);
        } else {
            GetMasm()->cvttsd2si(ArchReg(dst, DOUBLE_WORD_SIZE), ArchVReg(src));
        }
    }

    GetMasm()->bind(end);
}
861
/// float32 -> uint64 conversion. cvttss2si is signed-only, so values >= 2^63
/// (compared against 0x5F000000 = 2^63 as float) are first reduced by 2^63,
/// converted, and then the top bit is restored with an XOR.
/// NOTE(review): the big-number path modifies `src` in place (subss) -
/// presumably src is dead afterwards; confirm at the call site.
void Amd64Encoder::EncodeCastFloat32ToUint64(Reg dst, Reg src)
{
    auto bigNumberLabel = GetMasm()->newLabel();
    auto endLabel = GetMasm()->newLabel();
    ScopedTmpReg tmpReg(this, src.GetType());
    ScopedTmpReg tmpNum(this, dst.GetType());

    // It is max number with max degree that we can load in sign int64
    // NOLINTNEXTLINE (readability-magic-numbers)
    GetMasm()->mov(ArchReg(dst, WORD_SIZE), asmjit::imm(0x5F000000));
    GetMasm()->movd(ArchVReg(tmpReg), ArchReg(dst, WORD_SIZE));
    GetMasm()->comiss(ArchVReg(src), ArchVReg(tmpReg));
    GetMasm()->jnb(bigNumberLabel);

    // src < 2^63: the signed conversion already yields the correct bits.
    GetMasm()->cvttss2si(ArchReg(dst), ArchVReg(src));
    GetMasm()->jmp(endLabel);

    GetMasm()->bind(bigNumberLabel);
    GetMasm()->subss(ArchVReg(src), ArchVReg(tmpReg));
    GetMasm()->cvttss2si(ArchReg(dst), ArchVReg(src));
    // Restore the subtracted 2^63 by setting the top bit back.
    // NOLINTNEXTLINE (readability-magic-numbers)
    GetMasm()->mov(ArchReg(tmpNum), asmjit::imm(0x8000000000000000));
    GetMasm()->xor_(ArchReg(dst), ArchReg(tmpNum));
    GetMasm()->bind(endLabel);
}
887
/// float64 -> uint64 conversion; same scheme as EncodeCastFloat32ToUint64
/// with 0x43E0000000000000 = 2^63 as double.
/// NOTE(review): the big-number path modifies `src` in place (subsd) -
/// presumably src is dead afterwards; confirm at the call site.
void Amd64Encoder::EncodeCastFloat64ToUint64(Reg dst, Reg src)
{
    auto bigNumberLabel = GetMasm()->newLabel();
    auto endLabel = GetMasm()->newLabel();
    ScopedTmpReg tmpReg(this, src.GetType());
    ScopedTmpReg tmpNum(this, dst.GetType());

    // It is max number with max degree that we can load in sign int64
    // NOLINTNEXTLINE (readability-magic-numbers)
    GetMasm()->mov(ArchReg(dst), asmjit::imm(0x43E0000000000000));
    GetMasm()->movq(ArchVReg(tmpReg), ArchReg(dst));
    GetMasm()->comisd(ArchVReg(src), ArchVReg(tmpReg));
    GetMasm()->jnb(bigNumberLabel);

    // src < 2^63: the signed conversion already yields the correct bits.
    GetMasm()->cvttsd2si(ArchReg(dst), ArchVReg(src));
    GetMasm()->jmp(endLabel);

    GetMasm()->bind(bigNumberLabel);
    GetMasm()->subsd(ArchVReg(src), ArchVReg(tmpReg));
    GetMasm()->cvttsd2si(ArchReg(dst), ArchVReg(src));
    // Restore the subtracted 2^63 by setting the top bit back.
    // NOLINTNEXTLINE (readability-magic-numbers)
    GetMasm()->mov(ArchReg(tmpNum), asmjit::imm(0x8000000000000000));
    GetMasm()->xor_(ArchReg(dst), ArchReg(tmpNum));
    GetMasm()->bind(endLabel);
}
913
/// Zero dst, then jump to `end` if src is NaN: an unordered self-compare
/// (ucomiss/ucomisd) sets the parity flag, which `jp` tests.
void Amd64Encoder::EncodeCastFloatCheckNan(Reg dst, Reg src, const asmjit::Label &end)
{
    GetMasm()->xor_(ArchReg(dst, DOUBLE_WORD_SIZE), ArchReg(dst, DOUBLE_WORD_SIZE));
    if (src.GetType() == FLOAT32_TYPE) {
        GetMasm()->ucomiss(ArchVReg(src), ArchVReg(src));
    } else {
        GetMasm()->ucomisd(ArchVReg(src), ArchVReg(src));
    }
    GetMasm()->jp(end);
}
924
EncodeCastFloatSignCheckRange(Reg dst,Reg src,const asmjit::Label & end)925 void Amd64Encoder::EncodeCastFloatSignCheckRange(Reg dst, Reg src, const asmjit::Label &end)
926 {
927 // if src < INT_MIN, then dst = INT_MIN
928 // if src >= (INT_MAX + 1), then dst = INT_MAX
929 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
930 EncodeCastFloatCheckRange(dst, src, end, INT64_MIN, INT64_MAX);
931 } else {
932 EncodeCastFloatCheckRange(dst, src, end, INT32_MIN, INT32_MAX);
933 }
934 }
935
/// Saturating range check shared by the signed and unsigned cast paths.
/// Loads minValue into dst and jumps to `end` if src is below the range
/// (ucomi + jb), then loads maxValue and jumps if src >= maxValue + 1
/// (jae). If neither branch is taken, dst is left holding maxValue but the
/// caller overwrites it with the real conversion result.
void Amd64Encoder::EncodeCastFloatCheckRange(Reg dst, Reg src, const asmjit::Label &end, const int64_t minValue,
                                             const uint64_t maxValue)
{
    ScopedTmpReg cmpReg(this, src.GetType());
    ScopedTmpReg tmpReg(this, src.GetType() == FLOAT64_TYPE ? INT64_TYPE : INT32_TYPE);

    // Lower bound: dst = minValue; taken when src < (float)minValue.
    GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(minValue));
    if (src.GetType() == FLOAT32_TYPE) {
        GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(bit_cast<uint32_t>(float(minValue))));
        GetMasm()->movd(ArchVReg(cmpReg), ArchReg(tmpReg));
        GetMasm()->ucomiss(ArchVReg(src), ArchVReg(cmpReg));
    } else {
        GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(bit_cast<uint64_t>(double(minValue))));
        GetMasm()->movq(ArchVReg(cmpReg), ArchReg(tmpReg));
        GetMasm()->ucomisd(ArchVReg(src), ArchVReg(cmpReg));
    }
    GetMasm()->jb(end);

    // Upper bound: dst = maxValue; taken when src >= (float)(maxValue + 1).
    GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(maxValue));
    if (src.GetType() == FLOAT32_TYPE) {
        GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(bit_cast<uint32_t>(float(maxValue) + 1U)));
        GetMasm()->movd(ArchVReg(cmpReg), ArchReg(tmpReg));
        GetMasm()->ucomiss(ArchVReg(src), ArchVReg(cmpReg));
    } else {
        GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(bit_cast<uint64_t>(double(maxValue) + 1U)));
        GetMasm()->movq(ArchVReg(cmpReg), ArchReg(tmpReg));
        GetMasm()->ucomisd(ArchVReg(src), ArchVReg(cmpReg));
    }
    GetMasm()->jae(end);
}
966
EncodeCastFloatUnsignCheckRange(Reg dst,Reg src,const asmjit::Label & end)967 void Amd64Encoder::EncodeCastFloatUnsignCheckRange(Reg dst, Reg src, const asmjit::Label &end)
968 {
969 // if src < 0, then dst = 0
970 // if src >= (UINT_MAX + 1), then dst = UINT_MAX
971 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
972 EncodeCastFloatCheckRange(dst, src, end, 0, UINT64_MAX);
973 } else {
974 EncodeCastFloatCheckRange(dst, src, end, 0, UINT32_MAX);
975 }
976 }
977
/// uint64 -> float conversion. cvtsi2ss/sd are signed-only, so values with the
/// top bit set need special handling.
void Amd64Encoder::EncodeCastScalarToFloatUnsignDouble(Reg dst, Reg src)
{
    if (dst.GetType() == FLOAT32_TYPE) {
        ScopedTmpRegU64 int1Reg(this);
        ScopedTmpRegU64 int2Reg(this);

        auto sgn = GetMasm()->newLabel();
        auto end = GetMasm()->newLabel();

        // Top bit clear: the signed conversion is already correct.
        GetMasm()->test(ArchReg(src), ArchReg(src));
        GetMasm()->js(sgn);
        GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(src));
        GetMasm()->jmp(end);

        // Top bit set: halve the value, keeping the dropped low bit OR-ed in
        // so rounding stays correct, convert, then double the result.
        GetMasm()->bind(sgn);
        GetMasm()->mov(ArchReg(int1Reg), ArchReg(src));
        GetMasm()->mov(ArchReg(int2Reg), ArchReg(src));
        GetMasm()->shr(ArchReg(int2Reg), asmjit::imm(1));
        GetMasm()->and_(ArchReg(int1Reg, WORD_SIZE), asmjit::imm(1));
        GetMasm()->or_(ArchReg(int1Reg), ArchReg(int2Reg));
        GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(int1Reg));
        GetMasm()->addss(ArchVReg(dst), ArchVReg(dst));

        GetMasm()->bind(end);
    } else {
        // Branchless uint64 -> double: pair the low and high 32-bit halves
        // with the exponents 2^52 / 2^84 (0x433.../0x453...), subtract the
        // same constants to remove the biases, and sum the two halves.
        static constexpr std::array<uint32_t, 4> ARR1 = {uint32_t(0x43300000), uint32_t(0x45300000), 0x0, 0x0};
        static constexpr std::array<uint64_t, 2> ARR2 = {uint64_t(0x4330000000000000), uint64_t(0x4530000000000000)};

        ScopedTmpReg float1Reg(this, dst.GetType());
        ScopedTmpRegF64 tmp(this);

        GetMasm()->movq(ArchVReg(float1Reg), ArchReg(src));
        CopyArrayToXmm(tmp, ARR1);
        GetMasm()->punpckldq(ArchVReg(float1Reg), ArchVReg(tmp));
        CopyArrayToXmm(tmp, ARR2);
        GetMasm()->subpd(ArchVReg(float1Reg), ArchVReg(tmp));
        GetMasm()->movapd(ArchVReg(dst), ArchVReg(float1Reg));
        GetMasm()->unpckhpd(ArchVReg(dst), ArchVReg(float1Reg));
        GetMasm()->addsd(ArchVReg(dst), ArchVReg(float1Reg));
    }
}
1019
void Amd64Encoder::EncodeCastScalarToFloat(Reg dst, Reg src, bool srcSigned)
{
    // Integer -> float/double conversion. cvtsi2ss/sd treat their source as
    // signed, so unsigned sources are widened (u32) or handled by a dedicated
    // routine (u64).
    if (!srcSigned && src.GetSize() == DOUBLE_WORD_SIZE) {
        EncodeCastScalarToFloatUnsignDouble(dst, src);
        return;
    }

    // Sub-word sources and signed 32-bit sources convert via the 32-bit
    // register form of the source.
    if (src.GetSize() < WORD_SIZE || (srcSigned && src.GetSize() == WORD_SIZE)) {
        if (dst.GetType() == FLOAT32_TYPE) {
            GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(src, WORD_SIZE));
        } else {
            GetMasm()->cvtsi2sd(ArchVReg(dst), ArchReg(src, WORD_SIZE));
        }
        return;
    }

    // Unsigned 32-bit: a 32-bit mov zero-extends into the 64-bit temporary,
    // which is then converted as a (non-negative) signed 64-bit value.
    if (!srcSigned && src.GetSize() == WORD_SIZE) {
        ScopedTmpRegU64 int1Reg(this);

        GetMasm()->mov(ArchReg(int1Reg, WORD_SIZE), ArchReg(src, WORD_SIZE));
        if (dst.GetType() == FLOAT32_TYPE) {
            GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(int1Reg));
        } else {
            GetMasm()->cvtsi2sd(ArchVReg(dst), ArchReg(int1Reg));
        }
        return;
    }

    // Remaining case: signed 64-bit source, direct conversion.
    ASSERT(srcSigned && src.GetSize() == DOUBLE_WORD_SIZE);
    if (dst.GetType() == FLOAT32_TYPE) {
        GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(src));
    } else {
        GetMasm()->cvtsi2sd(ArchVReg(dst), ArchReg(src));
    }
}
1055
void Amd64Encoder::EncodeCastToBool(Reg dst, Reg src)
{
    // Per the ISA only these casts are supported:
    // i32tou1, i64tou1, u32tou1, u64tou1
    ASSERT(src.IsScalar());
    ASSERT(dst.IsScalar());

    // In our ISA the minimal type is 32-bit, so the bool lives in 32 bits.
    GetMasm()->test(ArchReg(src), ArchReg(src));
    // Zero dst with mov rather than xor: mov preserves the flags set by
    // `test`, so setne can then write 1 into the low byte iff src != 0.
    GetMasm()->mov(ArchReg(dst, WORD_SIZE), asmjit::imm(0));
    GetMasm()->setne(ArchReg(dst));
}
1069
void Amd64Encoder::EncodeFastPathDynamicCast(Reg dst, Reg src, LabelHolder::LabelId slow)
{
    // Fast path of the JS number double -> int32 cast:
    //  - NaN produces 0;
    //  - in-range doubles convert directly with cvttsd2si;
    //  - out-of-range/infinite values fall back to the slow path.
    ASSERT(IsLabelValid(slow));
    ASSERT(IsJsNumberCast());
    ASSERT(src.IsFloat() && dst.IsScalar());

    CHECK_EQ(src.GetSize(), BITS_PER_UINT64);
    CHECK_EQ(dst.GetSize(), BITS_PER_UINT32);

    auto end {GetMasm()->newLabel()};

    // if src is NaN, then dst = 0
    EncodeCastFloatCheckNan(dst, src, end);

    // infinite and big numbers will overflow here to INT64_MIN
    GetMasm()->cvttsd2si(ArchReg(dst, DOUBLE_WORD_SIZE), ArchVReg(src));
    // check INT64_MIN: cmp computes dst - 1, which sets OF only for INT64_MIN
    GetMasm()->cmp(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(1));
    auto slowLabel {static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(slow)};
    // jump to slow path in case of overflow
    GetMasm()->jo(*slowLabel);

    GetMasm()->bind(end);
}
1094
EncodeCast(Reg dst,bool dstSigned,Reg src,bool srcSigned)1095 void Amd64Encoder::EncodeCast(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1096 {
1097 if (src.IsFloat() && dst.IsScalar()) {
1098 EncodeCastFloatToScalar(dst, dstSigned, src);
1099 return;
1100 }
1101
1102 if (src.IsScalar() && dst.IsFloat()) {
1103 EncodeCastScalarToFloat(dst, src, srcSigned);
1104 return;
1105 }
1106
1107 if (src.IsFloat() && dst.IsFloat()) {
1108 if (src.GetSize() != dst.GetSize()) {
1109 if (src.GetType() == FLOAT32_TYPE) {
1110 GetMasm()->cvtss2sd(ArchVReg(dst), ArchVReg(src));
1111 } else {
1112 GetMasm()->cvtsd2ss(ArchVReg(dst), ArchVReg(src));
1113 }
1114 return;
1115 }
1116
1117 if (src.GetType() == FLOAT32_TYPE) {
1118 GetMasm()->movss(ArchVReg(dst), ArchVReg(src));
1119 } else {
1120 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
1121 }
1122 return;
1123 }
1124
1125 ASSERT(src.IsScalar() && dst.IsScalar());
1126 EncodeCastScalar(dst, dstSigned, src, srcSigned);
1127 }
1128
void Amd64Encoder::EncodeCastScalar(Reg dst, bool dstSigned, Reg src, bool srcSigned)
{
    // Integer-to-integer cast with sign/zero extension. Sub-word results are
    // additionally extended to 32 bits, the minimal operand width used by the
    // rest of the code generator.
    auto extendTo32bit = [this](Reg reg, bool isSigned) {
        if (reg.GetSize() < WORD_SIZE) {
            if (isSigned) {
                GetMasm()->movsx(ArchReg(reg, WORD_SIZE), ArchReg(reg));
            } else {
                GetMasm()->movzx(ArchReg(reg, WORD_SIZE), ArchReg(reg));
            }
        }
    };

    // Narrowing or same-width cast: a plain move truncates; then extend the
    // (possibly sub-word) destination to 32 bits.
    if (src.GetSize() >= dst.GetSize()) {
        if (dst.GetId() != src.GetId()) {
            GetMasm()->mov(ArchReg(dst), ArchReg(src, dst.GetSize()));
        }
        extendTo32bit(dst, dstSigned);
        return;
    }

    // Widening from a signed source.
    if (srcSigned) {
        if (dst.GetSize() < DOUBLE_WORD_SIZE) {
            GetMasm()->movsx(ArchReg(dst), ArchReg(src));
            extendTo32bit(dst, dstSigned);
        } else if (src.GetSize() == WORD_SIZE) {
            GetMasm()->movsxd(ArchReg(dst), ArchReg(src));
        } else {
            // 8/16-bit -> 64-bit: movsx to 32 bits, then movsxd to 64 bits.
            GetMasm()->movsx(ArchReg(dst, WORD_SIZE), ArchReg(src));
            GetMasm()->movsxd(ArchReg(dst), ArchReg(dst, WORD_SIZE));
        }
        return;
    }

    // Widening from an unsigned source. A 32-bit mov implicitly zeroes the
    // upper half on x86-64, so no explicit movzx is needed in that case.
    if (src.GetSize() == WORD_SIZE) {
        GetMasm()->mov(ArchReg(dst, WORD_SIZE), ArchReg(src));
    } else if (dst.GetSize() == DOUBLE_WORD_SIZE) {
        GetMasm()->movzx(ArchReg(dst, WORD_SIZE), ArchReg(src));
    } else {
        GetMasm()->movzx(ArchReg(dst), ArchReg(src));
        extendTo32bit(dst, dstSigned);
    }
}
1171
MakeShift(Shift shift)1172 Reg Amd64Encoder::MakeShift(Shift shift)
1173 {
1174 Reg reg = shift.GetBase();
1175 ASSERT(reg.IsValid());
1176 if (reg.IsScalar()) {
1177 ASSERT(shift.GetType() != ShiftType::INVALID_SHIFT);
1178 switch (shift.GetType()) {
1179 case ShiftType::LSL:
1180 GetMasm()->shl(ArchReg(reg), asmjit::imm(shift.GetScale()));
1181 break;
1182 case ShiftType::LSR:
1183 GetMasm()->shr(ArchReg(reg), asmjit::imm(shift.GetScale()));
1184 break;
1185 case ShiftType::ASR:
1186 GetMasm()->sar(ArchReg(reg), asmjit::imm(shift.GetScale()));
1187 break;
1188 case ShiftType::ROR:
1189 GetMasm()->ror(ArchReg(reg), asmjit::imm(shift.GetScale()));
1190 break;
1191 default:
1192 UNREACHABLE();
1193 }
1194
1195 return reg;
1196 }
1197
1198 // Invalid register type
1199 UNREACHABLE();
1200 }
1201
void Amd64Encoder::EncodeAdd(Reg dst, Reg src0, Shift src1)
{
    // dst = src0 + (src1.base <shifted by> src1.scale). Note that MakeShift
    // applies the shift in place, clobbering the shift's base register.
    if (dst.IsFloat()) {
        SetFalseResult();
        return;
    }

    ASSERT(dst.GetSize() >= src0.GetSize());

    auto shiftReg = MakeShift(src1);

    // Sub-word operands: fall back to the register-register add.
    if (src0.GetSize() < WORD_SIZE) {
        EncodeAdd(dst, src0, shiftReg);
        return;
    }

    // lea with a 64-bit base needs a 64-bit index: sign-extend the shifted
    // value first.
    if (src0.GetSize() == DOUBLE_WORD_SIZE && shiftReg.GetSize() < DOUBLE_WORD_SIZE) {
        GetMasm()->movsxd(ArchReg(shiftReg, DOUBLE_WORD_SIZE), ArchReg(shiftReg));
    }

    // Single lea does the addition in three-operand form without touching flags.
    GetMasm()->lea(ArchReg(dst), asmjit::x86::ptr(ArchReg(src0), ArchReg(shiftReg, src0.GetSize())));
}
1224
EncodeAdd(Reg dst,Reg src0,Reg src1)1225 void Amd64Encoder::EncodeAdd(Reg dst, Reg src0, Reg src1)
1226 {
1227 if (dst.IsScalar()) {
1228 auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
1229 GetMasm()->lea(ArchReg(dst, size), asmjit::x86::ptr(ArchReg(src0, size), ArchReg(src1, size)));
1230 return;
1231 }
1232
1233 if (dst.GetType() == FLOAT32_TYPE) {
1234 if (dst.GetId() == src0.GetId()) {
1235 GetMasm()->addss(ArchVReg(dst), ArchVReg(src1));
1236 } else if (dst.GetId() == src1.GetId()) {
1237 GetMasm()->addss(ArchVReg(dst), ArchVReg(src0));
1238 } else {
1239 GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
1240 GetMasm()->addss(ArchVReg(dst), ArchVReg(src1));
1241 }
1242 } else {
1243 if (dst.GetId() == src0.GetId()) {
1244 GetMasm()->addsd(ArchVReg(dst), ArchVReg(src1));
1245 } else if (dst.GetId() == src1.GetId()) {
1246 GetMasm()->addsd(ArchVReg(dst), ArchVReg(src0));
1247 } else {
1248 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
1249 GetMasm()->addsd(ArchVReg(dst), ArchVReg(src1));
1250 }
1251 }
1252 }
1253
EncodeSub(Reg dst,Reg src0,Reg src1)1254 void Amd64Encoder::EncodeSub(Reg dst, Reg src0, Reg src1)
1255 {
1256 if (dst.IsScalar()) {
1257 if (dst.GetId() == src0.GetId()) {
1258 GetMasm()->sub(ArchReg(dst), ArchReg(src1));
1259 } else if (dst.GetId() == src1.GetId()) {
1260 GetMasm()->sub(ArchReg(dst), ArchReg(src0));
1261 GetMasm()->neg(ArchReg(dst));
1262 } else {
1263 EncodeMov(dst, src0);
1264 GetMasm()->sub(ArchReg(dst), ArchReg(src1));
1265 }
1266 return;
1267 }
1268
1269 if (dst.GetType() == FLOAT32_TYPE) {
1270 if (dst.GetId() == src0.GetId()) {
1271 GetMasm()->subss(ArchVReg(dst), ArchVReg(src1));
1272 } else if (dst.GetId() != src1.GetId()) {
1273 GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
1274 GetMasm()->subss(ArchVReg(dst), ArchVReg(src1));
1275 } else {
1276 ScopedTmpReg tmpReg(this, dst.GetType());
1277 GetMasm()->movss(ArchVReg(tmpReg), ArchVReg(src0));
1278 GetMasm()->subss(ArchVReg(tmpReg), ArchVReg(src1));
1279 GetMasm()->movss(ArchVReg(dst), ArchVReg(tmpReg));
1280 }
1281 } else {
1282 if (dst.GetId() == src0.GetId()) {
1283 GetMasm()->subsd(ArchVReg(dst), ArchVReg(src1));
1284 } else if (dst.GetId() != src1.GetId()) {
1285 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
1286 GetMasm()->subsd(ArchVReg(dst), ArchVReg(src1));
1287 } else {
1288 ScopedTmpReg tmpReg(this, dst.GetType());
1289 GetMasm()->movsd(ArchVReg(tmpReg), ArchVReg(src0));
1290 GetMasm()->subsd(ArchVReg(tmpReg), ArchVReg(src1));
1291 GetMasm()->movsd(ArchVReg(dst), ArchVReg(tmpReg));
1292 }
1293 }
1294 }
1295
EncodeMul(Reg dst,Reg src0,Reg src1)1296 void Amd64Encoder::EncodeMul(Reg dst, Reg src0, Reg src1)
1297 {
1298 if (dst.IsScalar()) {
1299 auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
1300
1301 if (dst.GetId() == src0.GetId()) {
1302 GetMasm()->imul(ArchReg(dst, size), ArchReg(src1, size));
1303 } else if (dst.GetId() == src1.GetId()) {
1304 GetMasm()->imul(ArchReg(dst, size), ArchReg(src0, size));
1305 } else {
1306 GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
1307 GetMasm()->imul(ArchReg(dst, size), ArchReg(src1, size));
1308 }
1309 return;
1310 }
1311
1312 if (dst.GetType() == FLOAT32_TYPE) {
1313 if (dst.GetId() == src0.GetId()) {
1314 GetMasm()->mulss(ArchVReg(dst), ArchVReg(src1));
1315 } else if (dst.GetId() == src1.GetId()) {
1316 GetMasm()->mulss(ArchVReg(dst), ArchVReg(src0));
1317 } else {
1318 GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
1319 GetMasm()->mulss(ArchVReg(dst), ArchVReg(src1));
1320 }
1321 } else {
1322 if (dst.GetId() == src0.GetId()) {
1323 GetMasm()->mulsd(ArchVReg(dst), ArchVReg(src1));
1324 } else if (dst.GetId() == src1.GetId()) {
1325 GetMasm()->mulsd(ArchVReg(dst), ArchVReg(src0));
1326 } else {
1327 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
1328 GetMasm()->mulsd(ArchVReg(dst), ArchVReg(src1));
1329 }
1330 }
1331 }
1332
EncodeAddOverflow(compiler::LabelHolder::LabelId id,Reg dst,Reg src0,Reg src1,Condition cc)1333 void Amd64Encoder::EncodeAddOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1334 {
1335 ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1336 ASSERT(cc == Condition::VS || cc == Condition::VC);
1337 auto size = dst.GetSize();
1338 if (dst.GetId() == src0.GetId()) {
1339 GetMasm()->add(ArchReg(dst, size), ArchReg(src1, size));
1340 } else if (dst.GetId() == src1.GetId()) {
1341 GetMasm()->add(ArchReg(dst, size), ArchReg(src0, size));
1342 } else {
1343 GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
1344 GetMasm()->add(ArchReg(dst, size), ArchReg(src1, size));
1345 }
1346 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
1347 GetMasm()->j(ArchCc(cc, false), *label);
1348 }
1349
void Amd64Encoder::EncodeSubOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
{
    // dst = src0 - src1; branch to `id` on the requested overflow condition
    // (VS = overflow set, VC = overflow clear).
    ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
    ASSERT(cc == Condition::VS || cc == Condition::VC);
    auto size = dst.GetSize();
    if (dst.GetId() == src0.GetId()) {
        GetMasm()->sub(ArchReg(dst, size), ArchReg(src1, size));
    } else if (dst.GetId() == src1.GetId()) {
        // dst aliases the subtrahend: subtraction is not commutative, so save
        // src1 in a temporary before dst is overwritten with src0.
        ScopedTmpReg tmpReg(this, dst.GetType());
        GetMasm()->mov(ArchReg(tmpReg, size), ArchReg(src1, size));
        GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
        GetMasm()->sub(ArchReg(dst, size), ArchReg(tmpReg, size));
    } else {
        GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
        GetMasm()->sub(ArchReg(dst, size), ArchReg(src1, size));
    }
    auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
    GetMasm()->j(ArchCc(cc, false), *label);
}
1369
void Amd64Encoder::EncodeNegOverflowAndZero(compiler::LabelHolder::LabelId id, Reg dst, Reg src)
{
    // dst = -src, branching to `id` when src is 0 or INT32_MIN — the two
    // 32-bit values for which negation is a no-op or overflows.
    ASSERT(!dst.IsFloat() && !src.IsFloat());
    auto size = dst.GetSize();
    // (src & 0x7fffffff) == 0 holds exactly for 0 and INT32_MIN.
    // NOLINTNEXTLINE(readability-magic-numbers)
    EncodeJumpTest(id, src, Imm(0x7fffffff), Condition::TST_EQ);
    EncodeMov(dst, src);
    GetMasm()->neg(ArchReg(dst, size));
}
1379
EncodeDivFloat(Reg dst,Reg src0,Reg src1)1380 void Amd64Encoder::EncodeDivFloat(Reg dst, Reg src0, Reg src1)
1381 {
1382 ASSERT(dst.IsFloat());
1383 if (dst.GetType() == FLOAT32_TYPE) {
1384 if (dst.GetId() == src0.GetId()) {
1385 GetMasm()->divss(ArchVReg(dst), ArchVReg(src1));
1386 } else if (dst.GetId() != src1.GetId()) {
1387 GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
1388 GetMasm()->divss(ArchVReg(dst), ArchVReg(src1));
1389 } else {
1390 ScopedTmpRegF32 tmp(this);
1391 GetMasm()->movss(ArchVReg(tmp), ArchVReg(src0));
1392 GetMasm()->divss(ArchVReg(tmp), ArchVReg(src1));
1393 GetMasm()->movss(ArchVReg(dst), ArchVReg(tmp));
1394 }
1395 } else {
1396 if (dst.GetId() == src0.GetId()) {
1397 GetMasm()->divsd(ArchVReg(dst), ArchVReg(src1));
1398 } else if (dst.GetId() != src1.GetId()) {
1399 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
1400 GetMasm()->divsd(ArchVReg(dst), ArchVReg(src1));
1401 } else {
1402 ScopedTmpRegF64 tmp(this);
1403 GetMasm()->movsd(ArchVReg(tmp), ArchVReg(src0));
1404 GetMasm()->divsd(ArchVReg(tmp), ArchVReg(src1));
1405 GetMasm()->movsd(ArchVReg(dst), ArchVReg(tmp));
1406 }
1407 }
1408 }
1409
EncodeDivSpillDst(asmjit::x86::Assembler * masm,Reg dst)1410 static void EncodeDivSpillDst(asmjit::x86::Assembler *masm, Reg dst)
1411 {
1412 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
1413 masm->push(asmjit::x86::rdx);
1414 }
1415 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1416 masm->push(asmjit::x86::rax);
1417 }
1418 }
1419
EncodeDivFillDst(asmjit::x86::Assembler * masm,Reg dst)1420 static void EncodeDivFillDst(asmjit::x86::Assembler *masm, Reg dst)
1421 {
1422 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1423 masm->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::x86::rax);
1424 masm->pop(asmjit::x86::rax);
1425 }
1426
1427 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
1428 masm->pop(asmjit::x86::rdx);
1429 }
1430 }
1431
void Amd64Encoder::EncodeDiv(Reg dst, bool dstSigned, Reg src0, Reg src1)
{
    // Integer division via idiv/div, which require the dividend in rdx:rax
    // and leave the quotient in rax.
    if (dst.IsFloat()) {
        EncodeDivFloat(dst, src0, src1);
        return;
    }

    auto negPath = GetMasm()->newLabel();
    auto crossroad = GetMasm()->newLabel();

    // Signed division by -1 takes a separate path: idiv raises #DE on
    // INT_MIN / -1, while neg produces the expected wrap-around result.
    if (dstSigned) {
        GetMasm()->cmp(ArchReg(src1), asmjit::imm(-1));
        GetMasm()->je(negPath);
    }

    EncodeDivSpillDst(GetMasm(), dst);

    // If the divisor lives in rax/rdx it would be clobbered below; move it
    // into a temporary first.
    ScopedTmpReg tmpReg(this, dst.GetType());
    Reg op1 {src1};
    if (src1.GetId() == ConvertRegNumber(asmjit::x86::rax.id()) ||
        src1.GetId() == ConvertRegNumber(asmjit::x86::rdx.id())) {
        GetMasm()->mov(ArchReg(tmpReg), ArchReg(src1));
        op1 = Reg(tmpReg);
    }

    if (src0.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
        GetMasm()->mov(asmjit::x86::rax, ArchReg(src0, DOUBLE_WORD_SIZE));
    }
    if (dstSigned) {
        // Sign-extend the dividend into rdx (cdq/cqo), then signed divide.
        if (dst.GetSize() <= WORD_SIZE) {
            GetMasm()->cdq();
        } else {
            GetMasm()->cqo();
        }
        GetMasm()->idiv(ArchReg(op1));
    } else {
        // Unsigned: the high half of the dividend is zero.
        GetMasm()->xor_(asmjit::x86::rdx, asmjit::x86::rdx);
        GetMasm()->div(ArchReg(op1));
    }

    // Quotient is in rax: move it into dst and restore the saved registers.
    EncodeDivFillDst(GetMasm(), dst);

    GetMasm()->jmp(crossroad);

    // Divisor == -1: the quotient is simply -src0.
    GetMasm()->bind(negPath);
    if (dst.GetId() != src0.GetId()) {
        GetMasm()->mov(ArchReg(dst), ArchReg(src0));
    }
    GetMasm()->neg(ArchReg(dst));

    GetMasm()->bind(crossroad);
}
1484
void Amd64Encoder::EncodeSignedDiv(Reg dst, Reg src0, Imm imm)
{
    // Signed division by a constant via magic-number multiplication
    // (Hacker's Delight style): dst = hi(src0 * magic) >> shift, with
    // corrections for the magic's sign and for negative quotients.
    int64_t divisor = imm.GetAsInt();

    Reg ax(ConvertRegNumber(asmjit::x86::rax.id()), dst.GetType());
    Reg dx(ConvertRegNumber(asmjit::x86::rdx.id()), dst.GetType());

    // imul clobbers rax/rdx; preserve them unless dst is one of them.
    if (dst != ax) {
        GetMasm()->push(ArchReg(ax, DOUBLE_WORD_SIZE));
    }
    if (dst != dx) {
        GetMasm()->push(ArchReg(dx, DOUBLE_WORD_SIZE));
    }

    FastConstSignedDivisor fastDivisor(divisor, dst.GetSize());
    int64_t magic = fastDivisor.GetMagic();

    // tmp keeps the original dividend; the widening imul leaves the high
    // half of src0 * magic in dx.
    ScopedTmpReg tmp(this, dst.GetType());
    EncodeMov(tmp, src0);
    EncodeMov(ax, src0);
    EncodeMov(dx, Imm(magic));
    GetMasm()->imul(ArchReg(dx));

    // Correction terms when the magic constant's sign disagrees with the
    // divisor's sign.
    if (divisor > 0 && magic < 0) {
        EncodeAdd(dx, dx, tmp);
    } else if (divisor < 0 && magic > 0) {
        EncodeSub(dx, dx, tmp);
    }

    int64_t shift = fastDivisor.GetShift();
    EncodeAShr(dst, dx, Imm(shift));

    // result = (result < 0 ? result + 1 : result): add the shifted-out sign bit.
    EncodeShr(tmp, dst, Imm(dst.GetSize() - 1U));
    EncodeAdd(dst, dst, tmp);

    // Restore in reverse push order.
    if (dst != dx) {
        GetMasm()->pop(ArchReg(dx, DOUBLE_WORD_SIZE));
    }
    if (dst != ax) {
        GetMasm()->pop(ArchReg(ax, DOUBLE_WORD_SIZE));
    }
}
1528
void Amd64Encoder::EncodeUnsignedDiv(Reg dst, Reg src0, Imm imm)
{
    // Unsigned division by a constant via magic-number multiplication.
    // When the magic requires an "add" correction, the quotient is
    // ((src0 - hi) >> 1 + hi) >> (shift - 1), computed overflow-safely.
    auto divisor = bit_cast<uint64_t>(imm.GetAsInt());

    Reg ax(ConvertRegNumber(asmjit::x86::rax.id()), dst.GetType());
    Reg dx(ConvertRegNumber(asmjit::x86::rdx.id()), dst.GetType());

    // mul clobbers rax/rdx; preserve them unless dst is one of them.
    if (dst != ax) {
        GetMasm()->push(ArchReg(ax, DOUBLE_WORD_SIZE));
    }
    if (dst != dx) {
        GetMasm()->push(ArchReg(dx, DOUBLE_WORD_SIZE));
    }

    FastConstUnsignedDivisor fastDivisor(divisor, dst.GetSize());
    uint64_t magic = fastDivisor.GetMagic();

    // tmp holds the original dividend; only needed for the add-correction.
    ScopedTmpReg tmp(this, dst.GetType());
    if (fastDivisor.GetAdd()) {
        EncodeMov(tmp, src0);
    }
    EncodeMov(ax, src0);
    EncodeMov(dx, Imm(magic));
    // Widening mul: the high half of src0 * magic lands in dx.
    GetMasm()->mul(ArchReg(dx));

    uint64_t shift = fastDivisor.GetShift();
    if (!fastDivisor.GetAdd()) {
        EncodeShr(dst, dx, Imm(shift));
    } else {
        ASSERT(shift >= 1U);
        // tmp = ((tmp - hi) >> 1) + hi avoids overflow of tmp + hi.
        EncodeSub(tmp, tmp, dx);
        EncodeShr(tmp, tmp, Imm(1U));
        EncodeAdd(tmp, tmp, dx);
        EncodeShr(dst, tmp, Imm(shift - 1U));
    }

    // Restore in reverse push order.
    if (dst != dx) {
        GetMasm()->pop(ArchReg(dx, DOUBLE_WORD_SIZE));
    }
    if (dst != ax) {
        GetMasm()->pop(ArchReg(ax, DOUBLE_WORD_SIZE));
    }
}
1572
EncodeDiv(Reg dst,Reg src0,Imm imm,bool isSigned)1573 void Amd64Encoder::EncodeDiv(Reg dst, Reg src0, Imm imm, bool isSigned)
1574 {
1575 ASSERT(dst.IsScalar() && dst.GetSize() >= WORD_SIZE);
1576 ASSERT(CanOptimizeImmDivMod(bit_cast<uint64_t>(imm.GetAsInt()), isSigned));
1577 if (isSigned) {
1578 EncodeSignedDiv(dst, src0, imm);
1579 } else {
1580 EncodeUnsignedDiv(dst, src0, imm);
1581 }
1582 }
1583
void Amd64Encoder::EncodeMod(Reg dst, Reg src0, Imm imm, bool isSigned)
{
    // Remainder by a constant, built on top of the optimized constant division.
    ASSERT(dst.IsScalar() && dst.GetSize() >= WORD_SIZE);
    ASSERT(CanOptimizeImmDivMod(bit_cast<uint64_t>(imm.GetAsInt()), isSigned));

    // dst = src0 - imm * (src0 / imm)
    ScopedTmpReg tmp(this, dst.GetType());
    EncodeDiv(tmp, src0, imm, isSigned);
    if (dst.GetSize() == WORD_SIZE) {
        // 32-bit imul accepts the immediate operand directly.
        GetMasm()->imul(ArchReg(tmp), ArchReg(tmp), asmjit::imm(imm.GetAsInt()));
    } else {
        // 64-bit: materialize the immediate in a register first.
        ScopedTmpRegU64 immReg(this);
        EncodeMov(immReg, imm);
        EncodeMul(tmp, tmp, immReg);
    }
    EncodeSub(dst, src0, tmp);
}
1601
EncodeModFloat(Reg dst,Reg src0,Reg src1)1602 void Amd64Encoder::EncodeModFloat(Reg dst, Reg src0, Reg src1)
1603 {
1604 ASSERT(dst.IsFloat());
1605 if (dst.GetType() == FLOAT32_TYPE) {
1606 using Fp = float (*)(float, float);
1607 MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<Fp>(fmodf)));
1608 } else {
1609 using Fp = double (*)(double, double);
1610 MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<Fp>(fmod)));
1611 }
1612 }
1613
void Amd64Encoder::EncodeMod(Reg dst, bool dstSigned, Reg src0, Reg src1)
{
    // Integer remainder via idiv/div (the remainder lands in rdx).
    if (dst.IsFloat()) {
        EncodeModFloat(dst, src0, src1);
        return;
    }

    auto zeroPath = GetMasm()->newLabel();
    auto crossroad = GetMasm()->newLabel();

    // x % -1 == 0 for any x; branching here also avoids the idiv fault on
    // INT_MIN % -1.
    if (dstSigned) {
        GetMasm()->cmp(ArchReg(src1), asmjit::imm(-1));
        GetMasm()->je(zeroPath);
    }

    // Preserve rax/rdx unless dst is that register (it receives the result).
    if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
        GetMasm()->push(asmjit::x86::rax);
    }
    if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
        GetMasm()->push(asmjit::x86::rdx);
    }

    // If the divisor lives in rax/rdx it would be clobbered below; copy it out.
    ScopedTmpReg tmpReg(this, dst.GetType());
    Reg op1 {src1};
    if (src1.GetId() == ConvertRegNumber(asmjit::x86::rax.id()) ||
        src1.GetId() == ConvertRegNumber(asmjit::x86::rdx.id())) {
        GetMasm()->mov(ArchReg(tmpReg), ArchReg(src1));
        op1 = Reg(tmpReg);
    }

    if (src0.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
        GetMasm()->mov(asmjit::x86::rax, ArchReg(src0, DOUBLE_WORD_SIZE));
    }

    if (dstSigned) {
        // Sign-extend the dividend into rdx (cdq/cqo), then signed divide.
        if (dst.GetSize() <= WORD_SIZE) {
            GetMasm()->cdq();
        } else {
            GetMasm()->cqo();
        }
        GetMasm()->idiv(ArchReg(op1));
    } else {
        // Unsigned: the high half of the dividend is zero.
        GetMasm()->xor_(asmjit::x86::rdx, asmjit::x86::rdx);
        GetMasm()->div(ArchReg(op1));
    }

    // Remainder is in rdx; move it into dst, then restore the saved registers
    // (pop order mirrors the pushes above).
    if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
        GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::x86::rdx);
        GetMasm()->pop(asmjit::x86::rdx);
    }

    if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
        GetMasm()->pop(asmjit::x86::rax);
    }
    GetMasm()->jmp(crossroad);

    // Divisor == -1: the remainder is always zero (32-bit xor clears the
    // whole register on x86-64).
    GetMasm()->bind(zeroPath);
    GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));

    GetMasm()->bind(crossroad);
}
1675
void Amd64Encoder::EncodeMin(Reg dst, bool dstSigned, Reg src0, Reg src1)
{
    // Scalar min via cmp + conditional move: tmp starts as src1 and is
    // replaced by src0 when src0 compares as the smaller operand
    // (cmovle for signed, cmovb for unsigned).
    if (dst.IsScalar()) {
        ScopedTmpReg tmpReg(this, dst.GetType());
        GetMasm()->mov(ArchReg(tmpReg), ArchReg(src1));
        GetMasm()->cmp(ArchReg(src0), ArchReg(src1));

        // cmov has no 8-bit form; widen the move to at least 32 bits.
        auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
        if (dstSigned) {
            GetMasm()->cmovle(ArchReg(tmpReg, size), ArchReg(src0, size));
        } else {
            GetMasm()->cmovb(ArchReg(tmpReg, size), ArchReg(src0, size));
        }
        EncodeMov(dst, tmpReg);
        return;
    }

    // Floating point needs NaN- and signed-zero-aware handling.
    EncodeMinMaxFp<false>(dst, src0, src1);
}
1695
void Amd64Encoder::EncodeMax(Reg dst, bool dstSigned, Reg src0, Reg src1)
{
    // Scalar max via cmp + conditional move: tmp starts as src1 and is
    // replaced by src0 when src0 compares as the larger operand
    // (cmovge for signed, cmova for unsigned).
    if (dst.IsScalar()) {
        ScopedTmpReg tmpReg(this, dst.GetType());
        GetMasm()->mov(ArchReg(tmpReg), ArchReg(src1));
        GetMasm()->cmp(ArchReg(src0), ArchReg(src1));

        // cmov has no 8-bit form; widen the move to at least 32 bits.
        auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
        if (dstSigned) {
            GetMasm()->cmovge(ArchReg(tmpReg, size), ArchReg(src0, size));
        } else {
            GetMasm()->cmova(ArchReg(tmpReg, size), ArchReg(src0, size));
        }
        EncodeMov(dst, tmpReg);
        return;
    }

    // Floating point needs NaN- and signed-zero-aware handling.
    EncodeMinMaxFp<true>(dst, src0, src1);
}
1715
template <bool IS_MAX>
void Amd64Encoder::EncodeMinMaxFp(Reg dst, Reg src0, Reg src1)
{
    // IEEE-754-aware floating-point min/max:
    //  - equal operands (which includes +0 and -0) are combined with a
    //    bitwise and/or so that max(+0,-0) == +0 and min(+0,-0) == -0;
    //  - if either operand is NaN, the result is NaN (por merges payloads);
    //  - otherwise maxss/minss (sd for doubles) does the work.
    auto end = GetMasm()->newLabel();
    auto notEqual = GetMasm()->newLabel();
    auto gotNan = GetMasm()->newLabel();
    // Copy dst from whichever source dst may alias, so the other operand is
    // not clobbered by the initial move.
    auto &srcA = dst.GetId() != src1.GetId() ? src0 : src1;
    auto &srcB = srcA.GetId() == src0.GetId() ? src1 : src0;
    if (dst.GetType() == FLOAT32_TYPE) {
        GetMasm()->movaps(ArchVReg(dst), ArchVReg(srcA));
        // ucomiss: ZF/PF set on unordered (NaN), ZF set on equality.
        GetMasm()->ucomiss(ArchVReg(srcB), ArchVReg(srcA));
        GetMasm()->jne(notEqual);
        GetMasm()->jp(gotNan);
        // calculate result for positive/negative zero operands
        if (IS_MAX) {
            GetMasm()->andps(ArchVReg(dst), ArchVReg(srcB));
        } else {
            GetMasm()->orps(ArchVReg(dst), ArchVReg(srcB));
        }
        GetMasm()->jmp(end);
        GetMasm()->bind(gotNan);
        // if any operand is NaN result is NaN
        GetMasm()->por(ArchVReg(dst), ArchVReg(srcB));
        GetMasm()->jmp(end);
        GetMasm()->bind(notEqual);
        if (IS_MAX) {
            GetMasm()->maxss(ArchVReg(dst), ArchVReg(srcB));
        } else {
            GetMasm()->minss(ArchVReg(dst), ArchVReg(srcB));
        }
        GetMasm()->bind(end);
    } else {
        GetMasm()->movapd(ArchVReg(dst), ArchVReg(srcA));
        GetMasm()->ucomisd(ArchVReg(srcB), ArchVReg(srcA));
        GetMasm()->jne(notEqual);
        GetMasm()->jp(gotNan);
        // calculate result for positive/negative zero operands
        if (IS_MAX) {
            GetMasm()->andpd(ArchVReg(dst), ArchVReg(srcB));
        } else {
            GetMasm()->orpd(ArchVReg(dst), ArchVReg(srcB));
        }
        GetMasm()->jmp(end);
        GetMasm()->bind(gotNan);
        // if any operand is NaN result is NaN
        GetMasm()->por(ArchVReg(dst), ArchVReg(srcB));
        GetMasm()->jmp(end);
        GetMasm()->bind(notEqual);
        if (IS_MAX) {
            GetMasm()->maxsd(ArchVReg(dst), ArchVReg(srcB));
        } else {
            GetMasm()->minsd(ArchVReg(dst), ArchVReg(srcB));
        }
        GetMasm()->bind(end);
    }
}
1772
EncodeShl(Reg dst,Reg src0,Reg src1)1773 void Amd64Encoder::EncodeShl(Reg dst, Reg src0, Reg src1)
1774 {
1775 ASSERT(dst.IsScalar());
1776 ScopedTmpReg tmpReg(this, dst.GetType());
1777 Reg rcx(ConvertRegNumber(asmjit::x86::rcx.id()), dst.GetType());
1778 GetMasm()->mov(ArchReg(tmpReg), ArchReg(src0));
1779 if (dst.GetId() != rcx.GetId()) {
1780 GetMasm()->push(ArchReg(rcx, DOUBLE_WORD_SIZE));
1781 }
1782 GetMasm()->mov(ArchReg(rcx), ArchReg(src1));
1783 GetMasm()->shl(ArchReg(tmpReg), asmjit::x86::cl);
1784 if (dst.GetId() != rcx.GetId()) {
1785 GetMasm()->pop(ArchReg(rcx, DOUBLE_WORD_SIZE));
1786 }
1787 GetMasm()->mov(ArchReg(dst), ArchReg(tmpReg));
1788 }
1789
EncodeShr(Reg dst,Reg src0,Reg src1)1790 void Amd64Encoder::EncodeShr(Reg dst, Reg src0, Reg src1)
1791 {
1792 ASSERT(dst.IsScalar());
1793 ScopedTmpReg tmpReg(this, dst.GetType());
1794 Reg rcx(ConvertRegNumber(asmjit::x86::rcx.id()), dst.GetType());
1795 GetMasm()->mov(ArchReg(tmpReg), ArchReg(src0));
1796 if (dst.GetId() != rcx.GetId()) {
1797 GetMasm()->push(ArchReg(rcx, DOUBLE_WORD_SIZE));
1798 }
1799 GetMasm()->mov(ArchReg(rcx), ArchReg(src1));
1800 GetMasm()->shr(ArchReg(tmpReg), asmjit::x86::cl);
1801 if (dst.GetId() != rcx.GetId()) {
1802 GetMasm()->pop(ArchReg(rcx, DOUBLE_WORD_SIZE));
1803 }
1804 GetMasm()->mov(ArchReg(dst), ArchReg(tmpReg));
1805 }
1806
EncodeAShr(Reg dst,Reg src0,Reg src1)1807 void Amd64Encoder::EncodeAShr(Reg dst, Reg src0, Reg src1)
1808 {
1809 ASSERT(dst.IsScalar());
1810 ScopedTmpReg tmpReg(this, dst.GetType());
1811 Reg rcx(ConvertRegNumber(asmjit::x86::rcx.id()), dst.GetType());
1812 GetMasm()->mov(ArchReg(tmpReg), ArchReg(src0));
1813 if (dst.GetId() != rcx.GetId()) {
1814 GetMasm()->push(ArchReg(rcx, DOUBLE_WORD_SIZE));
1815 }
1816 GetMasm()->mov(ArchReg(rcx), ArchReg(src1));
1817 GetMasm()->sar(ArchReg(tmpReg), asmjit::x86::cl);
1818 if (dst.GetId() != rcx.GetId()) {
1819 GetMasm()->pop(ArchReg(rcx, DOUBLE_WORD_SIZE));
1820 }
1821 GetMasm()->mov(ArchReg(dst), ArchReg(tmpReg));
1822 }
1823
EncodeAnd(Reg dst,Reg src0,Reg src1)1824 void Amd64Encoder::EncodeAnd(Reg dst, Reg src0, Reg src1)
1825 {
1826 ASSERT(dst.IsScalar());
1827 if (dst.GetId() == src0.GetId()) {
1828 GetMasm()->and_(ArchReg(dst), ArchReg(src1));
1829 } else if (dst.GetId() == src1.GetId()) {
1830 GetMasm()->and_(ArchReg(dst), ArchReg(src0));
1831 } else {
1832 EncodeMov(dst, src0);
1833 GetMasm()->and_(ArchReg(dst), ArchReg(src1));
1834 }
1835 }
1836
EncodeOr(Reg dst,Reg src0,Reg src1)1837 void Amd64Encoder::EncodeOr(Reg dst, Reg src0, Reg src1)
1838 {
1839 ASSERT(dst.IsScalar());
1840 if (dst.GetId() == src0.GetId()) {
1841 GetMasm()->or_(ArchReg(dst), ArchReg(src1));
1842 } else if (dst.GetId() == src1.GetId()) {
1843 GetMasm()->or_(ArchReg(dst), ArchReg(src0));
1844 } else {
1845 EncodeMov(dst, src0);
1846 GetMasm()->or_(ArchReg(dst), ArchReg(src1));
1847 }
1848 }
1849
EncodeXor(Reg dst,Reg src0,Reg src1)1850 void Amd64Encoder::EncodeXor(Reg dst, Reg src0, Reg src1)
1851 {
1852 ASSERT(dst.IsScalar());
1853 if (dst.GetId() == src0.GetId()) {
1854 GetMasm()->xor_(ArchReg(dst), ArchReg(src1));
1855 } else if (dst.GetId() == src1.GetId()) {
1856 GetMasm()->xor_(ArchReg(dst), ArchReg(src0));
1857 } else {
1858 EncodeMov(dst, src0);
1859 GetMasm()->xor_(ArchReg(dst), ArchReg(src1));
1860 }
1861 }
1862
void Amd64Encoder::EncodeAdd(Reg dst, Reg src, Imm imm)
{
    // dst = src + imm, preferring a single flag-preserving lea.
    if (dst.IsFloat()) {
        SetFalseResult();
        return;
    }

    auto immVal = imm.GetAsInt();
    auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
    if (ImmFitsSize(immVal, size)) {
        GetMasm()->lea(ArchReg(dst, size), asmjit::x86::ptr(ArchReg(src, size), immVal));
    } else {
        // Immediate too wide for a displacement: materialize it in a register
        // (a temporary one when dst aliases src).
        if (dst.GetId() != src.GetId()) {
            GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
            GetMasm()->add(ArchReg(dst), ArchReg(src));
        } else {
            ScopedTmpReg tmpReg(this, dst.GetType());
            GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
            GetMasm()->add(ArchReg(dst), ArchReg(tmpReg));
        }
    }
}
1885
void Amd64Encoder::EncodeSub(Reg dst, Reg src, Imm imm)
{
    // dst = src - imm, implemented as an addition of the negated immediate so
    // a single flag-preserving lea can be used.
    if (dst.IsFloat()) {
        SetFalseResult();
        return;
    }

    // NOTE(review): negation of INT64_MIN wraps; presumably such immediates
    // do not reach this path — confirm against callers.
    auto immVal = -imm.GetAsInt();
    auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
    if (ImmFitsSize(immVal, size)) {
        GetMasm()->lea(ArchReg(dst, size), asmjit::x86::ptr(ArchReg(src, size), immVal));
    } else {
        // Negated immediate too wide for a displacement: materialize it in a
        // register (a temporary one when dst aliases src) and add.
        if (dst.GetId() != src.GetId()) {
            GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
            GetMasm()->add(ArchReg(dst), ArchReg(src));
        } else {
            ScopedTmpReg tmpReg(this, dst.GetType());
            GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
            GetMasm()->add(ArchReg(dst), ArchReg(tmpReg));
        }
    }
}
1908
/// dst = src << imm (logical shift left) for scalar registers.
void Amd64Encoder::EncodeShl(Reg dst, Reg src, Imm imm)
{
    ASSERT(dst.IsScalar());
    EncodeMov(dst, src);
    GetMasm()->shl(ArchReg(dst), ArchImm(imm));
}
1915
/// dst = src >> imm (logical/unsigned shift right) for scalar registers.
void Amd64Encoder::EncodeShr(Reg dst, Reg src, Imm imm)
{
    ASSERT(dst.IsScalar());

    EncodeMov(dst, src);
    GetMasm()->shr(ArchReg(dst), ArchImm(imm));
}
1923
/// dst = src >> imm (arithmetic/signed shift right) for scalar registers.
void Amd64Encoder::EncodeAShr(Reg dst, Reg src, Imm imm)
{
    ASSERT(dst.IsScalar());
    EncodeMov(dst, src);
    GetMasm()->sar(ArchReg(dst), ArchImm(imm));
}
1930
/// dst = src & imm for scalar registers.
void Amd64Encoder::EncodeAnd(Reg dst, Reg src, Imm imm)
{
    ASSERT(dst.IsScalar());
    auto immVal = ImmToUnsignedInt(imm);

    // Set all mask bits above the source width so the AND only affects bits
    // that actually belong to src.
    switch (src.GetSize()) {
        case BYTE_SIZE:
            immVal |= ~uint64_t(0xFF);  // NOLINT
            break;
        case HALF_SIZE:
            immVal |= ~uint64_t(0xFFFF);  // NOLINT
            break;
        case WORD_SIZE:
            immVal |= ~uint64_t(0xFFFFFFFF);  // NOLINT
            break;
        default:
            break;
    }

    // Then clip the mask down to the destination width.
    if (dst.GetSize() != DOUBLE_WORD_SIZE) {
        // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult)
        immVal &= (uint64_t(1) << dst.GetSize()) - 1;
    }

    if (ImmFitsSize(immVal, dst.GetSize())) {
        EncodeMov(dst, src);
        GetMasm()->and_(ArchReg(dst), immVal);
    } else {
        // Immediate too wide to encode directly: materialize it in a register.
        if (dst.GetId() != src.GetId()) {
            GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
            GetMasm()->and_(ArchReg(dst), ArchReg(src));
        } else {
            // dst aliases src, so stage the immediate in a temporary.
            ScopedTmpReg tmpReg(this, dst.GetType());
            GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
            GetMasm()->and_(ArchReg(dst), ArchReg(tmpReg));
        }
    }
}
1969
EncodeOr(Reg dst,Reg src,Imm imm)1970 void Amd64Encoder::EncodeOr(Reg dst, Reg src, Imm imm)
1971 {
1972 ASSERT(dst.IsScalar());
1973 auto immVal = ImmToUnsignedInt(imm);
1974 if (ImmFitsSize(immVal, dst.GetSize())) {
1975 EncodeMov(dst, src);
1976 GetMasm()->or_(ArchReg(dst), immVal);
1977 } else {
1978 if (dst.GetId() != src.GetId()) {
1979 GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
1980 GetMasm()->or_(ArchReg(dst), ArchReg(src));
1981 } else {
1982 ScopedTmpReg tmpReg(this, dst.GetType());
1983 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
1984 GetMasm()->or_(ArchReg(dst), ArchReg(tmpReg));
1985 }
1986 }
1987 }
1988
EncodeXor(Reg dst,Reg src,Imm imm)1989 void Amd64Encoder::EncodeXor(Reg dst, Reg src, Imm imm)
1990 {
1991 ASSERT(dst.IsScalar());
1992 auto immVal = ImmToUnsignedInt(imm);
1993 if (ImmFitsSize(immVal, dst.GetSize())) {
1994 EncodeMov(dst, src);
1995 GetMasm()->xor_(ArchReg(dst), immVal);
1996 } else {
1997 if (dst.GetId() != src.GetId()) {
1998 GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
1999 GetMasm()->xor_(ArchReg(dst), ArchReg(src));
2000 } else {
2001 ScopedTmpReg tmpReg(this, dst.GetType());
2002 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
2003 GetMasm()->xor_(ArchReg(dst), ArchReg(tmpReg));
2004 }
2005 }
2006 }
2007
/// Load an immediate into a register (scalar or vector).
void Amd64Encoder::EncodeMov(Reg dst, Imm src)
{
    if (dst.IsScalar()) {
        // For 8/16-bit destinations first clear the full 32-bit register so
        // the bits above the written part do not keep stale data.
        if (dst.GetSize() < WORD_SIZE) {
            GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
        }
        GetMasm()->mov(ArchReg(dst), ArchImm(src));
        return;
    }

    // There is no move-immediate into an XMM register: materialize the bit
    // pattern in a GPR and transfer it with movd/movq.
    if (dst.GetType() == FLOAT32_TYPE) {
        ScopedTmpRegU32 tmpReg(this);
        auto val = bit_cast<uint32_t>(src.GetAsFloat());
        GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(val));
        GetMasm()->movd(ArchVReg(dst), ArchReg(tmpReg));
    } else {
        ScopedTmpRegU64 tmpReg(this);
        auto val = bit_cast<uint64_t>(src.GetAsDouble());
        GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(val));
        GetMasm()->movq(ArchVReg(dst), ArchReg(tmpReg));
    }
}
2030
/// Load from memory into dst, sign- or zero-extending integer values as needed.
void Amd64Encoder::EncodeLdr(Reg dst, bool dstSigned, MemRef mem)
{
    auto m = ArchMem(mem).Prepare(GetMasm());

    if (dst.GetType() == FLOAT32_TYPE) {
        GetMasm()->movss(ArchVReg(dst), m);
        return;
    }
    if (dst.GetType() == FLOAT64_TYPE) {
        GetMasm()->movsd(ArchVReg(dst), m);
        return;
    }

    m.setSize(dst.GetSize() / BITS_PER_BYTE);

    if (dstSigned && dst.GetSize() < DOUBLE_WORD_SIZE) {
        if (dst.GetSize() == WORD_SIZE) {
            // movsx has no 32->64 form; movsxd performs that extension.
            GetMasm()->movsxd(ArchReg(dst, DOUBLE_WORD_SIZE), m);
        } else {
            GetMasm()->movsx(ArchReg(dst, DOUBLE_WORD_SIZE), m);
        }
        return;
    }
    if (!dstSigned && dst.GetSize() < WORD_SIZE) {
        // 8/16-bit unsigned loads need an explicit zero-extension; a 32-bit
        // mov (handled below) clears the upper half implicitly.
        GetMasm()->movzx(ArchReg(dst, WORD_SIZE), m);
        return;
    }

    GetMasm()->mov(ArchReg(dst), m);
}
2061
/// Load with acquire semantics (a plain load suffices on x86 TSO).
void Amd64Encoder::EncodeLdrAcquire(Reg dst, bool dstSigned, MemRef mem)
{
    EncodeLdr(dst, dstSigned, mem);
    // LoadLoad and LoadStore barrier should be here, but this is no-op in amd64 memory model
}
2067
/// Store src to memory; the access width is taken from the register size.
void Amd64Encoder::EncodeStr(Reg src, MemRef mem)
{
    auto m = ArchMem(mem).Prepare(GetMasm());

    if (src.GetType() == FLOAT32_TYPE) {
        GetMasm()->movss(m, ArchVReg(src));
        return;
    }
    if (src.GetType() == FLOAT64_TYPE) {
        GetMasm()->movsd(m, ArchVReg(src));
        return;
    }

    m.setSize(src.GetSize() / BITS_PER_BYTE);
    GetMasm()->mov(m, ArchReg(src));
}
2084
/// Store with release semantics, followed by a full fence.
void Amd64Encoder::EncodeStrRelease(Reg src, MemRef mem)
{
    // StoreStore barrier should be here, but this is no-op in amd64 memory model
    EncodeStr(src, mem);
    // this is StoreLoad barrier (which is also full memory barrier in amd64 memory model)
    GetMasm()->lock().add(asmjit::x86::dword_ptr(asmjit::x86::rsp), asmjit::imm(0));
}
2092
/// Store src zero-extended to a full 64-bit slot at mem.
void Amd64Encoder::EncodeStrz(Reg src, MemRef mem)
{
    if (src.IsScalar()) {
        if (src.GetSize() == DOUBLE_WORD_SIZE) {
            GetMasm()->mov(ArchMem(mem).Prepare(GetMasm()), ArchReg(src));
        } else {
            // Zero a temporary, copy the narrow value in, then store 64 bits.
            ScopedTmpRegU64 tmpReg(this);
            GetMasm()->xor_(ArchReg(tmpReg), ArchReg(tmpReg));
            GetMasm()->mov(ArchReg(tmpReg, src.GetSize()), ArchReg(src));
            GetMasm()->mov(ArchMem(mem).Prepare(GetMasm()), ArchReg(tmpReg));
        }
    } else {
        if (src.GetType() == FLOAT64_TYPE) {
            GetMasm()->movsd(ArchMem(mem).Prepare(GetMasm()), ArchVReg(src));
        } else {
            ScopedTmpRegF64 tmpReg(this);

            // movss reg,reg merges into the low lane only, so pre-zero the
            // temporary to obtain a zero-extended 64-bit pattern.
            GetMasm()->xorpd(ArchVReg(tmpReg), ArchVReg(tmpReg));
            GetMasm()->movss(ArchVReg(tmpReg), ArchVReg(src));
            GetMasm()->movsd(ArchMem(mem).Prepare(GetMasm()), ArchVReg(tmpReg));
        }
    }
}
2116
/// Store the immediate `src` of `srcSizeBytes` bytes to memory.
void Amd64Encoder::EncodeSti(int64_t src, uint8_t srcSizeBytes, MemRef mem)
{
    ASSERT(srcSizeBytes <= 8U);
    auto m = ArchMem(mem).Prepare(GetMasm());
    if (srcSizeBytes <= HALF_WORD_SIZE_BYTES) {
        m.setSize(srcSizeBytes);
        GetMasm()->mov(m, asmjit::imm(src));
    } else {
        m.setSize(DOUBLE_WORD_SIZE_BYTES);

        if (ImmFitsSize(src, DOUBLE_WORD_SIZE)) {
            GetMasm()->mov(m, asmjit::imm(src));
        } else {
            // There is no 64-bit immediate-to-memory mov: go through a register.
            ScopedTmpRegU64 tmpReg(this);
            GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(src));
            GetMasm()->mov(m, ArchReg(tmpReg));
        }
    }
}
2136
/// Store a float immediate by its 32-bit pattern.
void Amd64Encoder::EncodeSti(float src, MemRef mem)
{
    EncodeSti(bit_cast<int32_t>(src), sizeof(int32_t), mem);
}
2141
/// Store a double immediate by its 64-bit pattern.
void Amd64Encoder::EncodeSti(double src, MemRef mem)
{
    EncodeSti(bit_cast<int64_t>(src), sizeof(int64_t), mem);
}
2146
/// Copy `size` bits from memFrom to memTo through a temporary register.
void Amd64Encoder::EncodeMemCopy(MemRef memFrom, MemRef memTo, size_t size)
{
    ScopedTmpRegU64 tmpReg(this);
    GetMasm()->mov(ArchReg(tmpReg, size), ArchMem(memFrom).Prepare(GetMasm()));
    GetMasm()->mov(ArchMem(memTo).Prepare(GetMasm()), ArchReg(tmpReg, size));
}
2153
EncodeMemCopyz(MemRef memFrom,MemRef memTo,size_t size)2154 void Amd64Encoder::EncodeMemCopyz(MemRef memFrom, MemRef memTo, size_t size)
2155 {
2156 ScopedTmpRegU64 tmpReg(this);
2157 if (size < DOUBLE_WORD_SIZE) {
2158 GetMasm()->xor_(ArchReg(tmpReg), ArchReg(tmpReg));
2159 }
2160 GetMasm()->mov(ArchReg(tmpReg, size), ArchMem(memFrom).Prepare(GetMasm()));
2161 GetMasm()->mov(ArchMem(memTo).Prepare(GetMasm()), ArchReg(tmpReg));
2162 }
2163
/// dst = (src0 <cc> src1) ? 1 : 0.
/// For float sources the parity flag (set by ucomiss/ucomisd on unordered
/// operands, i.e. NaN) is folded into the result: conditions that must match
/// NaN yield 1, all others yield 0.
void Amd64Encoder::EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc)
{
    if (src0.IsScalar()) {
        GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
    } else {
        if (src0.GetType() == FLOAT32_TYPE) {
            GetMasm()->ucomiss(ArchVReg(src0), ArchVReg(src1));
        } else {
            GetMasm()->ucomisd(ArchVReg(src0), ArchVReg(src1));
        }
    }
    // mov (unlike xor) does not disturb the flags just produced by cmp/ucomi*.
    GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(0));

    if (src0.IsScalar()) {
        GetMasm()->set(ArchCc(cc), ArchReg(dst, BYTE_SIZE));
        return;
    }

    auto end = GetMasm()->newLabel();

    if (CcMatchesNan(cc)) {
        // An unordered result satisfies this condition: report 1 on NaN.
        GetMasm()->setp(ArchReg(dst, BYTE_SIZE));
    }
    GetMasm()->jp(end);
    GetMasm()->set(ArchCc(cc, true), ArchReg(dst, BYTE_SIZE));

    GetMasm()->bind(end);
}
2192
/// dst = ((src0 & src1) <cc> 0) ? 1 : 0, without materializing src0 & src1.
void Amd64Encoder::EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc)
{
    ASSERT(src0.IsScalar());

    GetMasm()->test(ArchReg(src0), ArchReg(src1));

    // mov keeps the flags produced by test intact (xor would clobber them).
    GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(0));
    GetMasm()->set(ArchCcTest(cc), ArchReg(dst, BYTE_SIZE));
}
2202
/// Atomically OR the low byte of `value` into the byte at [addr].
/// A single lock-prefixed instruction suffices on x86, so fastEncoding is unused.
void Amd64Encoder::EncodeAtomicByteOr(Reg addr, Reg value, [[maybe_unused]] bool fastEncoding)
{
    GetMasm()->lock().or_(asmjit::x86::byte_ptr(ArchReg(addr)), ArchReg(value, ark::compiler::BYTE_SIZE));
}
2207
/// Three-way compare: dst = -1 / 0 / 1 for src0 < / == / > src1.
/// For floats `cc` selects the NaN policy: Condition::LT maps NaN to -1,
/// Condition::MI maps NaN to 1.
void Amd64Encoder::EncodeCmp(Reg dst, Reg src0, Reg src1, Condition cc)
{
    auto end = GetMasm()->newLabel();

    if (src0.IsFloat()) {
        ASSERT(src1.IsFloat());
        ASSERT(cc == Condition::MI || cc == Condition::LT);

        if (src0.GetType() == FLOAT32_TYPE) {
            GetMasm()->ucomiss(ArchVReg(src0), ArchVReg(src1));
        } else {
            GetMasm()->ucomisd(ArchVReg(src0), ArchVReg(src1));
        }

        // Preload the NaN answer; it is kept via jp when the compare is unordered.
        GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), cc == Condition::LT ? asmjit::imm(-1) : asmjit::imm(1));
        // ucomi* reports "less than" through CF, so continue with the
        // unsigned below condition.
        cc = Condition::LO;

        GetMasm()->jp(end);
    } else {
        ASSERT(src0.IsScalar() && src1.IsScalar());
        ASSERT(cc == Condition::LO || cc == Condition::LT);
        GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
    }
    // mov preserves flags; setne leaves 0 for equal, 1 otherwise ...
    GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(0));
    GetMasm()->setne(ArchReg(dst, BYTE_SIZE));

    // ... and the 1 is negated to -1 when src0 compares below src1.
    GetMasm()->j(asmjit::x86::Condition::negate(ArchCc(cc)), end);
    GetMasm()->neg(ArchReg(dst));

    GetMasm()->bind(end);
}
2239
/// dst = (src2 <cc> src3) ? src0 : src1, implemented with cmov.
/// For float comparisons the parity flag marks unordered (NaN) operands and
/// is handled according to whether `cc` should match NaN.
void Amd64Encoder::EncodeSelect(ArgsSelect &&args)
{
    auto [dst, src0, src1, src2, src3, cc] = args;
    ASSERT(!src0.IsFloat() && !src1.IsFloat());
    if (src2.IsScalar()) {
        GetMasm()->cmp(ArchReg(src2), ArchReg(src3));
    } else if (src2.GetType() == FLOAT32_TYPE) {
        GetMasm()->comiss(ArchVReg(src2), ArchVReg(src3));
    } else {
        GetMasm()->comisd(ArchVReg(src2), ArchVReg(src3));
    }

    // cmov requires at least 32-bit operands.
    auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
    // When dst aliases src0 the unconditional mov below would destroy src0,
    // so build the result in a temporary and copy it out at the end.
    bool dstAliased = dst.GetId() == src0.GetId();
    ScopedTmpReg tmpReg(this, dst.GetType());
    auto dstReg = dstAliased ? ArchReg(tmpReg, size) : ArchReg(dst, size);

    GetMasm()->mov(dstReg, ArchReg(src1, size));

    if (src2.IsScalar()) {
        GetMasm()->cmov(ArchCc(cc), dstReg, ArchReg(src0, size));
    } else if (CcMatchesNan(cc)) {
        // NaN satisfies the condition: also select src0 on parity.
        GetMasm()->cmovp(dstReg, ArchReg(src0, size));
        GetMasm()->cmov(ArchCc(cc, src2.IsFloat()), dstReg, ArchReg(src0, size));
    } else {
        auto end = GetMasm()->newLabel();

        // NaN does not satisfy the condition: keep src1 on parity.
        GetMasm()->jp(end);
        GetMasm()->cmov(ArchCc(cc, src2.IsFloat()), dstReg, ArchReg(src0, size));

        GetMasm()->bind(end);
    }
    if (dstAliased) {
        EncodeMov(dst, tmpReg);
    }
}
2276
/// dst = (src2 <cc> imm) ? src0 : src1 for scalar operands, via cmov.
void Amd64Encoder::EncodeSelect(ArgsSelectImm &&args)
{
    auto [dst, src0, src1, src2, imm, cc] = args;
    ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());

    auto immVal = imm.GetAsInt();
    if (ImmFitsSize(immVal, src2.GetSize())) {
        GetMasm()->cmp(ArchReg(src2), asmjit::imm(immVal));
    } else {
        // Immediate too wide for cmp: stage it in a temporary register.
        ScopedTmpReg tmpReg(this, src2.GetType());
        GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
        GetMasm()->cmp(ArchReg(src2), ArchReg(tmpReg));
    }

    ScopedTmpReg tmpReg(this, dst.GetType());
    // cmov requires at least 32-bit operands.
    auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
    // When dst aliases src0 the unconditional mov below would destroy src0,
    // so build the result in a temporary and copy it out at the end.
    bool dstAliased = dst.GetId() == src0.GetId();
    auto dstReg = dstAliased ? ArchReg(tmpReg, size) : ArchReg(dst, size);

    GetMasm()->mov(dstReg, ArchReg(src1, size));
    GetMasm()->cmov(ArchCc(cc), dstReg, ArchReg(src0, size));
    if (dstAliased) {
        EncodeMov(dst, tmpReg);
    }
}
2302
/// dst = ((src2 & src3) <cc> 0) ? src0 : src1, via test + cmov.
void Amd64Encoder::EncodeSelectTest(ArgsSelect &&args)
{
    auto [dst, src0, src1, src2, src3, cc] = args;
    ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());

    GetMasm()->test(ArchReg(src2), ArchReg(src3));

    ScopedTmpReg tmpReg(this, dst.GetType());
    // cmov requires at least 32-bit operands.
    auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
    // When dst aliases src0 the unconditional mov below would destroy src0,
    // so build the result in a temporary and copy it out at the end.
    bool dstAliased = dst.GetId() == src0.GetId();
    auto dstReg = dstAliased ? ArchReg(tmpReg, size) : ArchReg(dst, size);

    GetMasm()->mov(dstReg, ArchReg(src1, size));
    GetMasm()->cmov(ArchCcTest(cc), dstReg, ArchReg(src0, size));
    if (dstAliased) {
        EncodeMov(dst, tmpReg);
    }
}
2321
/// dst = ((src2 & imm) <cc> 0) ? src0 : src1, via test + cmov.
void Amd64Encoder::EncodeSelectTest(ArgsSelectImm &&args)
{
    auto [dst, src0, src1, src2, imm, cc] = args;
    ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());

    auto immVal = imm.GetAsInt();
    if (ImmFitsSize(immVal, src2.GetSize())) {
        GetMasm()->test(ArchReg(src2), asmjit::imm(immVal));
    } else {
        // Immediate too wide for test: stage it in a temporary register.
        ScopedTmpReg tmpReg(this, src2.GetType());
        GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
        GetMasm()->test(ArchReg(src2), ArchReg(tmpReg));
    }

    ScopedTmpReg tmpReg(this, dst.GetType());
    // cmov requires at least 32-bit operands.
    auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
    // When dst aliases src0 the unconditional mov below would destroy src0,
    // so build the result in a temporary and copy it out at the end.
    bool dstAliased = dst.GetId() == src0.GetId();
    auto dstReg = dstAliased ? ArchReg(tmpReg, size) : ArchReg(dst, size);

    GetMasm()->mov(dstReg, ArchReg(src1, size));
    GetMasm()->cmov(ArchCcTest(cc), dstReg, ArchReg(src0, size));
    if (dstAliased) {
        EncodeMov(dst, tmpReg);
    }
}
2347
/// Load a pair of adjacent values (ARM ldp analogue):
/// dst0 <- [mem], dst1 <- [mem + element size].
void Amd64Encoder::EncodeLdp(Reg dst0, Reg dst1, bool dstSigned, MemRef mem)
{
    ASSERT(dst0.IsFloat() == dst1.IsFloat());
    ASSERT(dst0.GetSize() == dst1.GetSize());

    auto m = ArchMem(mem).Prepare(GetMasm());

    if (dst0.IsFloat()) {
        if (dst0.GetType() == FLOAT32_TYPE) {
            GetMasm()->movss(ArchVReg(dst0), m);

            m.addOffset(WORD_SIZE_BYTES);
            GetMasm()->movss(ArchVReg(dst1), m);
        } else {
            GetMasm()->movsd(ArchVReg(dst0), m);

            m.addOffset(DOUBLE_WORD_SIZE_BYTES);
            GetMasm()->movsd(ArchVReg(dst1), m);
        }
        return;
    }

    if (dstSigned && dst0.GetSize() == WORD_SIZE) {
        // Sign-extend each 32-bit element into the full 64-bit register.
        m.setSize(WORD_SIZE_BYTES);
        GetMasm()->movsxd(ArchReg(dst0, DOUBLE_WORD_SIZE), m);

        m.addOffset(WORD_SIZE_BYTES);
        GetMasm()->movsxd(ArchReg(dst1, DOUBLE_WORD_SIZE), m);
        return;
    }

    GetMasm()->mov(ArchReg(dst0), m);

    m.addOffset(dst0.GetSize() / BITS_PER_BYTE);
    GetMasm()->mov(ArchReg(dst1), m);
}
2384
/// Store a pair of adjacent values (ARM stp analogue):
/// [mem] <- src0, [mem + element size] <- src1.
void Amd64Encoder::EncodeStp(Reg src0, Reg src1, MemRef mem)
{
    ASSERT(src0.IsFloat() == src1.IsFloat());
    ASSERT(src0.GetSize() == src1.GetSize());

    auto m = ArchMem(mem).Prepare(GetMasm());

    if (src0.IsFloat()) {
        if (src0.GetType() == FLOAT32_TYPE) {
            GetMasm()->movss(m, ArchVReg(src0));

            m.addOffset(WORD_SIZE_BYTES);
            GetMasm()->movss(m, ArchVReg(src1));
        } else {
            GetMasm()->movsd(m, ArchVReg(src0));

            m.addOffset(DOUBLE_WORD_SIZE_BYTES);
            GetMasm()->movsd(m, ArchVReg(src1));
        }
        return;
    }

    GetMasm()->mov(m, ArchReg(src0));

    m.addOffset(src0.GetSize() / BITS_PER_BYTE);
    GetMasm()->mov(m, ArchReg(src1));
}
2412
/// Byte-reverse src into dst. bswap has no 16-bit form, so half-word values
/// are rotated by 8 bits and then sign-extended to 32 bits.
void Amd64Encoder::EncodeReverseBytes(Reg dst, Reg src)
{
    ASSERT(src.GetSize() > BYTE_SIZE);
    ASSERT(src.GetSize() == dst.GetSize());
    ASSERT(src.IsValid());
    ASSERT(dst.IsValid());

    if (src != dst) {
        GetMasm()->mov(ArchReg(dst), ArchReg(src));
    }

    if (src.GetSize() == HALF_SIZE) {
        // rol by 8 swaps the two bytes of a 16-bit value.
        GetMasm()->rol(ArchReg(dst), BYTE_SIZE);
        GetMasm()->movsx(ArchReg(dst, WORD_SIZE), ArchReg(dst));
    } else {
        GetMasm()->bswap(ArchReg(dst));
    }
}
2431
/// Zero-extend packed 8-bit lanes of src into 16-bit lanes of dst (pmovzxbw).
void Amd64Encoder::EncodeUnsignedExtendBytesToShorts(Reg dst, Reg src)
{
    GetMasm()->pmovzxbw(ArchVReg(dst), ArchVReg(src));
}
2436
/* Attention: the encoders below operate on vector registers, not GPRs */
/// Reverse the four 16-bit lanes in the low quadword of a vector register.
void Amd64Encoder::EncodeReverseHalfWords(Reg dst, Reg src)
{
    ASSERT(src.GetSize() == dst.GetSize());
    ASSERT(src.IsValid());
    ASSERT(dst.IsValid());

    // pshuflw shuffle control selecting lanes 3,2,1,0 in reverse order.
    constexpr unsigned MASK = 0x1b; // reverse mask: 00 01 10 11
    GetMasm()->pshuflw(ArchVReg(dst), ArchVReg(src), MASK);
}
2447
/// x86 add/sub/cmp accept any immediate that fits the operand encoding,
/// regardless of the compare signedness.
bool Amd64Encoder::CanEncodeImmAddSubCmp(int64_t imm, uint32_t size, [[maybe_unused]] bool signedCompare)
{
    return ImmFitsSize(imm, size);
}
2452
/// dst0 = population count of src0 (requires the POPCNT feature, see
/// CanEncodeBitCount). The destination is sized to match the source operand.
void Amd64Encoder::EncodeBitCount(Reg dst0, Reg src0)
{
    ASSERT(src0.GetSize() == WORD_SIZE || src0.GetSize() == DOUBLE_WORD_SIZE);
    ASSERT(dst0.GetSize() == WORD_SIZE);
    ASSERT(src0.IsScalar() && dst0.IsScalar());

    GetMasm()->popcnt(ArchReg(dst0, src0.GetSize()), ArchReg(src0));
}
2461
/// Count leading zero bits. bsr leaves its destination undefined for a zero
/// source, so that case is handled by an explicit branch returning the width.
void Amd64Encoder::EncodeCountLeadingZeroBits(Reg dst, Reg src)
{
    auto end = CreateLabel();
    auto zero = CreateLabel();
    EncodeJump(zero, src, Condition::EQ);
    GetMasm()->bsr(ArchReg(dst), ArchReg(src));
    // bsr yields the index of the highest set bit; clz = (width - 1) - index,
    // which for a power-of-two width equals index ^ (width - 1).
    GetMasm()->xor_(ArchReg(dst), asmjit::imm(dst.GetSize() - 1));
    EncodeJump(end);

    BindLabel(zero);
    GetMasm()->mov(ArchReg(dst), asmjit::imm(dst.GetSize()));

    BindLabel(end);
}
2476
/// Count trailing zero bits. bsf sets ZF when src is zero (leaving its
/// destination undefined), so dst is preloaded with the bit width (mov does
/// not alter flags) and overwritten with the bsf result only when src != 0.
void Amd64Encoder::EncodeCountTrailingZeroBits(Reg dst, Reg src)
{
    ScopedTmpReg tmp(this, src.GetType());
    GetMasm()->bsf(ArchReg(tmp), ArchReg(src));
    GetMasm()->mov(ArchReg(dst), asmjit::imm(dst.GetSize()));
    GetMasm()->cmovne(ArchReg(dst), ArchReg(tmp));
}
2484
/// dst = ceil(src): roundsd with rounding control 2 (round toward +inf).
void Amd64Encoder::EncodeCeil(Reg dst, Reg src)
{
    // NOLINTNEXTLINE(readability-magic-numbers)
    GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(2_I));
}
2490
/// dst = floor(src): roundsd with rounding control 1 (round toward -inf).
void Amd64Encoder::EncodeFloor(Reg dst, Reg src)
{
    GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(1));
}
2495
/// dst = rint(src): roundsd with rounding control 0 (round to nearest even).
void Amd64Encoder::EncodeRint(Reg dst, Reg src)
{
    GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(0));
}
2500
/// dst = trunc(src): roundsd with rounding control 3 (round toward zero).
void Amd64Encoder::EncodeTrunc(Reg dst, Reg src)
{
    GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(3_I));
}
2505
/// Round to nearest with ties away from zero:
/// dst = trunc(src + copysign(0.49999999999999994, src)).
/// The magic addend is the largest double strictly below 0.5, which avoids
/// over-rounding values whose fraction is just under one half.
void Amd64Encoder::EncodeRoundAway(Reg dst, Reg src)
{
    ASSERT(src.GetType() == FLOAT64_TYPE);
    ASSERT(dst.GetType() == FLOAT64_TYPE);

    ScopedTmpReg tv(this, src.GetType());
    ScopedTmpReg tv1(this, src.GetType());
    ScopedTmpRegU64 ti(this);
    auto dest = dst;

    // When src and dst share a register, build the result in a temporary so
    // src stays intact up to the final addsd.
    auto shared = src == dst;

    if (shared) {
        dest = tv1.GetReg();
    }
    GetMasm()->movapd(ArchVReg(dest), ArchVReg(src));

    // Isolate the sign bit of src ...
    constexpr auto SIGN_BIT_MASK = 0x8000000000000000ULL;
    GetMasm()->mov(ArchReg(ti), asmjit::imm(SIGN_BIT_MASK));
    GetMasm()->movq(ArchVReg(tv), ArchReg(ti));
    GetMasm()->andpd(ArchVReg(dest), ArchVReg(tv));

    // ... and attach it to the magic addend: dest = copysign(addend, src).
    constexpr auto DOUBLE_POINT_FIVE = 0x3fdfffffffffffffULL;  // .49999999999999994
    GetMasm()->mov(ArchReg(ti), asmjit::imm(DOUBLE_POINT_FIVE));
    GetMasm()->movq(ArchVReg(tv), ArchReg(ti));
    GetMasm()->orpd(ArchVReg(dest), ArchVReg(tv));

    // dest = trunc(src + dest): rounding control 3 truncates toward zero.
    GetMasm()->addsd(ArchVReg(dest), ArchVReg(src));
    GetMasm()->roundsd(ArchVReg(dest), ArchVReg(dest), asmjit::imm(3_I));
    if (shared) {
        GetMasm()->movapd(ArchVReg(dst), ArchVReg(dest));
    }
}
2539
/// Round float to int32, nearest with ties toward +inf (Math.round style):
/// t1 = floor(src), add 1.0f when src - t1 >= 0.5f, then convert with
/// saturation: results >= 2^31 clamp to INT32_MAX, NaN maps to 0.
void Amd64Encoder::EncodeRoundToPInfFloat(Reg dst, Reg src)
{
    ScopedTmpReg t1(this, src.GetType());
    ScopedTmpReg t2(this, src.GetType());
    ScopedTmpReg t3(this, src.GetType());
    ScopedTmpReg t4(this, dst.GetType());

    auto skipIncrId = CreateLabel();
    auto doneId = CreateLabel();

    auto skipIncr = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(skipIncrId);
    auto done = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(doneId);

    // t1 = floor(src) (rounding control 1), t2 = src - t1 (fractional part).
    GetMasm()->movss(ArchVReg(t2), ArchVReg(src));
    GetMasm()->roundss(ArchVReg(t1), ArchVReg(src), asmjit::imm(1));
    GetMasm()->subss(ArchVReg(t2), ArchVReg(t1));
    // NOLINTNEXTLINE(readability-magic-numbers)
    GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int32_t, float>(0.5F)));
    GetMasm()->movd(ArchVReg(t3), ArchReg(t4));
    GetMasm()->comiss(ArchVReg(t2), ArchVReg(t3));
    GetMasm()->j(asmjit::x86::Condition::Code::kB, *skipIncr);
    // Fraction >= 0.5: round up by adding 1.0 to the floor.
    // NOLINTNEXTLINE(readability-magic-numbers)
    GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int32_t, float>(1.0F)));
    GetMasm()->movd(ArchVReg(t3), ArchReg(t4));
    GetMasm()->addss(ArchVReg(t1), ArchVReg(t3));
    BindLabel(skipIncrId);

    // Saturating conversion: preload INT32_MAX and keep it when t1 >= 2^31.
    // NOLINTNEXTLINE(readability-magic-numbers)
    GetMasm()->mov(ArchReg(dst), asmjit::imm(0x7FFFFFFF));
    GetMasm()->cvtsi2ss(ArchVReg(t2), ArchReg(dst));
    GetMasm()->comiss(ArchVReg(t1), ArchVReg(t2));
    GetMasm()->j(asmjit::x86::Condition::Code::kAE,
                 *done);  // clipped to max (already in dst), does not jump on unordered
    GetMasm()->mov(ArchReg(dst), asmjit::imm(0));                    // does not change flags
    GetMasm()->j(asmjit::x86::Condition::Code::kParityEven, *done);  // NaN mapped to 0 (just moved in dst)
    GetMasm()->cvttss2si(ArchReg(dst), ArchVReg(t1));
    BindLabel(doneId);
}
2578
EncodeRoundToPInfDouble(Reg dst,Reg src)2579 void Amd64Encoder::EncodeRoundToPInfDouble(Reg dst, Reg src)
2580 {
2581 ScopedTmpReg t1(this, src.GetType());
2582 ScopedTmpReg t2(this, src.GetType());
2583 ScopedTmpReg t3(this, src.GetType());
2584 ScopedTmpReg t4(this, dst.GetType());
2585
2586 auto skipIncrId = CreateLabel();
2587 auto doneId = CreateLabel();
2588
2589 auto skipIncr = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(skipIncrId);
2590 auto done = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(doneId);
2591
2592 GetMasm()->movsd(ArchVReg(t2), ArchVReg(src));
2593 GetMasm()->roundsd(ArchVReg(t1), ArchVReg(src), asmjit::imm(1));
2594 GetMasm()->subsd(ArchVReg(t2), ArchVReg(t1));
2595 // NOLINTNEXTLINE(readability-magic-numbers)
2596 GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int64_t, double>(0.5F)));
2597 GetMasm()->movq(ArchVReg(t3), ArchReg(t4));
2598 GetMasm()->comisd(ArchVReg(t2), ArchVReg(t3));
2599 GetMasm()->j(asmjit::x86::Condition::Code::kB, *skipIncr);
2600 // NOLINTNEXTLINE(readability-magic-numbers)
2601 GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int64_t, double>(1.0)));
2602 GetMasm()->movq(ArchVReg(t3), ArchReg(t4));
2603 GetMasm()->addsd(ArchVReg(t1), ArchVReg(t3));
2604 BindLabel(skipIncrId);
2605
2606 // NOLINTNEXTLINE(readability-magic-numbers)
2607 GetMasm()->mov(ArchReg(dst), asmjit::imm(0x7FFFFFFFFFFFFFFFL));
2608 GetMasm()->cvtsi2sd(ArchVReg(t2), ArchReg(dst));
2609 GetMasm()->comisd(ArchVReg(t1), ArchVReg(t2));
2610 GetMasm()->j(asmjit::x86::Condition::Code::kAE,
2611 *done); // clipped to max (already in dst), does not jump on unordered
2612 GetMasm()->mov(ArchReg(dst), asmjit::imm(0)); // does not change flags
2613 GetMasm()->j(asmjit::x86::Condition::Code::kParityEven, *done); // NaN mapped to 0 (just moved in dst)
2614 GetMasm()->cvttsd2si(ArchReg(dst), ArchVReg(t1));
2615 BindLabel(doneId);
2616 }
2617
EncodeRoundToPInf(Reg dst,Reg src)2618 void Amd64Encoder::EncodeRoundToPInf(Reg dst, Reg src)
2619 {
2620 if (src.GetType() == FLOAT32_TYPE) {
2621 EncodeRoundToPInfFloat(dst, src);
2622 } else if (src.GetType() == FLOAT64_TYPE) {
2623 EncodeRoundToPInfDouble(dst, src);
2624 } else {
2625 UNREACHABLE();
2626 }
2627 }
2628
/// Reverse the bit order of src0 into dst0.
/// Classic divide-and-conquer: swap adjacent 1-, 2- and 4-bit groups inside
/// each byte, then reverse the bytes with bswap.
template <typename T>
void Amd64Encoder::EncodeReverseBitsImpl(Reg dst0, Reg src0)
{
    ASSERT(std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed);
    [[maybe_unused]] constexpr auto IMM_8 = 8;
    ASSERT(sizeof(T) * IMM_8 == dst0.GetSize());
    // Masks selecting the low half of each 2-, 4- and 8-bit group.
    // NOLINTNEXTLINE(modernize-avoid-c-arrays)
    static constexpr T MASKS[] = {static_cast<T>(UINT64_C(0x5555555555555555)),
                                  static_cast<T>(UINT64_C(0x3333333333333333)),
                                  static_cast<T>(UINT64_C(0x0f0f0f0f0f0f0f0f))};

    ScopedTmpReg tmp(this, dst0.GetType());
    ScopedTmpReg immHolder(this, dst0.GetType());
    auto immHolderReg = ArchReg(immHolder);

    GetMasm()->mov(ArchReg(dst0), ArchReg(src0));
    GetMasm()->mov(ArchReg(tmp), ArchReg(src0));
    constexpr auto MAX_ROUNDS = 3;
    for (uint64_t round = 0; round < MAX_ROUNDS; round++) {
        // Round r swaps neighbouring groups of 2^r bits:
        // dst = (x >> shift) & mask, tmp = (x & mask) << shift, x = dst | tmp.
        auto shift = 1U << round;
        auto mask = asmjit::imm(MASKS[round]);
        GetMasm()->shr(ArchReg(dst0), shift);
        if (dst0.GetSize() == DOUBLE_WORD_SIZE) {
            // 64-bit masks cannot be encoded as and-immediates: stage them
            // in a register first.
            GetMasm()->mov(immHolderReg, mask);
            GetMasm()->and_(ArchReg(tmp), immHolderReg);
            GetMasm()->and_(ArchReg(dst0), immHolderReg);
        } else {
            GetMasm()->and_(ArchReg(tmp), mask);
            GetMasm()->and_(ArchReg(dst0), mask);
        }
        GetMasm()->shl(ArchReg(tmp), shift);
        GetMasm()->or_(ArchReg(dst0), ArchReg(tmp));
        constexpr auto ROUND_2 = 2;
        if (round != ROUND_2) {
            // Reseed tmp for the next round (not needed after the last one).
            GetMasm()->mov(ArchReg(tmp), ArchReg(dst0));
        }
    }

    GetMasm()->bswap(ArchReg(dst0));
}
2669
EncodeReverseBits(Reg dst0,Reg src0)2670 void Amd64Encoder::EncodeReverseBits(Reg dst0, Reg src0)
2671 {
2672 ASSERT(src0.GetSize() == WORD_SIZE || src0.GetSize() == DOUBLE_WORD_SIZE);
2673 ASSERT(src0.GetSize() == dst0.GetSize());
2674
2675 if (src0.GetSize() == WORD_SIZE) {
2676 EncodeReverseBitsImpl<uint32_t>(dst0, src0);
2677 return;
2678 }
2679
2680 EncodeReverseBitsImpl<uint64_t>(dst0, src0);
2681 }
2682
/// x86 addressing modes support scale factors 1/2/4/8, i.e. shifts 0..3.
bool Amd64Encoder::CanEncodeScale(uint64_t imm, [[maybe_unused]] uint32_t size)
{
    return imm <= 3U;
}
2687
/// A logical-op immediate is encodable when it fits the operand size.
bool Amd64Encoder::CanEncodeImmLogical(uint64_t imm, uint32_t size)
{
#ifndef NDEBUG
    if (size < DOUBLE_WORD_SIZE) {
        // Test if the highest part is consistent:
        ASSERT(((imm >> size) == 0) || (((~imm) >> size) == 0));
    }
#endif  // NDEBUG
    return ImmFitsSize(imm, size);
}
2698
/// popcnt is only available when the host CPU reports the POPCNT feature.
bool Amd64Encoder::CanEncodeBitCount()
{
    return asmjit::CpuInfo::host().hasFeature(asmjit::x86::Features::kPOPCNT);
}
2703
/// Whether div/mod by the given immediate can use the shared strength-reduction.
bool Amd64Encoder::CanOptimizeImmDivMod(uint64_t imm, bool isSigned) const
{
    return CanOptimizeImmDivModCommon(imm, isSigned);
}
2708
/// dst = (src == +/-Inf) ? 1 : 0.
/// Shifting the FP bit pattern left by one discards the sign bit, so both
/// infinities compare equal to the shifted all-ones-exponent mask.
void Amd64Encoder::EncodeIsInf(Reg dst, Reg src)
{
    ASSERT(dst.IsScalar() && src.IsFloat());

    GetMasm()->xor_(ArchReg(dst, DOUBLE_WORD_SIZE), ArchReg(dst, DOUBLE_WORD_SIZE));

    if (src.GetSize() == WORD_SIZE) {
        // Float exponent mask with the sign position shifted out.
        constexpr auto INF_MASK = uint32_t(0x7f800000) << 1U;

        ScopedTmpRegU32 tmpReg(this);
        ScopedTmpRegU32 tmp1Reg(this);
        auto tmp = ArchReg(tmpReg);
        auto tmp1 = ArchReg(tmp1Reg);

        GetMasm()->movd(tmp1, ArchVReg(src));
        GetMasm()->shl(tmp1, 1);
        GetMasm()->mov(tmp, INF_MASK);
        GetMasm()->cmp(tmp, tmp1);
    } else {
        // Double exponent mask with the sign position shifted out.
        constexpr auto INF_MASK = uint64_t(0x7ff0000000000000) << 1U;

        ScopedTmpRegU64 tmpReg(this);
        ScopedTmpRegU64 tmp1Reg(this);
        auto tmp = ArchReg(tmpReg);
        auto tmp1 = ArchReg(tmp1Reg);

        GetMasm()->movq(tmp1, ArchVReg(src));
        GetMasm()->shl(tmp1, 1);

        GetMasm()->mov(tmp, INF_MASK);
        GetMasm()->cmp(tmp, tmp1);
    }

    GetMasm()->sete(ArchReg(dst, BYTE_SIZE));
}
2744
/// Compare the fractional part of src with the machine epsilon:
/// emits the flags of (fabs(src - trunc(src)) <=> epsilon); unordered
/// operands (Inf/NaN) set the parity flag for the caller to test.
void Amd64Encoder::EncodeCmpFracWithDelta(Reg src)
{
    ASSERT(src.IsFloat());
    ASSERT(src.GetType() == FLOAT32_TYPE || src.GetType() == FLOAT64_TYPE);

    // Rounding control bits: Truncated (aka Round to Zero)
    constexpr uint8_t RND_CTL_TRUNCATED = 0b00000011;

    // Encode (fabs(src - trunc(src)) <= DELTA)
    if (src.GetType() == FLOAT32_TYPE) {
        ScopedTmpRegF32 tmp(this);
        ScopedTmpRegF32 delta(this);
        GetMasm()->roundss(ArchVReg(tmp), ArchVReg(src), asmjit::imm(RND_CTL_TRUNCATED));
        EncodeSub(tmp, src, tmp);
        EncodeAbs(tmp, tmp);
        EncodeMov(delta, Imm(std::numeric_limits<float>::epsilon()));
        GetMasm()->ucomiss(ArchVReg(tmp), ArchVReg(delta));
    } else {
        ScopedTmpRegF64 tmp(this);
        ScopedTmpRegF64 delta(this);
        GetMasm()->roundsd(ArchVReg(tmp), ArchVReg(src), asmjit::imm(RND_CTL_TRUNCATED));
        EncodeSub(tmp, src, tmp);
        EncodeAbs(tmp, tmp);
        EncodeMov(delta, Imm(std::numeric_limits<double>::epsilon()));
        GetMasm()->ucomisd(ArchVReg(tmp), ArchVReg(delta));
    }
}
2772
/// dst = 1 when src holds an integral FP value, 0 otherwise (Inf/NaN -> 0).
void Amd64Encoder::EncodeIsInteger(Reg dst, Reg src)
{
    ASSERT(dst.IsScalar() && src.IsFloat());
    ASSERT(src.GetType() == FLOAT32_TYPE || src.GetType() == FLOAT64_TYPE);

    auto labelExit = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());

    GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
    // Fraction <= epsilon means integral; parity marks unordered operands.
    EncodeCmpFracWithDelta(src);
    GetMasm()->jp(*labelExit);  // Inf or NaN
    GetMasm()->set(ArchCc(Condition::LE, true), ArchReg(dst, BYTE_SIZE));
    GetMasm()->bind(*labelExit);
}
2786
/// dst = 1 when src is an integral FP value that is also exactly
/// representable (|src| within the exact-integer range of the FP type).
void Amd64Encoder::EncodeIsSafeInteger(Reg dst, Reg src)
{
    ASSERT(dst.IsScalar() && src.IsFloat());
    ASSERT(src.GetType() == FLOAT32_TYPE || src.GetType() == FLOAT64_TYPE);

    auto labelExit = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());

    GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));

    // Check if IsInteger
    EncodeCmpFracWithDelta(src);
    GetMasm()->jp(*labelExit);  // Inf or NaN
    GetMasm()->j(ArchCc(Condition::GT, true), *labelExit);

    // Check if it is safe, i.e. src can be represented in float/double without losing precision
    if (src.GetType() == FLOAT32_TYPE) {
        ScopedTmpRegF32 tmp1(this);
        ScopedTmpRegF32 tmp2(this);
        EncodeAbs(tmp1, src);
        EncodeMov(tmp2, Imm(MaxIntAsExactFloat()));
        GetMasm()->ucomiss(ArchVReg(tmp1), ArchVReg(tmp2));
    } else {
        ScopedTmpRegF64 tmp1(this);
        ScopedTmpRegF64 tmp2(this);
        EncodeAbs(tmp1, src);
        EncodeMov(tmp2, Imm(MaxIntAsExactDouble()));
        GetMasm()->ucomisd(ArchVReg(tmp1), ArchVReg(tmp2));
    }
    GetMasm()->set(ArchCc(Condition::LE, true), ArchReg(dst, BYTE_SIZE));
    GetMasm()->bind(*labelExit);
}
2818
/* Since NaNs have to be canonicalized we compare the
 * input with itself, if it is NaN the comparison will
 * set the parity flag (PF) */
/**
 * Move the raw bit pattern of FP register `src` into scalar `dst`,
 * replacing any NaN payload with the canonical quiet NaN
 * (0x7fc00000 for float, 0x7ff8000000000000 for double) via cmovpe.
 */
void Amd64Encoder::EncodeFpToBits(Reg dst, Reg src)
{
    ASSERT(dst.IsScalar() && src.IsFloat());

    if (dst.GetType() == INT32_TYPE) {
        ASSERT(src.GetSize() == WORD_SIZE);

        constexpr auto FLOAT_NAN = uint32_t(0x7fc00000);

        ScopedTmpRegU32 tmp(this);

        GetMasm()->ucomiss(ArchVReg(src), ArchVReg(src)); // PF=1 iff src is NaN
        GetMasm()->mov(ArchReg(tmp), FLOAT_NAN);
        GetMasm()->movd(ArchReg(dst), ArchVReg(src));     // raw bits
        GetMasm()->cmovpe(ArchReg(dst), ArchReg(tmp));    // canonicalize if NaN
    } else {
        ASSERT(src.GetSize() == DOUBLE_WORD_SIZE);

        constexpr auto DOUBLE_NAN = uint64_t(0x7ff8000000000000);
        ScopedTmpRegU64 tmp(this);

        GetMasm()->ucomisd(ArchVReg(src), ArchVReg(src)); // PF=1 iff src is NaN
        GetMasm()->mov(ArchReg(tmp), DOUBLE_NAN);
        GetMasm()->movq(ArchReg(dst), ArchVReg(src));     // raw bits
        GetMasm()->cmovpe(ArchReg(dst), ArchReg(tmp));    // canonicalize if NaN
    }
}
2849
EncodeMoveBitsRaw(Reg dst,Reg src)2850 void Amd64Encoder::EncodeMoveBitsRaw(Reg dst, Reg src)
2851 {
2852 ASSERT((dst.IsFloat() && src.IsScalar()) || (src.IsFloat() && dst.IsScalar()));
2853 if (src.IsScalar()) {
2854 ASSERT((dst.GetSize() == src.GetSize()));
2855 if (src.GetSize() == WORD_SIZE) {
2856 GetMasm()->movd(ArchVReg(dst), ArchReg(src));
2857 } else {
2858 GetMasm()->movq(ArchVReg(dst), ArchReg(src));
2859 }
2860 } else {
2861 ASSERT((src.GetSize() == dst.GetSize()));
2862 if (dst.GetSize() == WORD_SIZE) {
2863 GetMasm()->movd(ArchReg(dst), ArchVReg(src));
2864 } else {
2865 GetMasm()->movq(ArchReg(dst), ArchVReg(src));
2866 }
2867 }
2868 }
2869
2870 /* Unsafe intrinsics */
/**
 * Compare-and-swap at address `obj` (+ optional `*offset`): if the memory word
 * equals `val`, store `newval`; dst := 1 on success, 0 otherwise.
 * cmpxchg implicitly uses [er]ax for the expected value, so rax is saved and
 * restored around the operation unless dst itself is rax.
 */
void Amd64Encoder::EncodeCompareAndSwap(Reg dst, Reg obj, const Reg *offset, Reg val, Reg newval)
{
    /*
     * movl old, %eax
     * lock cmpxchgl new, addr
     * sete %al
     */
    ScopedTmpRegU64 tmp1(this);
    ScopedTmpRegU64 tmp2(this);
    ScopedTmpRegU64 tmp3(this);
    Reg newvalue = newval;
    auto addr = ArchMem(MemRef(tmp2)).Prepare(GetMasm());
    auto addrReg = ArchReg(tmp2);
    Reg rax(ConvertRegNumber(asmjit::x86::rax.id()), INT64_TYPE);

    /* NOTE(ayodkev) this is a workaround for the failure of
     * jsr166.ScheduledExecutorTest, have to figure out if there
     * is less crude way to avoid this */
    if (newval.GetId() == rax.GetId()) {
        SetFalseResult();
        return;
    }

    // Compute the effective address into tmp2.
    if (offset != nullptr) {
        GetMasm()->lea(addrReg, asmjit::x86::ptr(ArchReg(obj), ArchReg(*offset)));
    } else {
        GetMasm()->mov(addrReg, ArchReg(obj));
    }

    /* the [er]ax register will be overwritten by cmpxchg instruction
     * save it unless it is set as a destination register */
    if (dst.GetId() != rax.GetId()) {
        GetMasm()->mov(ArchReg(tmp1), asmjit::x86::rax);
    }

    /* if the new value comes in [er]ax register we have to use a
     * different register as [er]ax will contain the current value */
    // NOTE(review): this branch looks unreachable — the newval == rax case
    // already bailed out via SetFalseResult() above; confirm before removing.
    if (newval.GetId() == rax.GetId()) {
        GetMasm()->mov(ArchReg(tmp3, newval.GetSize()), ArchReg(newval));
        newvalue = tmp3;
    }

    // cmpxchg compares against [er]ax, so load the expected value there.
    if (val.GetId() != rax.GetId()) {
        GetMasm()->mov(asmjit::x86::rax, ArchReg(val).r64());
    }

    GetMasm()->lock().cmpxchg(addr, ArchReg(newvalue));
    GetMasm()->sete(ArchReg(dst)); // ZF=1 iff the swap happened

    // Restore the caller's rax (saved in tmp1) unless dst occupies it.
    if (dst.GetId() != rax.GetId()) {
        GetMasm()->mov(asmjit::x86::rax, ArchReg(tmp1));
    }
}
2924
EncodeCompareAndSwap(Reg dst,Reg obj,Reg offset,Reg val,Reg newval)2925 void Amd64Encoder::EncodeCompareAndSwap(Reg dst, Reg obj, Reg offset, Reg val, Reg newval)
2926 {
2927 EncodeCompareAndSwap(dst, obj, &offset, val, newval);
2928 }
2929
EncodeCompareAndSwap(Reg dst,Reg addr,Reg val,Reg newval)2930 void Amd64Encoder::EncodeCompareAndSwap(Reg dst, Reg addr, Reg val, Reg newval)
2931 {
2932 EncodeCompareAndSwap(dst, addr, nullptr, val, newval);
2933 }
2934
/**
 * Atomically exchange the value at [obj + offset] with `val`;
 * dst receives the previous memory contents.
 * NOTE(review): xchg with a memory operand is implicitly locked on x86,
 * so the explicit lock() prefix is redundant but harmless.
 */
void Amd64Encoder::EncodeUnsafeGetAndSet(Reg dst, Reg obj, Reg offset, Reg val)
{
    ScopedTmpRegU64 tmp(this);
    auto addrReg = ArchReg(tmp);
    auto addr = ArchMem(MemRef(tmp)).Prepare(GetMasm());
    GetMasm()->lea(addrReg, asmjit::x86::ptr(ArchReg(obj), ArchReg(offset)));
    GetMasm()->mov(ArchReg(dst), ArchReg(val)); // xchg swaps dst <-> memory
    GetMasm()->lock().xchg(addr, ArchReg(dst));
}
2944
/**
 * Atomically add `val` to the value at [obj + offset]; dst receives the value
 * that was in memory before the addition (lock xadd semantics).
 * The `tmp` parameter is unused on amd64 (kept for interface parity with other
 * backends).
 */
void Amd64Encoder::EncodeUnsafeGetAndAdd(Reg dst, Reg obj, Reg offset, Reg val, [[maybe_unused]] Reg tmp)
{
    ScopedTmpRegU64 tmp1(this);
    auto addrReg = ArchReg(tmp1);
    auto addr = ArchMem(MemRef(tmp1)).Prepare(GetMasm());
    GetMasm()->lea(addrReg, asmjit::x86::ptr(ArchReg(obj), ArchReg(offset)));
    GetMasm()->mov(ArchReg(dst), ArchReg(val)); // xadd: memory += dst, dst = old memory
    GetMasm()->lock().xadd(addr, ArchReg(dst));
}
2954
EncodeMemoryBarrier(memory_order::Order order)2955 void Amd64Encoder::EncodeMemoryBarrier(memory_order::Order order)
2956 {
2957 if (order == memory_order::FULL) {
2958 /* does the same as mfence but faster, not applicable for NT-writes, though */
2959 GetMasm()->lock().add(asmjit::x86::dword_ptr(asmjit::x86::rsp), asmjit::imm(0));
2960 }
2961 }
2962
EncodeStackOverflowCheck(ssize_t offset)2963 void Amd64Encoder::EncodeStackOverflowCheck(ssize_t offset)
2964 {
2965 MemRef mem(GetTarget().GetStackReg(), offset);
2966 auto m = ArchMem(mem).Prepare(GetMasm());
2967 GetMasm()->test(m, ArchReg(GetTarget().GetParamReg(0)));
2968 }
2969
/// Current emission offset (bytes already written into the code buffer).
size_t Amd64Encoder::GetCursorOffset() const
{
    // NOLINTNEXTLINE(readability-identifier-naming)
    return GetMasm()->offset();
}
2975
/// Move the assembler's emission cursor to `offset` (used to patch earlier code).
void Amd64Encoder::SetCursorOffset(size_t offset)
{
    // NOLINTNEXTLINE(readability-identifier-naming)
    GetMasm()->setOffset(offset);
}
2981
AcquireScratchRegister(TypeInfo type)2982 Reg Amd64Encoder::AcquireScratchRegister(TypeInfo type)
2983 {
2984 return (static_cast<Amd64RegisterDescription *>(GetRegfile()))->AcquireScratchRegister(type);
2985 }
2986
AcquireScratchRegister(Reg reg)2987 void Amd64Encoder::AcquireScratchRegister(Reg reg)
2988 {
2989 (static_cast<Amd64RegisterDescription *>(GetRegfile()))->AcquireScratchRegister(reg);
2990 }
2991
ReleaseScratchRegister(Reg reg)2992 void Amd64Encoder::ReleaseScratchRegister(Reg reg)
2993 {
2994 (static_cast<Amd64RegisterDescription *>(GetRegfile()))->ReleaseScratchRegister(reg);
2995 }
2996
IsScratchRegisterReleased(Reg reg) const2997 bool Amd64Encoder::IsScratchRegisterReleased(Reg reg) const
2998 {
2999 return (static_cast<Amd64RegisterDescription *>(GetRegfile()))->IsScratchRegisterReleased(reg);
3000 }
3001
GetScratchRegistersMask() const3002 RegMask Amd64Encoder::GetScratchRegistersMask() const
3003 {
3004 return (static_cast<const Amd64RegisterDescription *>(GetRegfile()))->GetScratchRegistersMask();
3005 }
3006
GetScratchFpRegistersMask() const3007 RegMask Amd64Encoder::GetScratchFpRegistersMask() const
3008 {
3009 return (static_cast<const Amd64RegisterDescription *>(GetRegfile()))->GetScratchFpRegistersMask();
3010 }
3011
GetAvailableScratchRegisters() const3012 RegMask Amd64Encoder::GetAvailableScratchRegisters() const
3013 {
3014 auto regfile = static_cast<const Amd64RegisterDescription *>(GetRegfile());
3015 return RegMask(regfile->GetScratchRegisters().GetMask());
3016 }
3017
GetAvailableScratchFpRegisters() const3018 VRegMask Amd64Encoder::GetAvailableScratchFpRegisters() const
3019 {
3020 auto regfile = static_cast<const Amd64RegisterDescription *>(GetRegfile());
3021 return VRegMask(regfile->GetScratchFPRegisters().GetMask());
3022 }
3023
/// Object references are plain 64-bit scalars on amd64.
TypeInfo Amd64Encoder::GetRefType()
{
    return INT64_TYPE;
}
3028
/// Pointer to the start of the emitted machine-code buffer.
void *Amd64Encoder::BufferData() const
{
    // NOLINTNEXTLINE(readability-identifier-naming)
    return GetMasm()->bufferData();
}
3034
/// Number of code bytes emitted so far (the assembler's current offset,
/// not the buffer's allocated capacity).
size_t Amd64Encoder::BufferSize() const
{
    // NOLINTNEXTLINE(readability-identifier-naming)
    return GetMasm()->offset();
}
3040
/**
 * Call a two-argument floating-point runtime routine at `entryPoint` following
 * the SysV FP calling convention: arguments in xmm0/xmm1, result in xmm0.
 * Shuffles src0/src1 into xmm0/xmm1 when needed (src1 is staged through a tmp
 * first, in case src1 currently lives in xmm0) and copies the result to dst.
 * Fails (SetFalseResult) for non-FP registers.
 */
void Amd64Encoder::MakeLibCall(Reg dst, Reg src0, Reg src1, void *entryPoint)
{
    if (!dst.IsFloat()) {
        SetFalseResult();
        return;
    }

    if (dst.GetType() == FLOAT32_TYPE) {
        if (!src0.IsFloat() || !src1.IsFloat()) {
            SetFalseResult();
            return;
        }

        if (src0.GetId() != asmjit::x86::xmm0.id() || src1.GetId() != asmjit::x86::xmm1.id()) {
            ScopedTmpRegF32 tmp(this);
            // Stage src1 first so a src0 living in xmm1 / src1 in xmm0 is not clobbered.
            GetMasm()->movss(ArchVReg(tmp), ArchVReg(src1));
            GetMasm()->movss(asmjit::x86::xmm0, ArchVReg(src0));
            GetMasm()->movss(asmjit::x86::xmm1, ArchVReg(tmp));
        }

        MakeCall(entryPoint);

        if (dst.GetId() != asmjit::x86::xmm0.id()) {
            GetMasm()->movss(ArchVReg(dst), asmjit::x86::xmm0);
        }
    } else if (dst.GetType() == FLOAT64_TYPE) {
        if (!src0.IsFloat() || !src1.IsFloat()) {
            SetFalseResult();
            return;
        }

        if (src0.GetId() != asmjit::x86::xmm0.id() || src1.GetId() != asmjit::x86::xmm1.id()) {
            ScopedTmpRegF64 tmp(this);
            // Same staging order as the float case, for the same aliasing reason.
            GetMasm()->movsd(ArchVReg(tmp), ArchVReg(src1));
            GetMasm()->movsd(asmjit::x86::xmm0, ArchVReg(src0));
            GetMasm()->movsd(asmjit::x86::xmm1, ArchVReg(tmp));
        }

        MakeCall(entryPoint);

        if (dst.GetId() != asmjit::x86::xmm0.id()) {
            GetMasm()->movsd(ArchVReg(dst), asmjit::x86::xmm0);
        }
    } else {
        UNREACHABLE();
    }
}
3088
3089 template <bool IS_STORE>
LoadStoreRegisters(RegMask registers,ssize_t slot,size_t startReg,bool isFp)3090 void Amd64Encoder::LoadStoreRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
3091 {
3092 for (size_t i {0}; i < registers.size(); ++i) {
3093 if (!registers.test(i)) {
3094 continue;
3095 }
3096
3097 asmjit::x86::Mem mem = asmjit::x86::ptr(asmjit::x86::rsp, (slot + i - startReg) * DOUBLE_WORD_SIZE_BYTES);
3098
3099 if constexpr (IS_STORE) { // NOLINT
3100 if (isFp) {
3101 GetMasm()->movsd(mem, asmjit::x86::xmm(i));
3102 } else {
3103 GetMasm()->mov(mem, asmjit::x86::gpq(ConvertRegNumber(i)));
3104 }
3105 } else { // NOLINT
3106 if (isFp) {
3107 GetMasm()->movsd(asmjit::x86::xmm(i), mem);
3108 } else {
3109 GetMasm()->mov(asmjit::x86::gpq(ConvertRegNumber(i)), mem);
3110 }
3111 }
3112 }
3113 }
3114
/**
 * Store (IS_STORE) or load registers relative to `base`. When `mask` is
 * non-empty, only registers inside `mask` occupy stack slots, packed densely:
 * `index` counts mask members seen so far and `slot` is pre-biased by the
 * first mask member so the first saved register lands on slot 0's position.
 * With an empty mask, slots are assigned densely over `registers` itself.
 */
template <bool IS_STORE>
void Amd64Encoder::LoadStoreRegisters(RegMask registers, bool isFp, int32_t slot, Reg base, RegMask mask)
{
    auto baseReg = ArchReg(base);
    bool hasMask = mask.any();
    // Bias the slot by the lowest mask member so its slot index becomes 0.
    int32_t index = hasMask ? static_cast<int32_t>(mask.GetMinRegister()) : 0;
    slot -= index;
    for (size_t i = index; i < registers.size(); ++i) {
        if (hasMask) {
            if (!mask.test(i)) {
                continue;
            }
            index++;
        }
        if (!registers.test(i)) {
            continue;
        }

        if (!hasMask) {
            index++;
        }

        // `-1` because we've incremented `index` in advance
        asmjit::x86::Mem mem = asmjit::x86::ptr(baseReg, (slot + index - 1) * DOUBLE_WORD_SIZE_BYTES);

        if constexpr (IS_STORE) {  // NOLINT
            if (isFp) {
                GetMasm()->movsd(mem, asmjit::x86::xmm(i));
            } else {
                GetMasm()->mov(mem, asmjit::x86::gpq(ConvertRegNumber(i)));
            }
        } else {  // NOLINT
            if (isFp) {
                GetMasm()->movsd(asmjit::x86::xmm(i), mem);
            } else {
                GetMasm()->mov(asmjit::x86::gpq(ConvertRegNumber(i)), mem);
            }
        }
    }
}
3155
/// Spill `registers` to rsp-relative slots starting at `slot` (see LoadStoreRegisters<true>).
void Amd64Encoder::SaveRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
{
    LoadStoreRegisters<true>(registers, slot, startReg, isFp);
}
3160
/// Reload `registers` from rsp-relative slots starting at `slot` (see LoadStoreRegisters<false>).
void Amd64Encoder::LoadRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
{
    LoadStoreRegisters<false>(registers, slot, startReg, isFp);
}
3165
/// Spill `registers` to slots relative to `base`, packed according to `mask`.
void Amd64Encoder::SaveRegisters(RegMask registers, bool isFp, ssize_t slot, Reg base, RegMask mask)
{
    LoadStoreRegisters<true>(registers, isFp, slot, base, mask);
}
3170
/// Reload `registers` from slots relative to `base`, packed according to `mask`.
void Amd64Encoder::LoadRegisters(RegMask registers, bool isFp, ssize_t slot, Reg base, RegMask mask)
{
    LoadStoreRegisters<false>(registers, isFp, slot, base, mask);
}
3175
PushRegisters(RegMask registers,bool isFp)3176 void Amd64Encoder::PushRegisters(RegMask registers, bool isFp)
3177 {
3178 for (size_t i = 0; i < registers.size(); i++) {
3179 if (registers[i]) {
3180 if (isFp) {
3181 GetMasm()->sub(asmjit::x86::rsp, DOUBLE_WORD_SIZE_BYTES);
3182 GetMasm()->movsd(asmjit::x86::ptr(asmjit::x86::rsp), ArchVReg(Reg(i, FLOAT64_TYPE)));
3183 } else {
3184 GetMasm()->push(asmjit::x86::gpq(ConvertRegNumber(i)));
3185 }
3186 }
3187 }
3188 }
3189
PopRegisters(RegMask registers,bool isFp)3190 void Amd64Encoder::PopRegisters(RegMask registers, bool isFp)
3191 {
3192 for (ssize_t i = registers.size() - 1; i >= 0; i--) {
3193 if (registers[i]) {
3194 if (isFp) {
3195 GetMasm()->movsd(ArchVReg(Reg(i, FLOAT64_TYPE)), asmjit::x86::ptr(asmjit::x86::rsp));
3196 GetMasm()->add(asmjit::x86::rsp, DOUBLE_WORD_SIZE_BYTES);
3197 } else {
3198 GetMasm()->pop(asmjit::x86::gpq(ConvertRegNumber(i)));
3199 }
3200 }
3201 }
3202 }
3203
/// Underlying asmjit assembler; must have been set before any encoding call.
asmjit::x86::Assembler *Amd64Encoder::GetMasm() const
{
    ASSERT(masm_ != nullptr);
    return masm_;
}
3209
/// Absolute address of a bound label: code-holder base address + label offset.
size_t Amd64Encoder::GetLabelAddress(LabelHolder::LabelId label)
{
    auto code = GetMasm()->code();
    ASSERT(code->isLabelBound(label));
    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    return code->baseAddress() + code->labelOffset(label);
}
3217
/// True if the label entry still has links, i.e. emitted references to a label
/// that has not been resolved/bound yet.
bool Amd64Encoder::LabelHasLinks(LabelHolder::LabelId label)
{
    auto code = GetMasm()->code();
    auto entry = code->labelEntry(label);
    return entry->links() != nullptr;
}
3224
/**
 * Materialize the constant array `arr` (8 or 16 bytes total) into xmm register
 * `xmm`, going through a scratch GPR: the low 64 bits are loaded with movq and,
 * for 16-byte arrays, the high 64 bits are merged in with unpcklpd.
 */
template <typename T, size_t N>
void Amd64Encoder::CopyArrayToXmm(Reg xmm, const std::array<T, N> &arr)
{
    static constexpr auto SIZE {N * sizeof(T)};
    static_assert((SIZE == DOUBLE_WORD_SIZE_BYTES) || (SIZE == 2U * DOUBLE_WORD_SIZE_BYTES));
    ASSERT(xmm.GetType() == FLOAT64_TYPE);

    // Reinterpret the array storage as one or two 64-bit chunks.
    auto data {reinterpret_cast<const uint64_t *>(arr.data())};

    ScopedTmpRegU64 tmpGpr(this);
    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    GetMasm()->mov(ArchReg(tmpGpr), asmjit::imm(data[0]));
    GetMasm()->movq(ArchVReg(xmm), ArchReg(tmpGpr));

    if constexpr (SIZE == 2U * DOUBLE_WORD_SIZE_BYTES) {
        ScopedTmpRegF64 tmpXmm(this);
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        GetMasm()->mov(ArchReg(tmpGpr), asmjit::imm(data[1]));
        GetMasm()->movq(ArchVReg(tmpXmm), ArchReg(tmpGpr));
        // Interleave low quadwords: xmm = [data[1] : data[0]]
        GetMasm()->unpcklpd(ArchVReg(xmm), ArchVReg(tmpXmm));
    }
}
3247
/**
 * Load the 32- or 64-bit immediate `imm` bit-exactly into xmm register `xmm`
 * through a scratch GPR (mov imm -> GPR, then movd/movq GPR -> xmm).
 */
template <typename T>
void Amd64Encoder::CopyImmToXmm(Reg xmm, T imm)
{
    static_assert((sizeof(imm) == WORD_SIZE_BYTES) || (sizeof(imm) == DOUBLE_WORD_SIZE_BYTES));
    ASSERT(xmm.GetSize() == BYTE_SIZE * sizeof(imm));

    if constexpr (sizeof(imm) == WORD_SIZE_BYTES) {  // NOLINT
        ScopedTmpRegU32 tmpGpr(this);
        GetMasm()->mov(ArchReg(tmpGpr), asmjit::imm(bit_cast<uint32_t>(imm)));
        GetMasm()->movd(ArchVReg(xmm), ArchReg(tmpGpr));
    } else {  // NOLINT
        ScopedTmpRegU64 tmpGpr(this);
        GetMasm()->mov(ArchReg(tmpGpr), asmjit::imm(bit_cast<uint64_t>(imm)));
        GetMasm()->movq(ArchVReg(xmm), ArchReg(tmpGpr));
    }
}
3264
/**
 * Disassemble (via Zydis) the single instruction at buffer offset `pc` and
 * print it to `stream`; returns the offset of the next instruction.
 * A negative `codeOffset` finalizes the encoder first and prints the bare
 * mnemonic; otherwise the line is prefixed with the 8-digit hex address
 * `pc + codeOffset`.
 */
size_t Amd64Encoder::DisasmInstr(std::ostream &stream, size_t pc, ssize_t codeOffset) const
{
    if (codeOffset < 0) {
        (const_cast<Amd64Encoder *>(this))->Finalize();
    }
    // NOLINTNEXTLINE(readability-identifier-naming)
    Span code(GetMasm()->bufferData(), GetMasm()->offset());

    // Clamp the decode window to what is left in the buffer past `pc`.
    [[maybe_unused]] size_t dataLeft = code.Size() - pc;
    [[maybe_unused]] constexpr size_t LENGTH = ZYDIS_MAX_INSTRUCTION_LENGTH;  // 15 bytes is max inst length in amd64

    // Initialize decoder context
    ZydisDecoder decoder;
    [[maybe_unused]] bool res =
        ZYAN_SUCCESS(ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_ADDRESS_WIDTH_64));

    // Initialize formatter (AT&T syntax, relative branch targets)
    ZydisFormatter formatter;
    res &= ZYAN_SUCCESS(ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_ATT));
    ZydisFormatterSetProperty(&formatter, ZYDIS_FORMATTER_PROP_FORCE_RELATIVE_BRANCHES, 1);
    ASSERT(res);

    ZydisDecodedInstruction instruction;

    res &= ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&decoder, &code[pc], std::min(LENGTH, dataLeft), &instruction));

    // Format & print the binary instruction structure to human readable format
    char buffer[256];  // NOLINT (modernize-avoid-c-arrays, readability-identifier-naming, readability-magic-numbers)
    res &= ZYAN_SUCCESS(
        ZydisFormatterFormatInstruction(&formatter, &instruction, buffer, sizeof(buffer), uintptr_t(&code[pc])));

    ASSERT(res);

    // Print disassembly
    if (codeOffset < 0) {
        stream << buffer;
    } else {
        stream << std::setw(0x8) << std::right << std::setfill('0') << std::hex << pc + codeOffset << std::dec
               << std::setfill(' ') << ": " << buffer;
    }

    return pc + instruction.length;
}
3308 } // namespace ark::compiler::amd64
3309