• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16 Encoder (implementation of math and mem Low-level emitters)
17 */
18 
19 #include <iomanip>
20 
21 #include "libpandabase/utils/utils.h"
22 #include "compiler/optimizer/code_generator/relocations.h"
23 #include "compiler/optimizer/code_generator/fast_divisor.h"
24 #include "operands.h"
25 #include "scoped_tmp_reg.h"
26 #include "target/amd64/target.h"
27 
28 #include "lib_helpers.inl"
29 
30 #include "Zydis/Zydis.h"
31 
32 #ifndef PANDA_TARGET_MACOS
33 #include "elf.h"
34 #endif  // PANDA_TARGET_MACOS
35 
36 namespace ark::compiler::amd64 {
37 
ArchCcInt(Condition cc)38 static auto ArchCcInt(Condition cc)
39 {
40     switch (cc) {
41         case Condition::EQ:
42             return asmjit::x86::Condition::Code::kEqual;
43         case Condition::NE:
44             return asmjit::x86::Condition::Code::kNotEqual;
45         case Condition::LT:
46             return asmjit::x86::Condition::Code::kSignedLT;
47         case Condition::GT:
48             return asmjit::x86::Condition::Code::kSignedGT;
49         case Condition::LE:
50             return asmjit::x86::Condition::Code::kSignedLE;
51         case Condition::GE:
52             return asmjit::x86::Condition::Code::kSignedGE;
53         case Condition::LO:
54             return asmjit::x86::Condition::Code::kUnsignedLT;
55         case Condition::LS:
56             return asmjit::x86::Condition::Code::kUnsignedLE;
57         case Condition::HI:
58             return asmjit::x86::Condition::Code::kUnsignedGT;
59         case Condition::HS:
60             return asmjit::x86::Condition::Code::kUnsignedGE;
61         // NOTE(igorban) : Remove them
62         case Condition::MI:
63             return asmjit::x86::Condition::Code::kNegative;
64         case Condition::PL:
65             return asmjit::x86::Condition::Code::kPositive;
66         case Condition::VS:
67             return asmjit::x86::Condition::Code::kOverflow;
68         case Condition::VC:
69             return asmjit::x86::Condition::Code::kNotOverflow;
70         case Condition::AL:
71         case Condition::NV:
72         default:
73             UNREACHABLE();
74             return asmjit::x86::Condition::Code::kEqual;
75     }
76 }
ArchCcFloat(Condition cc)77 static auto ArchCcFloat(Condition cc)
78 {
79     switch (cc) {
80         case Condition::EQ:
81             return asmjit::x86::Condition::Code::kEqual;
82         case Condition::NE:
83             return asmjit::x86::Condition::Code::kNotEqual;
84         case Condition::LT:
85             return asmjit::x86::Condition::Code::kUnsignedLT;
86         case Condition::GT:
87             return asmjit::x86::Condition::Code::kUnsignedGT;
88         case Condition::LE:
89             return asmjit::x86::Condition::Code::kUnsignedLE;
90         case Condition::GE:
91             return asmjit::x86::Condition::Code::kUnsignedGE;
92         case Condition::LO:
93             return asmjit::x86::Condition::Code::kUnsignedLT;
94         case Condition::LS:
95             return asmjit::x86::Condition::Code::kUnsignedLE;
96         case Condition::HI:
97             return asmjit::x86::Condition::Code::kUnsignedGT;
98         case Condition::HS:
99             return asmjit::x86::Condition::Code::kUnsignedGE;
100         // NOTE(igorban) : Remove them
101         case Condition::MI:
102             return asmjit::x86::Condition::Code::kNegative;
103         case Condition::PL:
104             return asmjit::x86::Condition::Code::kPositive;
105         case Condition::VS:
106             return asmjit::x86::Condition::Code::kOverflow;
107         case Condition::VC:
108             return asmjit::x86::Condition::Code::kNotOverflow;
109         case Condition::AL:
110         case Condition::NV:
111         default:
112             UNREACHABLE();
113             return asmjit::x86::Condition::Code::kEqual;
114     }
115 }
116 /// Converters
ArchCc(Condition cc,bool isFloat=false)117 static asmjit::x86::Condition::Code ArchCc(Condition cc, bool isFloat = false)
118 {
119     return isFloat ? ArchCcFloat(cc) : ArchCcInt(cc);
120 }
121 
ArchCcTest(Condition cc)122 static asmjit::x86::Condition::Code ArchCcTest(Condition cc)
123 {
124     ASSERT(cc == Condition::TST_EQ || cc == Condition::TST_NE);
125     return cc == Condition::TST_EQ ? asmjit::x86::Condition::Code::kEqual : asmjit::x86::Condition::Code::kNotEqual;
126 }
127 
CcMatchesNan(Condition cc)128 static bool CcMatchesNan(Condition cc)
129 {
130     switch (cc) {
131         case Condition::NE:
132         case Condition::LT:
133         case Condition::LE:
134         case Condition::HI:
135         case Condition::HS:
136             return true;
137 
138         default:
139             return false;
140     }
141 }
142 
143 /// Converters
ArchReg(Reg reg,uint8_t size=0)144 static asmjit::x86::Gp ArchReg(Reg reg, uint8_t size = 0)
145 {
146     ASSERT(reg.IsValid());
147     if (reg.IsScalar()) {
148         size_t regSize = size == 0 ? reg.GetSize() : size;
149         auto archId = ConvertRegNumber(reg.GetId());
150 
151         asmjit::x86::Gp archReg;
152         switch (regSize) {
153             case DOUBLE_WORD_SIZE:
154                 archReg = asmjit::x86::Gp(asmjit::x86::Gpq::kSignature, archId);
155                 break;
156             case WORD_SIZE:
157                 archReg = asmjit::x86::Gp(asmjit::x86::Gpd::kSignature, archId);
158                 break;
159             case HALF_SIZE:
160                 archReg = asmjit::x86::Gp(asmjit::x86::Gpw::kSignature, archId);
161                 break;
162             case BYTE_SIZE:
163                 archReg = asmjit::x86::Gp(asmjit::x86::GpbLo::kSignature, archId);
164                 break;
165 
166             default:
167                 UNREACHABLE();
168         }
169 
170         ASSERT(archReg.isValid());
171         return archReg;
172     }
173     if (reg.GetId() == ConvertRegNumber(asmjit::x86::rsp.id())) {
174         return asmjit::x86::rsp;
175     }
176 
177     // Invalid register type
178     UNREACHABLE();
179     return asmjit::x86::rax;
180 }
181 
ArchVReg(Reg reg)182 static asmjit::x86::Xmm ArchVReg(Reg reg)
183 {
184     ASSERT(reg.IsValid() && reg.IsFloat());
185     auto archVreg = asmjit::x86::xmm(reg.GetId());
186     return archVreg;
187 }
188 
ArchImm(Imm imm)189 static asmjit::Imm ArchImm(Imm imm)
190 {
191     ASSERT(imm.GetType() == INT64_TYPE);
192     return asmjit::imm(imm.GetAsInt());
193 }
194 
ImmToUnsignedInt(Imm imm)195 static uint64_t ImmToUnsignedInt(Imm imm)
196 {
197     ASSERT(imm.GetType() == INT64_TYPE);
198     return uint64_t(imm.GetAsInt());
199 }
200 
ImmFitsSize(int64_t imm,uint8_t size)201 static bool ImmFitsSize(int64_t imm, uint8_t size)
202 {
203     if (size == DOUBLE_WORD_SIZE) {
204         size = WORD_SIZE;
205     }
206 
207     // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult)
208     int64_t max = (uint64_t(1) << (size - 1U)) - 1U;
209     int64_t min = ~uint64_t(max);
210     ASSERT(min < 0);
211     ASSERT(max > 0);
212 
213     return imm >= min && imm <= max;
214 }
215 
CreateLabel()216 LabelHolder::LabelId Amd64LabelHolder::CreateLabel()
217 {
218     ++id_;
219 
220     auto masm = (static_cast<Amd64Encoder *>(GetEncoder()))->GetMasm();
221     auto label = masm->newLabel();
222 
223     auto allocator = GetEncoder()->GetAllocator();
224     labels_.push_back(allocator->New<LabelType>(std::move(label)));
225     ASSERT(labels_.size() == id_);
226     return id_ - 1;
227 }
228 
ArchMem(MemRef mem)229 ArchMem::ArchMem(MemRef mem)
230 {
231     bool base = mem.HasBase();
232     bool regoffset = mem.HasIndex();
233     bool shift = mem.HasScale();
234     bool offset = mem.HasDisp();
235 
236     if (base && !regoffset && !shift) {
237         // Default memory - base + offset
238         mem_ = asmjit::x86::ptr(ArchReg(mem.GetBase()), mem.GetDisp());
239     } else if (base && regoffset && !offset) {
240         auto baseSize = mem.GetBase().GetSize();
241         auto indexSize = mem.GetIndex().GetSize();
242 
243         ASSERT(baseSize >= indexSize);
244         ASSERT(indexSize >= WORD_SIZE);
245 
246         if (baseSize > indexSize) {
247             needExtendIndex_ = true;
248         }
249 
250         if (mem.GetScale() == 0) {
251             mem_ = asmjit::x86::ptr(ArchReg(mem.GetBase()), ArchReg(mem.GetIndex(), baseSize));
252         } else {
253             auto scale = mem.GetScale();
254             if (scale <= 3U) {
255                 mem_ = asmjit::x86::ptr(ArchReg(mem.GetBase()), ArchReg(mem.GetIndex(), baseSize), scale);
256             } else {
257                 mem_ = asmjit::x86::ptr(ArchReg(mem.GetBase()), ArchReg(mem.GetIndex(), baseSize));
258                 bigShift_ = scale;
259             }
260         }
261     } else {
262         // Wrong memRef
263         UNREACHABLE();
264     }
265 }
266 
Prepare(asmjit::x86::Assembler * masm)267 asmjit::x86::Mem ArchMem::Prepare(asmjit::x86::Assembler *masm)
268 {
269     if (isPrepared_) {
270         return mem_;
271     }
272 
273     if (bigShift_ != 0) {
274         ASSERT(!mem_.hasOffset() && mem_.hasIndex() && bigShift_ > 3U);
275         masm->shl(mem_.indexReg().as<asmjit::x86::Gp>(), asmjit::imm(bigShift_));
276     }
277 
278     if (needExtendIndex_) {
279         ASSERT(mem_.hasIndex());
280         auto qIndex = mem_.indexReg().as<asmjit::x86::Gp>();
281         auto dIndex {qIndex};
282         dIndex.setSignature(asmjit::x86::Gpd::kSignature);
283         masm->movsxd(qIndex, dIndex);
284     }
285 
286     isPrepared_ = true;
287     return mem_;
288 }
289 
// Error handler that reports asmjit failures back to the owning encoder.
AsmJitErrorHandler::AsmJitErrorHandler(Encoder *encoder) : encoder_(encoder)
{
    ASSERT(encoder != nullptr);
}
294 
// Called by asmjit on any emit/validation error. The error details are
// discarded; the encoder is simply flagged as failed.
void AsmJitErrorHandler::handleError([[maybe_unused]] asmjit::Error err, [[maybe_unused]] const char *message,
                                     [[maybe_unused]] asmjit::BaseEmitter *origin)
{
    encoder_->SetFalseResult();
}
300 
CreateLabels(LabelId max)301 void Amd64LabelHolder::CreateLabels(LabelId max)
302 {
303     for (LabelId i = 0; i < max; ++i) {
304         CreateLabel();
305     }
306 }
307 
GetLabel(LabelId id)308 Amd64LabelHolder::LabelType *Amd64LabelHolder::GetLabel(LabelId id)
309 {
310     ASSERT(labels_.size() > id);
311     return labels_[id];
312 }
313 
// Number of labels created so far.
Amd64LabelHolder::LabelId Amd64LabelHolder::Size()
{
    return labels_.size();
}
318 
BindLabel(LabelId id)319 void Amd64LabelHolder::BindLabel(LabelId id)
320 {
321     static_cast<Amd64Encoder *>(GetEncoder())->GetMasm()->bind(*labels_[id]);
322 }
323 
// All encoder state is arena-allocated; heavy initialization is deferred to InitMasm().
Amd64Encoder::Amd64Encoder(ArenaAllocator *allocator) : Encoder(allocator, Arch::X86_64, false) {}
325 
// Members were created via the arena allocator (see InitMasm), so their
// storage is reclaimed with the arena; only the destructors are run here.
Amd64Encoder::~Amd64Encoder()
{
    if (masm_ != nullptr) {
        masm_->~Assembler();
        masm_ = nullptr;
    }

    if (codeHolder_ != nullptr) {
        codeHolder_->~CodeHolder();
        codeHolder_ = nullptr;
    }

    if (errorHandler_ != nullptr) {
        errorHandler_->~ErrorHandler();
        errorHandler_ = nullptr;
    }

    if (labels_ != nullptr) {
        labels_->~Amd64LabelHolder();
        labels_ = nullptr;
    }
}
348 
// Label holder accessor; valid only after InitMasm() has created it.
LabelHolder *Amd64Encoder::GetLabels() const
{
    ASSERT(labels_ != nullptr);
    return labels_;
}
354 
// The amd64 backend is always considered available.
bool Amd64Encoder::IsValid() const
{
    return true;
}
359 
// Target descriptor for this backend (x86-64).
constexpr auto Amd64Encoder::GetTarget()
{
    return ark::compiler::Target(Arch::X86_64);
}
364 
InitMasm()365 bool Amd64Encoder::InitMasm()
366 {
367     if (masm_ == nullptr) {
368         labels_ = GetAllocator()->New<Amd64LabelHolder>(this);
369         if (labels_ == nullptr) {
370             SetFalseResult();
371             return false;
372         }
373 
374         asmjit::Environment env;
375         env.setArch(asmjit::Environment::kArchX64);
376 
377         codeHolder_ = GetAllocator()->New<asmjit::CodeHolder>(GetAllocator());
378         if (codeHolder_ == nullptr) {
379             SetFalseResult();
380             return false;
381         }
382         codeHolder_->init(env, 0U);
383 
384         masm_ = GetAllocator()->New<asmjit::x86::Assembler>(codeHolder_);
385         if (masm_ == nullptr) {
386             SetFalseResult();
387             return false;
388         }
389 
390         // Enable strict validation.
391         masm_->addValidationOptions(asmjit::BaseEmitter::kValidationOptionAssembler);
392         errorHandler_ = GetAllocator()->New<AsmJitErrorHandler>(this);
393         if (errorHandler_ == nullptr) {
394             SetFalseResult();
395             return false;
396         }
397         masm_->setErrorHandler(errorHandler_);
398 
399         // Make sure that the compiler uses the same scratch registers as the assembler
400         CHECK_EQ(compiler::arch_info::x86_64::TEMP_REGS, GetTarget().GetTempRegsMask());
401         CHECK_EQ(compiler::arch_info::x86_64::TEMP_FP_REGS, GetTarget().GetTempVRegsMask());
402     }
403     return true;
404 }
405 
// Flatten the emitted sections, resolve pending links, and relocate/copy the
// final machine code into an arena-allocated buffer.
void Amd64Encoder::Finalize()
{
    auto code = GetMasm()->code();
    auto codeSize = code->codeSize();

    code->flatten();
    code->resolveUnresolvedLinks();

    // NOTE(review): the Alloc result is not checked for nullptr before use —
    // confirm the arena allocator aborts on failure rather than returning null.
    auto codeBuffer = GetAllocator()->Alloc(codeSize);

    code->relocateToBase(reinterpret_cast<uintptr_t>(codeBuffer));
    code->copyFlattenedData(codeBuffer, codeSize, asmjit::CodeHolder::kCopyPadSectionBuffer);
}
419 
EncodeJump(LabelHolder::LabelId id)420 void Amd64Encoder::EncodeJump(LabelHolder::LabelId id)
421 {
422     auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
423     GetMasm()->jmp(*label);
424 }
425 
EncodeJump(LabelHolder::LabelId id,Reg src0,Reg src1,Condition cc)426 void Amd64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
427 {
428     if (src0.IsScalar()) {
429         if (src0.GetSize() == src1.GetSize()) {
430             GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
431         } else if (src0.GetSize() > src1.GetSize()) {
432             ScopedTmpReg tmpReg(this, src0.GetType());
433             EncodeCast(tmpReg, false, src1, false);
434             GetMasm()->cmp(ArchReg(src0), ArchReg(tmpReg));
435         } else {
436             ScopedTmpReg tmpReg(this, src1.GetType());
437             EncodeCast(tmpReg, false, src0, false);
438             GetMasm()->cmp(ArchReg(tmpReg), ArchReg(src1));
439         }
440     } else if (src0.GetType() == FLOAT32_TYPE) {
441         GetMasm()->comiss(ArchVReg(src0), ArchVReg(src1));
442     } else {
443         GetMasm()->comisd(ArchVReg(src0), ArchVReg(src1));
444     }
445 
446     auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
447     if (src0.IsScalar()) {
448         GetMasm()->j(ArchCc(cc, src0.IsFloat()), *label);
449         return;
450     }
451 
452     if (CcMatchesNan(cc)) {
453         GetMasm()->jp(*label);
454         GetMasm()->j(ArchCc(cc, src0.IsFloat()), *label);
455     } else {
456         auto end = GetMasm()->newLabel();
457 
458         GetMasm()->jp(end);
459         GetMasm()->j(ArchCc(cc, src0.IsFloat()), *label);
460         GetMasm()->bind(end);
461     }
462 }
463 
EncodeJump(LabelHolder::LabelId id,Reg src,Imm imm,Condition cc)464 void Amd64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
465 {
466     ASSERT(src.IsScalar());
467 
468     auto immVal = imm.GetAsInt();
469     if (immVal == 0) {
470         EncodeJump(id, src, cc);
471         return;
472     }
473 
474     if (ImmFitsSize(immVal, src.GetSize())) {
475         auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
476 
477         GetMasm()->cmp(ArchReg(src), asmjit::imm(immVal));
478         GetMasm()->j(ArchCc(cc), *label);
479     } else {
480         ScopedTmpReg tmpReg(this, src.GetType());
481         GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
482         EncodeJump(id, src, tmpReg, cc);
483     }
484 }
485 
EncodeJumpTest(LabelHolder::LabelId id,Reg src0,Reg src1,Condition cc)486 void Amd64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
487 {
488     ASSERT(src0.IsScalar());
489     if (src0.GetSize() == src1.GetSize()) {
490         GetMasm()->test(ArchReg(src0), ArchReg(src1));
491     } else if (src0.GetSize() > src1.GetSize()) {
492         ScopedTmpReg tmpReg(this, src0.GetType());
493         EncodeCast(tmpReg, false, src1, false);
494         GetMasm()->test(ArchReg(src0), ArchReg(tmpReg));
495     } else {
496         ScopedTmpReg tmpReg(this, src1.GetType());
497         EncodeCast(tmpReg, false, src0, false);
498         GetMasm()->test(ArchReg(tmpReg), ArchReg(src1));
499     }
500 
501     auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
502     GetMasm()->j(ArchCcTest(cc), *label);
503 }
504 
EncodeJumpTest(LabelHolder::LabelId id,Reg src,Imm imm,Condition cc)505 void Amd64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
506 {
507     ASSERT(src.IsScalar());
508 
509     auto immVal = imm.GetAsInt();
510     if (ImmFitsSize(immVal, src.GetSize())) {
511         auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
512 
513         GetMasm()->test(ArchReg(src), asmjit::imm(immVal));
514         GetMasm()->j(ArchCcTest(cc), *label);
515     } else {
516         ScopedTmpReg tmpReg(this, src.GetType());
517         GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
518         EncodeJumpTest(id, src, tmpReg, cc);
519     }
520 }
521 
EncodeJump(LabelHolder::LabelId id,Reg src,Condition cc)522 void Amd64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Condition cc)
523 {
524     if (src.IsScalar()) {
525         auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
526 
527         GetMasm()->cmp(ArchReg(src), asmjit::imm(0));
528         GetMasm()->j(ArchCc(cc), *label);
529         return;
530     }
531 
532     ScopedTmpReg tmpReg(this, src.GetType());
533     if (src.GetType() == FLOAT32_TYPE) {
534         GetMasm()->xorps(ArchVReg(tmpReg), ArchVReg(tmpReg));
535     } else {
536         GetMasm()->xorpd(ArchVReg(tmpReg), ArchVReg(tmpReg));
537     }
538     EncodeJump(id, src, tmpReg, cc);
539 }
540 
// Unconditional indirect jump through a register.
void Amd64Encoder::EncodeJump(Reg dst)
{
    GetMasm()->jmp(ArchReg(dst));
}
545 
// Emit a `jmp rel32` (opcode 0xe9) with a zero displacement and record a
// PLT32 relocation so the displacement is patched later.
void Amd64Encoder::EncodeJump(RelocationInfo *relocation)
{
#ifdef PANDA_TARGET_MACOS
    LOG(FATAL, COMPILER) << "Not supported in Macos build";
#else
    // NOLINTNEXTLINE(readability-magic-numbers)
    std::array<uint8_t, 5U> data = {0xe9, 0, 0, 0, 0};
    GetMasm()->embed(data.data(), data.size());

    // The relocation targets the 4-byte displacement field at the end of the
    // instruction; addend -4 accounts for rel32 being relative to the next insn.
    constexpr int ADDEND = 4;
    relocation->offset = GetCursorOffset() - ADDEND;
    relocation->addend = -ADDEND;
    relocation->type = R_X86_64_PLT32;
#endif
}
561 
EncodeBitTestAndBranch(LabelHolder::LabelId id,compiler::Reg reg,uint32_t bitPos,bool bitValue)562 void Amd64Encoder::EncodeBitTestAndBranch(LabelHolder::LabelId id, compiler::Reg reg, uint32_t bitPos, bool bitValue)
563 {
564     ASSERT(reg.IsScalar() && reg.GetSize() > bitPos);
565     auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
566     if (reg.GetSize() == DOUBLE_WORD_SIZE) {
567         ScopedTmpRegU64 tmpReg(this);
568         GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(static_cast<uint64_t>(1) << bitPos));
569         GetMasm()->test(ArchReg(reg), ArchReg(tmpReg));
570     } else {
571         GetMasm()->test(ArchReg(reg), asmjit::imm(1U << bitPos));
572     }
573     if (bitValue) {
574         GetMasm()->j(ArchCc(Condition::NE), *label);
575     } else {
576         GetMasm()->j(ArchCc(Condition::EQ), *label);
577     }
578 }
579 
MakeCall(compiler::RelocationInfo * relocation)580 void Amd64Encoder::MakeCall([[maybe_unused]] compiler::RelocationInfo *relocation)
581 {
582 #ifdef PANDA_TARGET_MACOS
583     LOG(FATAL, COMPILER) << "Not supported in Macos build";
584 #else
585     // NOLINTNEXTLINE(readability-magic-numbers)
586     std::array<uint8_t, 5U> data = {0xe8, 0, 0, 0, 0};
587     GetMasm()->embed(data.data(), data.size());
588 
589     relocation->offset = GetCursorOffset() - 4_I;
590     relocation->addend = -4_I;
591     relocation->type = R_X86_64_PLT32;
592 #endif
593 }
594 
MakeCall(LabelHolder::LabelId id)595 void Amd64Encoder::MakeCall(LabelHolder::LabelId id)
596 {
597     auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
598     GetMasm()->call(*label);
599 }
600 
MakeCall(const void * entryPoint)601 void Amd64Encoder::MakeCall(const void *entryPoint)
602 {
603     ScopedTmpRegU64 tmpReg(this);
604     GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(entryPoint));
605     GetMasm()->call(ArchReg(tmpReg));
606 }
607 
// Indirect call through a register.
void Amd64Encoder::MakeCall(Reg reg)
{
    GetMasm()->call(ArchReg(reg));
}
612 
MakeCall(MemRef entryPoint)613 void Amd64Encoder::MakeCall(MemRef entryPoint)
614 {
615     ScopedTmpRegU64 tmpReg(this);
616     EncodeLdr(tmpReg, false, entryPoint);
617     GetMasm()->call(ArchReg(tmpReg));
618 }
619 
620 template <typename Func>
EncodeRelativePcMov(Reg reg,intptr_t offset,Func encodeInstruction)621 void Amd64Encoder::EncodeRelativePcMov(Reg reg, intptr_t offset, Func encodeInstruction)
622 {
623     // NOLINTNEXTLINE(readability-identifier-naming)
624     auto pos = GetMasm()->offset();
625     encodeInstruction(reg, offset);
626     // NOLINTNEXTLINE(readability-identifier-naming)
627     offset -= (GetMasm()->offset() - pos);
628     // NOLINTNEXTLINE(readability-identifier-naming)
629     GetMasm()->setOffset(pos);
630     encodeInstruction(reg, offset);
631 }
632 
MakeCallAot(intptr_t offset)633 void Amd64Encoder::MakeCallAot(intptr_t offset)
634 {
635     ScopedTmpRegU64 tmpReg(this);
636     EncodeRelativePcMov(tmpReg, offset, [this](Reg reg, intptr_t offset) {
637         GetMasm()->long_().mov(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
638     });
639     GetMasm()->call(ArchReg(tmpReg));
640 }
641 
CanMakeCallByOffset(intptr_t offset)642 bool Amd64Encoder::CanMakeCallByOffset(intptr_t offset)
643 {
644     return offset == static_cast<intptr_t>(static_cast<int32_t>(offset));
645 }
646 
// Direct near call to a cursor-relative offset; the caller is expected to
// have verified reachability via CanMakeCallByOffset().
void Amd64Encoder::MakeCallByOffset(intptr_t offset)
{
    GetMasm()->call(GetCursorOffset() + int32_t(offset));
}
651 
MakeLoadAotTable(intptr_t offset,Reg reg)652 void Amd64Encoder::MakeLoadAotTable(intptr_t offset, Reg reg)
653 {
654     EncodeRelativePcMov(reg, offset, [this](Reg reg, intptr_t offset) {
655         GetMasm()->long_().mov(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
656     });
657 }
658 
MakeLoadAotTableAddr(intptr_t offset,Reg addr,Reg val)659 void Amd64Encoder::MakeLoadAotTableAddr([[maybe_unused]] intptr_t offset, [[maybe_unused]] Reg addr,
660                                         [[maybe_unused]] Reg val)
661 {
662     EncodeRelativePcMov(addr, offset, [this](Reg reg, intptr_t offset) {
663         GetMasm()->long_().lea(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
664     });
665     GetMasm()->mov(ArchReg(val), asmjit::x86::ptr(ArchReg(addr)));
666 }
667 
// Emit a breakpoint trap (int3) to abort execution.
void Amd64Encoder::EncodeAbort()
{
    GetMasm()->int3();
}
672 
// Emit a function return.
void Amd64Encoder::EncodeReturn()
{
    GetMasm()->ret();
}
677 
// Multiply-by-immediate is not implemented for amd64; the encoder is simply
// marked as failed so the caller can detect the unsupported lowering.
void Amd64Encoder::EncodeMul([[maybe_unused]] Reg dst, [[maybe_unused]] Reg src, [[maybe_unused]] Imm imm)
{
    SetFalseResult();
}
682 
// Emit a single-byte no-op.
void Amd64Encoder::EncodeNop()
{
    GetMasm()->nop();
}
687 
EncodeMov(Reg dst,Reg src)688 void Amd64Encoder::EncodeMov(Reg dst, Reg src)
689 {
690     if (dst == src) {
691         return;
692     }
693 
694     if (dst.IsFloat() != src.IsFloat()) {
695         ASSERT(src.GetSize() == dst.GetSize());
696         if (dst.GetSize() == WORD_SIZE) {
697             if (dst.IsFloat()) {
698                 GetMasm()->movd(ArchVReg(dst), ArchReg(src));
699             } else {
700                 GetMasm()->movd(ArchReg(dst), ArchVReg(src));
701             }
702         } else {
703             ASSERT(dst.GetSize() == DOUBLE_WORD_SIZE);
704             if (dst.IsFloat()) {
705                 GetMasm()->movq(ArchVReg(dst), ArchReg(src));
706             } else {
707                 GetMasm()->movq(ArchReg(dst), ArchVReg(src));
708             }
709         }
710         return;
711     }
712 
713     if (dst.IsFloat()) {
714         ASSERT(src.IsFloat());
715         if (dst.GetType() == FLOAT32_TYPE) {
716             GetMasm()->movss(ArchVReg(dst), ArchVReg(src));
717         } else {
718             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
719         }
720         return;
721     }
722 
723     if (dst.GetSize() < WORD_SIZE && dst.GetSize() == src.GetSize()) {
724         GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
725     }
726 
727     if (dst.GetSize() == src.GetSize()) {
728         GetMasm()->mov(ArchReg(dst), ArchReg(src));
729     } else {
730         EncodeCast(dst, false, src, false);
731     }
732 }
733 
EncodeNeg(Reg dst,Reg src)734 void Amd64Encoder::EncodeNeg(Reg dst, Reg src)
735 {
736     if (dst.IsScalar()) {
737         EncodeMov(dst, src);
738         GetMasm()->neg(ArchReg(dst));
739         return;
740     }
741 
742     if (dst.GetType() == FLOAT32_TYPE) {
743         ScopedTmpRegF32 tmp(this);
744         CopyImmToXmm(tmp, -0.0F);
745 
746         if (dst.GetId() != src.GetId()) {
747             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
748         }
749         GetMasm()->xorps(ArchVReg(dst), ArchVReg(tmp));
750     } else {
751         ScopedTmpRegF64 tmp(this);
752         CopyImmToXmm(tmp, -0.0);
753 
754         if (dst.GetId() != src.GetId()) {
755             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
756         }
757         GetMasm()->xorps(ArchVReg(dst), ArchVReg(tmp));
758     }
759 }
760 
EncodeAbs(Reg dst,Reg src)761 void Amd64Encoder::EncodeAbs(Reg dst, Reg src)
762 {
763     if (dst.IsScalar()) {
764         auto size = std::max<uint8_t>(src.GetSize(), WORD_SIZE);
765 
766         if (dst.GetId() != src.GetId()) {
767             GetMasm()->mov(ArchReg(dst), ArchReg(src));
768             GetMasm()->neg(ArchReg(dst));
769             GetMasm()->cmovl(ArchReg(dst, size), ArchReg(src, size));
770         } else if (GetScratchRegistersCount() > 0) {
771             ScopedTmpReg tmpReg(this, dst.GetType());
772 
773             GetMasm()->mov(ArchReg(tmpReg), ArchReg(src));
774             GetMasm()->neg(ArchReg(tmpReg));
775 
776             GetMasm()->cmovl(ArchReg(tmpReg, size), ArchReg(src, size));
777             GetMasm()->mov(ArchReg(dst), ArchReg(tmpReg));
778         } else {
779             auto end = GetMasm()->newLabel();
780 
781             GetMasm()->test(ArchReg(dst), ArchReg(dst));
782             GetMasm()->jns(end);
783 
784             GetMasm()->neg(ArchReg(dst));
785             GetMasm()->bind(end);
786         }
787         return;
788     }
789 
790     if (dst.GetType() == FLOAT32_TYPE) {
791         ScopedTmpRegF32 tmp(this);
792         // NOLINTNEXTLINE(readability-magic-numbers)
793         CopyImmToXmm(tmp, uint32_t(0x7fffffff));
794 
795         if (dst.GetId() != src.GetId()) {
796             GetMasm()->movss(ArchVReg(dst), ArchVReg(src));
797         }
798         GetMasm()->andps(ArchVReg(dst), ArchVReg(tmp));
799     } else {
800         ScopedTmpRegF64 tmp(this);
801         // NOLINTNEXTLINE(readability-magic-numbers)
802         CopyImmToXmm(tmp, uint64_t(0x7fffffffffffffff));
803 
804         if (dst.GetId() != src.GetId()) {
805             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
806         }
807         GetMasm()->andps(ArchVReg(dst), ArchVReg(tmp));
808     }
809 }
810 
EncodeNot(Reg dst,Reg src)811 void Amd64Encoder::EncodeNot(Reg dst, Reg src)
812 {
813     ASSERT(dst.IsScalar());
814 
815     EncodeMov(dst, src);
816     GetMasm()->not_(ArchReg(dst));
817 }
818 
// Square root of a float register.
// NOTE(review): the packed forms (sqrtps/sqrtpd) are used for what appears to
// be a scalar value — the low lane is correct, but upper lanes are processed
// too. Confirm this is intentional (it avoids the destination-register
// dependency that the scalar sqrtss/sqrtsd forms would introduce).
void Amd64Encoder::EncodeSqrt(Reg dst, Reg src)
{
    ASSERT(dst.IsFloat());
    if (src.GetType() == FLOAT32_TYPE) {
        GetMasm()->sqrtps(ArchVReg(dst), ArchVReg(src));
    } else {
        GetMasm()->sqrtpd(ArchVReg(dst), ArchVReg(src));
    }
}
828 
EncodeCastFloatToScalar(Reg dst,bool dstSigned,Reg src)829 void Amd64Encoder::EncodeCastFloatToScalar(Reg dst, bool dstSigned, Reg src)
830 {
831     // We DON'T support casts from float32/64 to int8/16 and bool, because this caste is not declared anywhere
832     // in other languages and architecture, we do not know what the behavior should be.
833     ASSERT(dst.GetSize() >= WORD_SIZE);
834     auto end = GetMasm()->newLabel();
835 
836     // if src is NaN, then dst = 0
837     EncodeCastFloatCheckNan(dst, src, end);
838 
839     if (dstSigned) {
840         EncodeCastFloatSignCheckRange(dst, src, end);
841     } else {
842         EncodeCastFloatUnsignCheckRange(dst, src, end);
843     }
844 
845     if (src.GetType() == FLOAT32_TYPE) {
846         if (dst.GetSize() == DOUBLE_WORD_SIZE) {
847             EncodeCastFloat32ToUint64(dst, src);
848         } else {
849             GetMasm()->cvttss2si(ArchReg(dst, DOUBLE_WORD_SIZE), ArchVReg(src));
850         }
851     } else {
852         if (dst.GetSize() == DOUBLE_WORD_SIZE) {
853             EncodeCastFloat64ToUint64(dst, src);
854         } else {
855             GetMasm()->cvttsd2si(ArchReg(dst, DOUBLE_WORD_SIZE), ArchVReg(src));
856         }
857     }
858 
859     GetMasm()->bind(end);
860 }
861 
// Convert float32 -> uint64. cvttss2si only produces a signed int64, so
// values >= 2^63 are handled by subtracting 2^63 before converting and then
// restoring the top bit with an XOR.
void Amd64Encoder::EncodeCastFloat32ToUint64(Reg dst, Reg src)
{
    auto bigNumberLabel = GetMasm()->newLabel();
    auto endLabel = GetMasm()->newLabel();
    ScopedTmpReg tmpReg(this, src.GetType());
    ScopedTmpReg tmpNum(this, dst.GetType());

    // 0x5F000000 is 2^63 as a float32 — the first value that does not fit in
    // a signed int64.
    // NOLINTNEXTLINE (readability-magic-numbers)
    GetMasm()->mov(ArchReg(dst, WORD_SIZE), asmjit::imm(0x5F000000));
    GetMasm()->movd(ArchVReg(tmpReg), ArchReg(dst, WORD_SIZE));
    GetMasm()->comiss(ArchVReg(src), ArchVReg(tmpReg));
    GetMasm()->jnb(bigNumberLabel);

    // src < 2^63: a plain signed truncating conversion is sufficient.
    GetMasm()->cvttss2si(ArchReg(dst), ArchVReg(src));
    GetMasm()->jmp(endLabel);

    // src >= 2^63: subtract 2^63, convert, then set the top bit back.
    // NOTE(review): subss modifies the src register in place — callers must
    // not rely on src being preserved; confirm this is acceptable here.
    GetMasm()->bind(bigNumberLabel);
    GetMasm()->subss(ArchVReg(src), ArchVReg(tmpReg));
    GetMasm()->cvttss2si(ArchReg(dst), ArchVReg(src));
    // NOLINTNEXTLINE (readability-magic-numbers)
    GetMasm()->mov(ArchReg(tmpNum), asmjit::imm(0x8000000000000000));
    GetMasm()->xor_(ArchReg(dst), ArchReg(tmpNum));
    GetMasm()->bind(endLabel);
}
887 
// Converts a float64 in `src` to an unsigned 64-bit integer in `dst`.
// Mirror of EncodeCastFloat32ToUint64: cvttsd2si is signed-only, so values
// >= 2^63 are rebased by 2^63 before conversion and the sign bit is xor-ed back.
// NOTE(review): the big-number path modifies `src` (subsd) — presumably callers
// treat the source vreg as clobberable here; confirm against call sites.
void Amd64Encoder::EncodeCastFloat64ToUint64(Reg dst, Reg src)
{
    auto bigNumberLabel = GetMasm()->newLabel();
    auto endLabel = GetMasm()->newLabel();
    ScopedTmpReg tmpReg(this, src.GetType());
    ScopedTmpReg tmpNum(this, dst.GetType());

    // It is max number with max degree that we can load in sign int64
    // NOLINTNEXTLINE (readability-magic-numbers)
    GetMasm()->mov(ArchReg(dst), asmjit::imm(0x43E0000000000000));  // double bit pattern of 2^63
    GetMasm()->movq(ArchVReg(tmpReg), ArchReg(dst));
    GetMasm()->comisd(ArchVReg(src), ArchVReg(tmpReg));
    // jnb: taken when src >= 2^63 (also when unordered, i.e. CF clear)
    GetMasm()->jnb(bigNumberLabel);

    // Small path: value fits in a signed int64, convert directly.
    GetMasm()->cvttsd2si(ArchReg(dst), ArchVReg(src));
    GetMasm()->jmp(endLabel);

    GetMasm()->bind(bigNumberLabel);
    // Big path: subtract 2^63, convert, then restore the high bit via xor.
    GetMasm()->subsd(ArchVReg(src), ArchVReg(tmpReg));
    GetMasm()->cvttsd2si(ArchReg(dst), ArchVReg(src));
    // NOLINTNEXTLINE (readability-magic-numbers)
    GetMasm()->mov(ArchReg(tmpNum), asmjit::imm(0x8000000000000000));  // the 2^63 bit removed above
    GetMasm()->xor_(ArchReg(dst), ArchReg(tmpNum));
    GetMasm()->bind(endLabel);
}
913 
// Zeroes `dst` and jumps to `end` when `src` is NaN, so a NaN input casts to 0.
// Instruction order matters: the xor sets flags, the ucomis then overwrites them;
// keep the xor before the compare.
void Amd64Encoder::EncodeCastFloatCheckNan(Reg dst, Reg src, const asmjit::Label &end)
{
    // dst = 0 up front so the early exit below leaves the correct result.
    GetMasm()->xor_(ArchReg(dst, DOUBLE_WORD_SIZE), ArchReg(dst, DOUBLE_WORD_SIZE));
    if (src.GetType() == FLOAT32_TYPE) {
        GetMasm()->ucomiss(ArchVReg(src), ArchVReg(src));
    } else {
        GetMasm()->ucomisd(ArchVReg(src), ArchVReg(src));
    }
    // Self-compare is unordered (PF set) iff src is NaN.
    GetMasm()->jp(end);
}
924 
EncodeCastFloatSignCheckRange(Reg dst,Reg src,const asmjit::Label & end)925 void Amd64Encoder::EncodeCastFloatSignCheckRange(Reg dst, Reg src, const asmjit::Label &end)
926 {
927     // if src < INT_MIN, then dst = INT_MIN
928     // if src >= (INT_MAX + 1), then dst = INT_MAX
929     if (dst.GetSize() == DOUBLE_WORD_SIZE) {
930         EncodeCastFloatCheckRange(dst, src, end, INT64_MIN, INT64_MAX);
931     } else {
932         EncodeCastFloatCheckRange(dst, src, end, INT32_MIN, INT32_MAX);
933     }
934 }
935 
// Emits range clamping for a float -> integer cast: if src is below minValue
// the destination is left holding minValue and control jumps to `end`;
// if src is at or above (maxValue + 1) it is left holding maxValue.
// Callers run EncodeCastFloatCheckNan first, so NaN never reaches these compares.
void Amd64Encoder::EncodeCastFloatCheckRange(Reg dst, Reg src, const asmjit::Label &end, const int64_t minValue,
                                             const uint64_t maxValue)
{
    ScopedTmpReg cmpReg(this, src.GetType());
    ScopedTmpReg tmpReg(this, src.GetType() == FLOAT64_TYPE ? INT64_TYPE : INT32_TYPE);

    // Pre-load the lower clamp value: if the jb below is taken, dst already holds it.
    GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(minValue));
    if (src.GetType() == FLOAT32_TYPE) {
        // Materialize float(minValue) via its bit pattern in a GP register.
        GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(bit_cast<uint32_t>(float(minValue))));
        GetMasm()->movd(ArchVReg(cmpReg), ArchReg(tmpReg));
        GetMasm()->ucomiss(ArchVReg(src), ArchVReg(cmpReg));
    } else {
        GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(bit_cast<uint64_t>(double(minValue))));
        GetMasm()->movq(ArchVReg(cmpReg), ArchReg(tmpReg));
        GetMasm()->ucomisd(ArchVReg(src), ArchVReg(cmpReg));
    }
    // src < minValue: keep dst == minValue.
    GetMasm()->jb(end);

    // Pre-load the upper clamp value and compare against (maxValue + 1) as float.
    GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(maxValue));
    if (src.GetType() == FLOAT32_TYPE) {
        GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(bit_cast<uint32_t>(float(maxValue) + 1U)));
        GetMasm()->movd(ArchVReg(cmpReg), ArchReg(tmpReg));
        GetMasm()->ucomiss(ArchVReg(src), ArchVReg(cmpReg));
    } else {
        GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(bit_cast<uint64_t>(double(maxValue) + 1U)));
        GetMasm()->movq(ArchVReg(cmpReg), ArchReg(tmpReg));
        GetMasm()->ucomisd(ArchVReg(src), ArchVReg(cmpReg));
    }
    // src >= maxValue + 1: keep dst == maxValue. In-range values fall through.
    GetMasm()->jae(end);
}
966 
EncodeCastFloatUnsignCheckRange(Reg dst,Reg src,const asmjit::Label & end)967 void Amd64Encoder::EncodeCastFloatUnsignCheckRange(Reg dst, Reg src, const asmjit::Label &end)
968 {
969     // if src < 0, then dst = 0
970     // if src >= (UINT_MAX + 1), then dst = UINT_MAX
971     if (dst.GetSize() == DOUBLE_WORD_SIZE) {
972         EncodeCastFloatCheckRange(dst, src, end, 0, UINT64_MAX);
973     } else {
974         EncodeCastFloatCheckRange(dst, src, end, 0, UINT32_MAX);
975     }
976 }
977 
// Converts an unsigned 64-bit integer in `src` to float32 or float64 in `dst`.
// cvtsi2ss/sd treat their GP operand as signed, so values with the top bit set
// need a workaround, different for each destination width.
void Amd64Encoder::EncodeCastScalarToFloatUnsignDouble(Reg dst, Reg src)
{
    if (dst.GetType() == FLOAT32_TYPE) {
        ScopedTmpRegU64 int1Reg(this);
        ScopedTmpRegU64 int2Reg(this);

        auto sgn = GetMasm()->newLabel();
        auto end = GetMasm()->newLabel();

        // Top bit clear: the value fits a signed int64, convert directly.
        GetMasm()->test(ArchReg(src), ArchReg(src));
        GetMasm()->js(sgn);
        GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(src));
        GetMasm()->jmp(end);

        GetMasm()->bind(sgn);
        // Top bit set: halve the value as (src >> 1) | (src & 1) — keeping the
        // low bit folded in so the halving does not lose rounding information —
        // convert, then double the result.
        GetMasm()->mov(ArchReg(int1Reg), ArchReg(src));
        GetMasm()->mov(ArchReg(int2Reg), ArchReg(src));
        GetMasm()->shr(ArchReg(int2Reg), asmjit::imm(1));
        GetMasm()->and_(ArchReg(int1Reg, WORD_SIZE), asmjit::imm(1));
        GetMasm()->or_(ArchReg(int1Reg), ArchReg(int2Reg));
        GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(int1Reg));
        GetMasm()->addss(ArchVReg(dst), ArchVReg(dst));

        GetMasm()->bind(end);
    } else {
        // Branch-free u64 -> f64 bit trick: pair each 32-bit half of src with an
        // exponent word (0x43300000 -> 2^52, 0x45300000 -> 2^84), subtract the
        // matching bias constants, then sum the two partial doubles.
        static constexpr std::array<uint32_t, 4> ARR1 = {uint32_t(0x43300000), uint32_t(0x45300000), 0x0, 0x0};
        static constexpr std::array<uint64_t, 2> ARR2 = {uint64_t(0x4330000000000000), uint64_t(0x4530000000000000)};

        ScopedTmpReg float1Reg(this, dst.GetType());
        ScopedTmpRegF64 tmp(this);

        GetMasm()->movq(ArchVReg(float1Reg), ArchReg(src));
        CopyArrayToXmm(tmp, ARR1);
        // Interleave: lane0 = lo32 | 2^52 exponent, lane1 = hi32 | 2^84 exponent.
        GetMasm()->punpckldq(ArchVReg(float1Reg), ArchVReg(tmp));
        CopyArrayToXmm(tmp, ARR2);
        // Remove the exponent biases, leaving lo32 and hi32 * 2^32 as doubles.
        GetMasm()->subpd(ArchVReg(float1Reg), ArchVReg(tmp));
        GetMasm()->movapd(ArchVReg(dst), ArchVReg(float1Reg));
        GetMasm()->unpckhpd(ArchVReg(dst), ArchVReg(float1Reg));
        GetMasm()->addsd(ArchVReg(dst), ArchVReg(float1Reg));
    }
}
1019 
EncodeCastScalarToFloat(Reg dst,Reg src,bool srcSigned)1020 void Amd64Encoder::EncodeCastScalarToFloat(Reg dst, Reg src, bool srcSigned)
1021 {
1022     if (!srcSigned && src.GetSize() == DOUBLE_WORD_SIZE) {
1023         EncodeCastScalarToFloatUnsignDouble(dst, src);
1024         return;
1025     }
1026 
1027     if (src.GetSize() < WORD_SIZE || (srcSigned && src.GetSize() == WORD_SIZE)) {
1028         if (dst.GetType() == FLOAT32_TYPE) {
1029             GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(src, WORD_SIZE));
1030         } else {
1031             GetMasm()->cvtsi2sd(ArchVReg(dst), ArchReg(src, WORD_SIZE));
1032         }
1033         return;
1034     }
1035 
1036     if (!srcSigned && src.GetSize() == WORD_SIZE) {
1037         ScopedTmpRegU64 int1Reg(this);
1038 
1039         GetMasm()->mov(ArchReg(int1Reg, WORD_SIZE), ArchReg(src, WORD_SIZE));
1040         if (dst.GetType() == FLOAT32_TYPE) {
1041             GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(int1Reg));
1042         } else {
1043             GetMasm()->cvtsi2sd(ArchVReg(dst), ArchReg(int1Reg));
1044         }
1045         return;
1046     }
1047 
1048     ASSERT(srcSigned && src.GetSize() == DOUBLE_WORD_SIZE);
1049     if (dst.GetType() == FLOAT32_TYPE) {
1050         GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(src));
1051     } else {
1052         GetMasm()->cvtsi2sd(ArchVReg(dst), ArchReg(src));
1053     }
1054 }
1055 
// Casts an integer to bool: dst = (src != 0) ? 1 : 0.
void Amd64Encoder::EncodeCastToBool(Reg dst, Reg src)
{
    // In ISA says that we only support casts:
    // i32tou1, i64tou1, u32tou1, u64tou1
    ASSERT(src.IsScalar());
    ASSERT(dst.IsScalar());

    // In our ISA minimal type is 32-bit, so bool in 32bit
    GetMasm()->test(ArchReg(src), ArchReg(src));
    // One "mov" will be better, then 2 jump. Else other instructions will overwrite the flags.
    // (mov does not touch EFLAGS, so the test result survives until setne.)
    GetMasm()->mov(ArchReg(dst, WORD_SIZE), asmjit::imm(0));
    GetMasm()->setne(ArchReg(dst));
}
1069 
// Fast path for the JS-style double -> int32 cast: NaN becomes 0, in-range
// values are truncated inline, everything else (infinities, |x| >= 2^63)
// branches to the `slow` label.
void Amd64Encoder::EncodeFastPathDynamicCast(Reg dst, Reg src, LabelHolder::LabelId slow)
{
    ASSERT(IsLabelValid(slow));
    ASSERT(IsJsNumberCast());
    ASSERT(src.IsFloat() && dst.IsScalar());

    CHECK_EQ(src.GetSize(), BITS_PER_UINT64);
    CHECK_EQ(dst.GetSize(), BITS_PER_UINT32);

    auto end {GetMasm()->newLabel()};

    // if src is NaN, then dst = 0
    EncodeCastFloatCheckNan(dst, src, end);

    // infinite and big numbers will overflow here to INT64_MIN
    GetMasm()->cvttsd2si(ArchReg(dst, DOUBLE_WORD_SIZE), ArchVReg(src));
    // check INT64_MIN
    // (cmp dst, 1 sets OF exactly when dst == INT64_MIN: INT64_MIN - 1 overflows)
    GetMasm()->cmp(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(1));
    auto slowLabel {static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(slow)};
    // jump to slow path in case of overflow
    GetMasm()->jo(*slowLabel);

    GetMasm()->bind(end);
}
1094 
EncodeCast(Reg dst,bool dstSigned,Reg src,bool srcSigned)1095 void Amd64Encoder::EncodeCast(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1096 {
1097     if (src.IsFloat() && dst.IsScalar()) {
1098         EncodeCastFloatToScalar(dst, dstSigned, src);
1099         return;
1100     }
1101 
1102     if (src.IsScalar() && dst.IsFloat()) {
1103         EncodeCastScalarToFloat(dst, src, srcSigned);
1104         return;
1105     }
1106 
1107     if (src.IsFloat() && dst.IsFloat()) {
1108         if (src.GetSize() != dst.GetSize()) {
1109             if (src.GetType() == FLOAT32_TYPE) {
1110                 GetMasm()->cvtss2sd(ArchVReg(dst), ArchVReg(src));
1111             } else {
1112                 GetMasm()->cvtsd2ss(ArchVReg(dst), ArchVReg(src));
1113             }
1114             return;
1115         }
1116 
1117         if (src.GetType() == FLOAT32_TYPE) {
1118             GetMasm()->movss(ArchVReg(dst), ArchVReg(src));
1119         } else {
1120             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
1121         }
1122         return;
1123     }
1124 
1125     ASSERT(src.IsScalar() && dst.IsScalar());
1126     EncodeCastScalar(dst, dstSigned, src, srcSigned);
1127 }
1128 
// Integer -> integer cast: truncation when the destination is not wider,
// otherwise sign- or zero-extension according to the SOURCE signedness.
// Sub-word results are always re-extended to 32 bits, since the ISA's
// minimal scalar type is 32-bit (see EncodeCastToBool's comment).
void Amd64Encoder::EncodeCastScalar(Reg dst, bool dstSigned, Reg src, bool srcSigned)
{
    // Widens a sub-word register to 32 bits in place, per the requested signedness.
    auto extendTo32bit = [this](Reg reg, bool isSigned) {
        if (reg.GetSize() < WORD_SIZE) {
            if (isSigned) {
                GetMasm()->movsx(ArchReg(reg, WORD_SIZE), ArchReg(reg));
            } else {
                GetMasm()->movzx(ArchReg(reg, WORD_SIZE), ArchReg(reg));
            }
        }
    };

    // Truncating (or same-size) cast: copy the low dst-sized bits, then
    // normalize a sub-word result back to 32 bits.
    if (src.GetSize() >= dst.GetSize()) {
        if (dst.GetId() != src.GetId()) {
            GetMasm()->mov(ArchReg(dst), ArchReg(src, dst.GetSize()));
        }
        extendTo32bit(dst, dstSigned);
        return;
    }

    // Widening from a signed source: sign-extend.
    if (srcSigned) {
        if (dst.GetSize() < DOUBLE_WORD_SIZE) {
            GetMasm()->movsx(ArchReg(dst), ArchReg(src));
            extendTo32bit(dst, dstSigned);
        } else if (src.GetSize() == WORD_SIZE) {
            // movsx has no 32->64 form; movsxd is the dedicated instruction.
            GetMasm()->movsxd(ArchReg(dst), ArchReg(src));
        } else {
            // 8/16 -> 64: extend to 32 bits first, then 32 -> 64.
            GetMasm()->movsx(ArchReg(dst, WORD_SIZE), ArchReg(src));
            GetMasm()->movsxd(ArchReg(dst), ArchReg(dst, WORD_SIZE));
        }
        return;
    }

    // Widening from an unsigned source: zero-extend.
    if (src.GetSize() == WORD_SIZE) {
        // A 32-bit mov implicitly zeroes the upper half of the 64-bit register.
        GetMasm()->mov(ArchReg(dst, WORD_SIZE), ArchReg(src));
    } else if (dst.GetSize() == DOUBLE_WORD_SIZE) {
        // movzx to the 32-bit view; the upper half is cleared implicitly.
        GetMasm()->movzx(ArchReg(dst, WORD_SIZE), ArchReg(src));
    } else {
        GetMasm()->movzx(ArchReg(dst), ArchReg(src));
        extendTo32bit(dst, dstSigned);
    }
}
1171 
MakeShift(Shift shift)1172 Reg Amd64Encoder::MakeShift(Shift shift)
1173 {
1174     Reg reg = shift.GetBase();
1175     ASSERT(reg.IsValid());
1176     if (reg.IsScalar()) {
1177         ASSERT(shift.GetType() != ShiftType::INVALID_SHIFT);
1178         switch (shift.GetType()) {
1179             case ShiftType::LSL:
1180                 GetMasm()->shl(ArchReg(reg), asmjit::imm(shift.GetScale()));
1181                 break;
1182             case ShiftType::LSR:
1183                 GetMasm()->shr(ArchReg(reg), asmjit::imm(shift.GetScale()));
1184                 break;
1185             case ShiftType::ASR:
1186                 GetMasm()->sar(ArchReg(reg), asmjit::imm(shift.GetScale()));
1187                 break;
1188             case ShiftType::ROR:
1189                 GetMasm()->ror(ArchReg(reg), asmjit::imm(shift.GetScale()));
1190                 break;
1191             default:
1192                 UNREACHABLE();
1193         }
1194 
1195         return reg;
1196     }
1197 
1198     // Invalid register type
1199     UNREACHABLE();
1200 }
1201 
// dst = src0 + (src1.base <shifted>). Integer-only; floats report failure.
void Amd64Encoder::EncodeAdd(Reg dst, Reg src0, Shift src1)
{
    if (dst.IsFloat()) {
        SetFalseResult();
        return;
    }

    ASSERT(dst.GetSize() >= src0.GetSize());

    // NOTE: MakeShift shifts src1's base register in place and returns it.
    auto shiftReg = MakeShift(src1);

    // Sub-word operands cannot feed lea addressing; use the plain register add.
    if (src0.GetSize() < WORD_SIZE) {
        EncodeAdd(dst, src0, shiftReg);
        return;
    }

    // lea with a 64-bit base needs a 64-bit index: sign-extend the shifted value.
    if (src0.GetSize() == DOUBLE_WORD_SIZE && shiftReg.GetSize() < DOUBLE_WORD_SIZE) {
        GetMasm()->movsxd(ArchReg(shiftReg, DOUBLE_WORD_SIZE), ArchReg(shiftReg));
    }

    // lea gives a non-destructive three-operand add (dst = src0 + shiftReg).
    GetMasm()->lea(ArchReg(dst), asmjit::x86::ptr(ArchReg(src0), ArchReg(shiftReg, src0.GetSize())));
}
1224 
EncodeAdd(Reg dst,Reg src0,Reg src1)1225 void Amd64Encoder::EncodeAdd(Reg dst, Reg src0, Reg src1)
1226 {
1227     if (dst.IsScalar()) {
1228         auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
1229         GetMasm()->lea(ArchReg(dst, size), asmjit::x86::ptr(ArchReg(src0, size), ArchReg(src1, size)));
1230         return;
1231     }
1232 
1233     if (dst.GetType() == FLOAT32_TYPE) {
1234         if (dst.GetId() == src0.GetId()) {
1235             GetMasm()->addss(ArchVReg(dst), ArchVReg(src1));
1236         } else if (dst.GetId() == src1.GetId()) {
1237             GetMasm()->addss(ArchVReg(dst), ArchVReg(src0));
1238         } else {
1239             GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
1240             GetMasm()->addss(ArchVReg(dst), ArchVReg(src1));
1241         }
1242     } else {
1243         if (dst.GetId() == src0.GetId()) {
1244             GetMasm()->addsd(ArchVReg(dst), ArchVReg(src1));
1245         } else if (dst.GetId() == src1.GetId()) {
1246             GetMasm()->addsd(ArchVReg(dst), ArchVReg(src0));
1247         } else {
1248             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
1249             GetMasm()->addsd(ArchVReg(dst), ArchVReg(src1));
1250         }
1251     }
1252 }
1253 
// dst = src0 - src1 for both scalar and floating-point registers.
void Amd64Encoder::EncodeSub(Reg dst, Reg src0, Reg src1)
{
    if (dst.IsScalar()) {
        if (dst.GetId() == src0.GetId()) {
            GetMasm()->sub(ArchReg(dst), ArchReg(src1));
        } else if (dst.GetId() == src1.GetId()) {
            // dst aliases the subtrahend: compute src1 - src0, then negate
            // to get src0 - src1 without an extra register.
            GetMasm()->sub(ArchReg(dst), ArchReg(src0));
            GetMasm()->neg(ArchReg(dst));
        } else {
            GetMasm()->mov(dst, src0);  // NOTE(review): EncodeMov is used below, keep as-is
            GetMasm()->sub(ArchReg(dst), ArchReg(src1));
        }
        return;
    }

    if (dst.GetType() == FLOAT32_TYPE) {
        if (dst.GetId() == src0.GetId()) {
            GetMasm()->subss(ArchVReg(dst), ArchVReg(src1));
        } else if (dst.GetId() != src1.GetId()) {
            GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
            GetMasm()->subss(ArchVReg(dst), ArchVReg(src1));
        } else {
            // dst aliases src1: subtraction does not commute, so compute into
            // a temporary and move the result over.
            ScopedTmpReg tmpReg(this, dst.GetType());
            GetMasm()->movss(ArchVReg(tmpReg), ArchVReg(src0));
            GetMasm()->subss(ArchVReg(tmpReg), ArchVReg(src1));
            GetMasm()->movss(ArchVReg(dst), ArchVReg(tmpReg));
        }
    } else {
        if (dst.GetId() == src0.GetId()) {
            GetMasm()->subsd(ArchVReg(dst), ArchVReg(src1));
        } else if (dst.GetId() != src1.GetId()) {
            GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
            GetMasm()->subsd(ArchVReg(dst), ArchVReg(src1));
        } else {
            // Same aliasing workaround for the double-precision case.
            ScopedTmpReg tmpReg(this, dst.GetType());
            GetMasm()->movsd(ArchVReg(tmpReg), ArchVReg(src0));
            GetMasm()->subsd(ArchVReg(tmpReg), ArchVReg(src1));
            GetMasm()->movsd(ArchVReg(dst), ArchVReg(tmpReg));
        }
    }
}
1295 
EncodeMul(Reg dst,Reg src0,Reg src1)1296 void Amd64Encoder::EncodeMul(Reg dst, Reg src0, Reg src1)
1297 {
1298     if (dst.IsScalar()) {
1299         auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
1300 
1301         if (dst.GetId() == src0.GetId()) {
1302             GetMasm()->imul(ArchReg(dst, size), ArchReg(src1, size));
1303         } else if (dst.GetId() == src1.GetId()) {
1304             GetMasm()->imul(ArchReg(dst, size), ArchReg(src0, size));
1305         } else {
1306             GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
1307             GetMasm()->imul(ArchReg(dst, size), ArchReg(src1, size));
1308         }
1309         return;
1310     }
1311 
1312     if (dst.GetType() == FLOAT32_TYPE) {
1313         if (dst.GetId() == src0.GetId()) {
1314             GetMasm()->mulss(ArchVReg(dst), ArchVReg(src1));
1315         } else if (dst.GetId() == src1.GetId()) {
1316             GetMasm()->mulss(ArchVReg(dst), ArchVReg(src0));
1317         } else {
1318             GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
1319             GetMasm()->mulss(ArchVReg(dst), ArchVReg(src1));
1320         }
1321     } else {
1322         if (dst.GetId() == src0.GetId()) {
1323             GetMasm()->mulsd(ArchVReg(dst), ArchVReg(src1));
1324         } else if (dst.GetId() == src1.GetId()) {
1325             GetMasm()->mulsd(ArchVReg(dst), ArchVReg(src0));
1326         } else {
1327             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
1328             GetMasm()->mulsd(ArchVReg(dst), ArchVReg(src1));
1329         }
1330     }
1331 }
1332 
// dst = src0 + src1, then branch to `id` on signed overflow (cc == VS) or on
// its absence (cc == VC). The conditional jump consumes OF set by the add, so
// no flag-clobbering instruction may be emitted in between.
void Amd64Encoder::EncodeAddOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
{
    ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
    ASSERT(cc == Condition::VS || cc == Condition::VC);
    auto size = dst.GetSize();
    // Addition commutes, so dst may alias either source without a fixup.
    if (dst.GetId() == src0.GetId()) {
        GetMasm()->add(ArchReg(dst, size), ArchReg(src1, size));
    } else if (dst.GetId() == src1.GetId()) {
        GetMasm()->add(ArchReg(dst, size), ArchReg(src0, size));
    } else {
        GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
        GetMasm()->add(ArchReg(dst, size), ArchReg(src1, size));
    }
    auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
    GetMasm()->j(ArchCc(cc, false), *label);
}
1349 
// dst = src0 - src1, then branch to `id` on signed overflow (cc == VS) or on
// its absence (cc == VC). The jump consumes OF set by the sub.
void Amd64Encoder::EncodeSubOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
{
    ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
    ASSERT(cc == Condition::VS || cc == Condition::VC);
    auto size = dst.GetSize();
    if (dst.GetId() == src0.GetId()) {
        GetMasm()->sub(ArchReg(dst, size), ArchReg(src1, size));
    } else if (dst.GetId() == src1.GetId()) {
        // dst aliases the subtrahend; subtraction does not commute, so stash
        // src1 in a temporary before overwriting dst with src0.
        ScopedTmpReg tmpReg(this, dst.GetType());
        GetMasm()->mov(ArchReg(tmpReg, size), ArchReg(src1, size));
        GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
        GetMasm()->sub(ArchReg(dst, size), ArchReg(tmpReg, size));
    } else {
        GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
        GetMasm()->sub(ArchReg(dst, size), ArchReg(src1, size));
    }
    auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
    GetMasm()->j(ArchCc(cc, false), *label);
}
1369 
// dst = -src, branching to `id` when the negation would overflow or when the
// source is zero: (src & 0x7fffffff) == 0 holds exactly for src == 0 and for
// src == INT32_MIN (whose negation is unrepresentable).
// NOTE(review): the mask assumes a 32-bit value — confirm callers never use
// this with 64-bit operands.
void Amd64Encoder::EncodeNegOverflowAndZero(compiler::LabelHolder::LabelId id, Reg dst, Reg src)
{
    ASSERT(!dst.IsFloat() && !src.IsFloat());
    auto size = dst.GetSize();
    // NOLINTNEXTLINE(readability-magic-numbers)
    EncodeJumpTest(id, src, Imm(0x7fffffff), Condition::TST_EQ);
    EncodeMov(dst, src);
    GetMasm()->neg(ArchReg(dst, size));
}
1379 
EncodeDivFloat(Reg dst,Reg src0,Reg src1)1380 void Amd64Encoder::EncodeDivFloat(Reg dst, Reg src0, Reg src1)
1381 {
1382     ASSERT(dst.IsFloat());
1383     if (dst.GetType() == FLOAT32_TYPE) {
1384         if (dst.GetId() == src0.GetId()) {
1385             GetMasm()->divss(ArchVReg(dst), ArchVReg(src1));
1386         } else if (dst.GetId() != src1.GetId()) {
1387             GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
1388             GetMasm()->divss(ArchVReg(dst), ArchVReg(src1));
1389         } else {
1390             ScopedTmpRegF32 tmp(this);
1391             GetMasm()->movss(ArchVReg(tmp), ArchVReg(src0));
1392             GetMasm()->divss(ArchVReg(tmp), ArchVReg(src1));
1393             GetMasm()->movss(ArchVReg(dst), ArchVReg(tmp));
1394         }
1395     } else {
1396         if (dst.GetId() == src0.GetId()) {
1397             GetMasm()->divsd(ArchVReg(dst), ArchVReg(src1));
1398         } else if (dst.GetId() != src1.GetId()) {
1399             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
1400             GetMasm()->divsd(ArchVReg(dst), ArchVReg(src1));
1401         } else {
1402             ScopedTmpRegF64 tmp(this);
1403             GetMasm()->movsd(ArchVReg(tmp), ArchVReg(src0));
1404             GetMasm()->divsd(ArchVReg(tmp), ArchVReg(src1));
1405             GetMasm()->movsd(ArchVReg(dst), ArchVReg(tmp));
1406         }
1407     }
1408 }
1409 
// Saves rdx and rax (in that order) around a div/idiv sequence, skipping the
// one that `dst` occupies since it will receive the result anyway.
// Paired with EncodeDivFillDst, which pops in the reverse order.
static void EncodeDivSpillDst(asmjit::x86::Assembler *masm, Reg dst)
{
    if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
        masm->push(asmjit::x86::rdx);
    }
    if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
        masm->push(asmjit::x86::rax);
    }
}
1419 
// Copies the quotient from rax into `dst` and restores the registers saved by
// EncodeDivSpillDst (rax first, then rdx — the reverse of the push order).
static void EncodeDivFillDst(asmjit::x86::Assembler *masm, Reg dst)
{
    if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
        masm->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::x86::rax);
        masm->pop(asmjit::x86::rax);
    }

    if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
        masm->pop(asmjit::x86::rdx);
    }
}
1431 
// dst = src0 / src1. Floats go to the SSE helper; integers use div/idiv with
// the rdx:rax register protocol and a special path for a divisor of -1.
void Amd64Encoder::EncodeDiv(Reg dst, bool dstSigned, Reg src0, Reg src1)
{
    if (dst.IsFloat()) {
        EncodeDivFloat(dst, src0, src1);
        return;
    }

    auto negPath = GetMasm()->newLabel();
    auto crossroad = GetMasm()->newLabel();

    // Signed idiv raises #DE on INT_MIN / -1; divert divisor == -1 to a plain
    // negation (x / -1 == -x, with INT_MIN wrapping back to itself).
    if (dstSigned) {
        GetMasm()->cmp(ArchReg(src1), asmjit::imm(-1));
        GetMasm()->je(negPath);
    }

    // div/idiv implicitly clobber rdx:rax; preserve them unless dst is one.
    EncodeDivSpillDst(GetMasm(), dst);

    // Move the divisor out of rax/rdx if it lives there.
    ScopedTmpReg tmpReg(this, dst.GetType());
    Reg op1 {src1};
    if (src1.GetId() == ConvertRegNumber(asmjit::x86::rax.id()) ||
        src1.GetId() == ConvertRegNumber(asmjit::x86::rdx.id())) {
        GetMasm()->mov(ArchReg(tmpReg), ArchReg(src1));
        op1 = Reg(tmpReg);
    }

    if (src0.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
        GetMasm()->mov(asmjit::x86::rax, ArchReg(src0, DOUBLE_WORD_SIZE));
    }
    if (dstSigned) {
        // Sign-extend the dividend into rdx: cdq for 32-bit, cqo for 64-bit.
        if (dst.GetSize() <= WORD_SIZE) {
            GetMasm()->cdq();
        } else {
            GetMasm()->cqo();
        }
        GetMasm()->idiv(ArchReg(op1));
    } else {
        // Unsigned divide: the high half of the dividend is zero.
        GetMasm()->xor_(asmjit::x86::rdx, asmjit::x86::rdx);
        GetMasm()->div(ArchReg(op1));
    }

    // Quotient is in rax; copy it to dst and restore the spilled registers.
    EncodeDivFillDst(GetMasm(), dst);

    GetMasm()->jmp(crossroad);

    // Divisor == -1: result is simply the negated dividend.
    GetMasm()->bind(negPath);
    if (dst.GetId() != src0.GetId()) {
        GetMasm()->mov(ArchReg(dst), ArchReg(src0));
    }
    GetMasm()->neg(ArchReg(dst));

    GetMasm()->bind(crossroad);
}
1484 
// Signed division by a compile-time constant via the magic-number method
// (multiply by a precomputed reciprocal, then shift and fix up), avoiding a
// hardware idiv. rax/rdx are clobbered by the widening imul and are preserved
// around the sequence unless dst is one of them.
void Amd64Encoder::EncodeSignedDiv(Reg dst, Reg src0, Imm imm)
{
    int64_t divisor = imm.GetAsInt();

    Reg ax(ConvertRegNumber(asmjit::x86::rax.id()), dst.GetType());
    Reg dx(ConvertRegNumber(asmjit::x86::rdx.id()), dst.GetType());

    if (dst != ax) {
        GetMasm()->push(ArchReg(ax, DOUBLE_WORD_SIZE));
    }
    if (dst != dx) {
        GetMasm()->push(ArchReg(dx, DOUBLE_WORD_SIZE));
    }

    // Magic constant and shift precomputed by the fast-divisor algorithm.
    FastConstSignedDivisor fastDivisor(divisor, dst.GetSize());
    int64_t magic = fastDivisor.GetMagic();

    // Keep a copy of the dividend: the fix-up steps below need it after
    // rax/rdx have been consumed by the widening multiply.
    ScopedTmpReg tmp(this, dst.GetType());
    EncodeMov(tmp, src0);
    EncodeMov(ax, src0);
    EncodeMov(dx, Imm(magic));
    // One-operand imul: (rdx:rax) = rax * rdx; the high half lands in dx.
    GetMasm()->imul(ArchReg(dx));

    // Correct the high product when the magic constant's sign disagrees with
    // the divisor's (standard fix-up of the magic-number method).
    if (divisor > 0 && magic < 0) {
        EncodeAdd(dx, dx, tmp);
    } else if (divisor < 0 && magic > 0) {
        EncodeSub(dx, dx, tmp);
    }

    int64_t shift = fastDivisor.GetShift();
    EncodeAShr(dst, dx, Imm(shift));

    // result = (result < 0 ? result + 1 : result)
    // (the logical shift extracts the sign bit as 0/1)
    EncodeShr(tmp, dst, Imm(dst.GetSize() - 1U));
    EncodeAdd(dst, dst, tmp);

    // Restore in reverse push order.
    if (dst != dx) {
        GetMasm()->pop(ArchReg(dx, DOUBLE_WORD_SIZE));
    }
    if (dst != ax) {
        GetMasm()->pop(ArchReg(ax, DOUBLE_WORD_SIZE));
    }
}
1528 
// Unsigned division by a compile-time constant via the magic-number method.
// rax/rdx are clobbered by the widening mul and preserved around the sequence
// unless dst is one of them.
void Amd64Encoder::EncodeUnsignedDiv(Reg dst, Reg src0, Imm imm)
{
    auto divisor = bit_cast<uint64_t>(imm.GetAsInt());

    Reg ax(ConvertRegNumber(asmjit::x86::rax.id()), dst.GetType());
    Reg dx(ConvertRegNumber(asmjit::x86::rdx.id()), dst.GetType());

    if (dst != ax) {
        GetMasm()->push(ArchReg(ax, DOUBLE_WORD_SIZE));
    }
    if (dst != dx) {
        GetMasm()->push(ArchReg(dx, DOUBLE_WORD_SIZE));
    }

    // Magic constant, shift, and "add" flag precomputed by the fast-divisor
    // algorithm; GetAdd() signals that the magic multiply alone can overflow
    // and the longer fix-up sequence is required.
    FastConstUnsignedDivisor fastDivisor(divisor, dst.GetSize());
    uint64_t magic = fastDivisor.GetMagic();

    ScopedTmpReg tmp(this, dst.GetType());
    if (fastDivisor.GetAdd()) {
        // The fix-up path needs the original dividend after rax is consumed.
        EncodeMov(tmp, src0);
    }
    EncodeMov(ax, src0);
    EncodeMov(dx, Imm(magic));
    // One-operand mul: (rdx:rax) = rax * rdx; the high half lands in dx.
    GetMasm()->mul(ArchReg(dx));

    uint64_t shift = fastDivisor.GetShift();
    if (!fastDivisor.GetAdd()) {
        // Simple case: quotient = highProduct >> shift.
        EncodeShr(dst, dx, Imm(shift));
    } else {
        // Overflow-safe case: quotient = (((n - hi) >> 1) + hi) >> (shift - 1).
        ASSERT(shift >= 1U);
        EncodeSub(tmp, tmp, dx);
        EncodeShr(tmp, tmp, Imm(1U));
        EncodeAdd(tmp, tmp, dx);
        EncodeShr(dst, tmp, Imm(shift - 1U));
    }

    // Restore in reverse push order.
    if (dst != dx) {
        GetMasm()->pop(ArchReg(dx, DOUBLE_WORD_SIZE));
    }
    if (dst != ax) {
        GetMasm()->pop(ArchReg(ax, DOUBLE_WORD_SIZE));
    }
}
1572 
EncodeDiv(Reg dst,Reg src0,Imm imm,bool isSigned)1573 void Amd64Encoder::EncodeDiv(Reg dst, Reg src0, Imm imm, bool isSigned)
1574 {
1575     ASSERT(dst.IsScalar() && dst.GetSize() >= WORD_SIZE);
1576     ASSERT(CanOptimizeImmDivMod(bit_cast<uint64_t>(imm.GetAsInt()), isSigned));
1577     if (isSigned) {
1578         EncodeSignedDiv(dst, src0, imm);
1579     } else {
1580         EncodeUnsignedDiv(dst, src0, imm);
1581     }
1582 }
1583 
// dst = src0 % imm, computed as src0 - imm * (src0 / imm) on top of the
// constant-divisor division above.
void Amd64Encoder::EncodeMod(Reg dst, Reg src0, Imm imm, bool isSigned)
{
    ASSERT(dst.IsScalar() && dst.GetSize() >= WORD_SIZE);
    ASSERT(CanOptimizeImmDivMod(bit_cast<uint64_t>(imm.GetAsInt()), isSigned));

    // dst = src0 - imm * (src0 / imm)
    ScopedTmpReg tmp(this, dst.GetType());
    EncodeDiv(tmp, src0, imm, isSigned);
    if (dst.GetSize() == WORD_SIZE) {
        // 32-bit: imul accepts the immediate directly.
        GetMasm()->imul(ArchReg(tmp), ArchReg(tmp), asmjit::imm(imm.GetAsInt()));
    } else {
        // 64-bit: materialize the immediate in a register first.
        ScopedTmpRegU64 immReg(this);
        EncodeMov(immReg, imm);
        EncodeMul(tmp, tmp, immReg);
    }
    EncodeSub(dst, src0, tmp);
}
1601 
EncodeModFloat(Reg dst,Reg src0,Reg src1)1602 void Amd64Encoder::EncodeModFloat(Reg dst, Reg src0, Reg src1)
1603 {
1604     ASSERT(dst.IsFloat());
1605     if (dst.GetType() == FLOAT32_TYPE) {
1606         using Fp = float (*)(float, float);
1607         MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<Fp>(fmodf)));
1608     } else {
1609         using Fp = double (*)(double, double);
1610         MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<Fp>(fmod)));
1611     }
1612 }
1613 
// dst = src0 % src1. Floats go to the libm helper; integers use div/idiv and
// take the remainder from rdx, with a special path for a divisor of -1.
void Amd64Encoder::EncodeMod(Reg dst, bool dstSigned, Reg src0, Reg src1)
{
    if (dst.IsFloat()) {
        EncodeModFloat(dst, src0, src1);
        return;
    }

    auto zeroPath = GetMasm()->newLabel();
    auto crossroad = GetMasm()->newLabel();

    // Signed idiv raises #DE on INT_MIN % -1; divert divisor == -1 to the
    // constant result x % -1 == 0.
    if (dstSigned) {
        GetMasm()->cmp(ArchReg(src1), asmjit::imm(-1));
        GetMasm()->je(zeroPath);
    }

    // div/idiv implicitly clobber rdx:rax; preserve them unless dst is one.
    if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
        GetMasm()->push(asmjit::x86::rax);
    }
    if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
        GetMasm()->push(asmjit::x86::rdx);
    }

    // Move the divisor out of rax/rdx if it lives there.
    ScopedTmpReg tmpReg(this, dst.GetType());
    Reg op1 {src1};
    if (src1.GetId() == ConvertRegNumber(asmjit::x86::rax.id()) ||
        src1.GetId() == ConvertRegNumber(asmjit::x86::rdx.id())) {
        GetMasm()->mov(ArchReg(tmpReg), ArchReg(src1));
        op1 = Reg(tmpReg);
    }

    if (src0.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
        GetMasm()->mov(asmjit::x86::rax, ArchReg(src0, DOUBLE_WORD_SIZE));
    }

    if (dstSigned) {
        // Sign-extend the dividend into rdx: cdq for 32-bit, cqo for 64-bit.
        if (dst.GetSize() <= WORD_SIZE) {
            GetMasm()->cdq();
        } else {
            GetMasm()->cqo();
        }
        GetMasm()->idiv(ArchReg(op1));
    } else {
        // Unsigned divide: the high half of the dividend is zero.
        GetMasm()->xor_(asmjit::x86::rdx, asmjit::x86::rdx);
        GetMasm()->div(ArchReg(op1));
    }

    // The remainder is in rdx; copy it out, then restore in reverse push order.
    if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
        GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::x86::rdx);
        GetMasm()->pop(asmjit::x86::rdx);
    }

    if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
        GetMasm()->pop(asmjit::x86::rax);
    }
    GetMasm()->jmp(crossroad);

    // Divisor == -1: the remainder is always zero.
    GetMasm()->bind(zeroPath);
    GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));

    GetMasm()->bind(crossroad);
}
1675 
EncodeMin(Reg dst,bool dstSigned,Reg src0,Reg src1)1676 void Amd64Encoder::EncodeMin(Reg dst, bool dstSigned, Reg src0, Reg src1)
1677 {
1678     if (dst.IsScalar()) {
1679         ScopedTmpReg tmpReg(this, dst.GetType());
1680         GetMasm()->mov(ArchReg(tmpReg), ArchReg(src1));
1681         GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
1682 
1683         auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
1684         if (dstSigned) {
1685             GetMasm()->cmovle(ArchReg(tmpReg, size), ArchReg(src0, size));
1686         } else {
1687             GetMasm()->cmovb(ArchReg(tmpReg, size), ArchReg(src0, size));
1688         }
1689         EncodeMov(dst, tmpReg);
1690         return;
1691     }
1692 
1693     EncodeMinMaxFp<false>(dst, src0, src1);
1694 }
1695 
EncodeMax(Reg dst,bool dstSigned,Reg src0,Reg src1)1696 void Amd64Encoder::EncodeMax(Reg dst, bool dstSigned, Reg src0, Reg src1)
1697 {
1698     if (dst.IsScalar()) {
1699         ScopedTmpReg tmpReg(this, dst.GetType());
1700         GetMasm()->mov(ArchReg(tmpReg), ArchReg(src1));
1701         GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
1702 
1703         auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
1704         if (dstSigned) {
1705             GetMasm()->cmovge(ArchReg(tmpReg, size), ArchReg(src0, size));
1706         } else {
1707             GetMasm()->cmova(ArchReg(tmpReg, size), ArchReg(src0, size));
1708         }
1709         EncodeMov(dst, tmpReg);
1710         return;
1711     }
1712 
1713     EncodeMinMaxFp<true>(dst, src0, src1);
1714 }
1715 
// FP min/max with IEEE-754 corner cases handled explicitly:
//  - equal-but-distinct operands (+0.0 vs -0.0) are combined bitwise so that
//    max(+0,-0) == +0 (andps clears the sign bit unless both are set) and
//    min(+0,-0) == -0 (orps keeps the sign bit if either is set);
//  - if either operand is NaN, the result is NaN (por propagates the NaN bits);
//  - otherwise minss/maxss (or minsd/maxsd) give the ordinary result.
// ucomis* sets ZF on equal-or-unordered and PF on unordered, so jne handles
// the ordinary case and jp separates NaN from the ±0 case.
template <bool IS_MAX>
void Amd64Encoder::EncodeMinMaxFp(Reg dst, Reg src0, Reg src1)
{
    auto end = GetMasm()->newLabel();
    auto notEqual = GetMasm()->newLabel();
    auto gotNan = GetMasm()->newLabel();
    // Pick srcA so that dst never aliases srcB (dst is overwritten first).
    auto &srcA = dst.GetId() != src1.GetId() ? src0 : src1;
    auto &srcB = srcA.GetId() == src0.GetId() ? src1 : src0;
    if (dst.GetType() == FLOAT32_TYPE) {
        GetMasm()->movaps(ArchVReg(dst), ArchVReg(srcA));
        GetMasm()->ucomiss(ArchVReg(srcB), ArchVReg(srcA));
        GetMasm()->jne(notEqual);
        GetMasm()->jp(gotNan);
        // calculate result for positive/negative zero operands
        if (IS_MAX) {
            GetMasm()->andps(ArchVReg(dst), ArchVReg(srcB));
        } else {
            GetMasm()->orps(ArchVReg(dst), ArchVReg(srcB));
        }
        GetMasm()->jmp(end);
        GetMasm()->bind(gotNan);
        // if any operand is NaN result is NaN
        GetMasm()->por(ArchVReg(dst), ArchVReg(srcB));
        GetMasm()->jmp(end);
        GetMasm()->bind(notEqual);
        if (IS_MAX) {
            GetMasm()->maxss(ArchVReg(dst), ArchVReg(srcB));
        } else {
            GetMasm()->minss(ArchVReg(dst), ArchVReg(srcB));
        }
        GetMasm()->bind(end);
    } else {
        GetMasm()->movapd(ArchVReg(dst), ArchVReg(srcA));
        GetMasm()->ucomisd(ArchVReg(srcB), ArchVReg(srcA));
        GetMasm()->jne(notEqual);
        GetMasm()->jp(gotNan);
        // calculate result for positive/negative zero operands
        if (IS_MAX) {
            GetMasm()->andpd(ArchVReg(dst), ArchVReg(srcB));
        } else {
            GetMasm()->orpd(ArchVReg(dst), ArchVReg(srcB));
        }
        GetMasm()->jmp(end);
        GetMasm()->bind(gotNan);
        // if any operand is NaN result is NaN
        GetMasm()->por(ArchVReg(dst), ArchVReg(srcB));
        GetMasm()->jmp(end);
        GetMasm()->bind(notEqual);
        if (IS_MAX) {
            GetMasm()->maxsd(ArchVReg(dst), ArchVReg(srcB));
        } else {
            GetMasm()->minsd(ArchVReg(dst), ArchVReg(srcB));
        }
        GetMasm()->bind(end);
    }
}
1772 
EncodeShl(Reg dst,Reg src0,Reg src1)1773 void Amd64Encoder::EncodeShl(Reg dst, Reg src0, Reg src1)
1774 {
1775     ASSERT(dst.IsScalar());
1776     ScopedTmpReg tmpReg(this, dst.GetType());
1777     Reg rcx(ConvertRegNumber(asmjit::x86::rcx.id()), dst.GetType());
1778     GetMasm()->mov(ArchReg(tmpReg), ArchReg(src0));
1779     if (dst.GetId() != rcx.GetId()) {
1780         GetMasm()->push(ArchReg(rcx, DOUBLE_WORD_SIZE));
1781     }
1782     GetMasm()->mov(ArchReg(rcx), ArchReg(src1));
1783     GetMasm()->shl(ArchReg(tmpReg), asmjit::x86::cl);
1784     if (dst.GetId() != rcx.GetId()) {
1785         GetMasm()->pop(ArchReg(rcx, DOUBLE_WORD_SIZE));
1786     }
1787     GetMasm()->mov(ArchReg(dst), ArchReg(tmpReg));
1788 }
1789 
EncodeShr(Reg dst,Reg src0,Reg src1)1790 void Amd64Encoder::EncodeShr(Reg dst, Reg src0, Reg src1)
1791 {
1792     ASSERT(dst.IsScalar());
1793     ScopedTmpReg tmpReg(this, dst.GetType());
1794     Reg rcx(ConvertRegNumber(asmjit::x86::rcx.id()), dst.GetType());
1795     GetMasm()->mov(ArchReg(tmpReg), ArchReg(src0));
1796     if (dst.GetId() != rcx.GetId()) {
1797         GetMasm()->push(ArchReg(rcx, DOUBLE_WORD_SIZE));
1798     }
1799     GetMasm()->mov(ArchReg(rcx), ArchReg(src1));
1800     GetMasm()->shr(ArchReg(tmpReg), asmjit::x86::cl);
1801     if (dst.GetId() != rcx.GetId()) {
1802         GetMasm()->pop(ArchReg(rcx, DOUBLE_WORD_SIZE));
1803     }
1804     GetMasm()->mov(ArchReg(dst), ArchReg(tmpReg));
1805 }
1806 
EncodeAShr(Reg dst,Reg src0,Reg src1)1807 void Amd64Encoder::EncodeAShr(Reg dst, Reg src0, Reg src1)
1808 {
1809     ASSERT(dst.IsScalar());
1810     ScopedTmpReg tmpReg(this, dst.GetType());
1811     Reg rcx(ConvertRegNumber(asmjit::x86::rcx.id()), dst.GetType());
1812     GetMasm()->mov(ArchReg(tmpReg), ArchReg(src0));
1813     if (dst.GetId() != rcx.GetId()) {
1814         GetMasm()->push(ArchReg(rcx, DOUBLE_WORD_SIZE));
1815     }
1816     GetMasm()->mov(ArchReg(rcx), ArchReg(src1));
1817     GetMasm()->sar(ArchReg(tmpReg), asmjit::x86::cl);
1818     if (dst.GetId() != rcx.GetId()) {
1819         GetMasm()->pop(ArchReg(rcx, DOUBLE_WORD_SIZE));
1820     }
1821     GetMasm()->mov(ArchReg(dst), ArchReg(tmpReg));
1822 }
1823 
EncodeAnd(Reg dst,Reg src0,Reg src1)1824 void Amd64Encoder::EncodeAnd(Reg dst, Reg src0, Reg src1)
1825 {
1826     ASSERT(dst.IsScalar());
1827     if (dst.GetId() == src0.GetId()) {
1828         GetMasm()->and_(ArchReg(dst), ArchReg(src1));
1829     } else if (dst.GetId() == src1.GetId()) {
1830         GetMasm()->and_(ArchReg(dst), ArchReg(src0));
1831     } else {
1832         EncodeMov(dst, src0);
1833         GetMasm()->and_(ArchReg(dst), ArchReg(src1));
1834     }
1835 }
1836 
EncodeOr(Reg dst,Reg src0,Reg src1)1837 void Amd64Encoder::EncodeOr(Reg dst, Reg src0, Reg src1)
1838 {
1839     ASSERT(dst.IsScalar());
1840     if (dst.GetId() == src0.GetId()) {
1841         GetMasm()->or_(ArchReg(dst), ArchReg(src1));
1842     } else if (dst.GetId() == src1.GetId()) {
1843         GetMasm()->or_(ArchReg(dst), ArchReg(src0));
1844     } else {
1845         EncodeMov(dst, src0);
1846         GetMasm()->or_(ArchReg(dst), ArchReg(src1));
1847     }
1848 }
1849 
EncodeXor(Reg dst,Reg src0,Reg src1)1850 void Amd64Encoder::EncodeXor(Reg dst, Reg src0, Reg src1)
1851 {
1852     ASSERT(dst.IsScalar());
1853     if (dst.GetId() == src0.GetId()) {
1854         GetMasm()->xor_(ArchReg(dst), ArchReg(src1));
1855     } else if (dst.GetId() == src1.GetId()) {
1856         GetMasm()->xor_(ArchReg(dst), ArchReg(src0));
1857     } else {
1858         EncodeMov(dst, src0);
1859         GetMasm()->xor_(ArchReg(dst), ArchReg(src1));
1860     }
1861 }
1862 
// dst = src + imm (scalar only; FP immediates are rejected via SetFalseResult).
// Prefers lea, which writes dst without disturbing src or the flags; when the
// immediate cannot be encoded as a displacement, it is materialized in a
// register first.
void Amd64Encoder::EncodeAdd(Reg dst, Reg src, Imm imm)
{
    if (dst.IsFloat()) {
        SetFalseResult();
        return;
    }

    auto immVal = imm.GetAsInt();
    // lea needs at least 32-bit operands.
    auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
    if (ImmFitsSize(immVal, size)) {
        GetMasm()->lea(ArchReg(dst, size), asmjit::x86::ptr(ArchReg(src, size), immVal));
    } else {
        if (dst.GetId() != src.GetId()) {
            GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
            GetMasm()->add(ArchReg(dst), ArchReg(src));
        } else {
            // dst aliases src: stage the immediate in a temporary so the source
            // value is not clobbered before the addition.
            ScopedTmpReg tmpReg(this, dst.GetType());
            GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
            GetMasm()->add(ArchReg(dst), ArchReg(tmpReg));
        }
    }
}
1885 
// dst = src - imm, implemented as an addition of the negated immediate so the
// same lea / add lowering as EncodeAdd can be reused (FP immediates are
// rejected via SetFalseResult).
// NOTE(review): negating INT64_MIN overflows; presumably such immediates never
// reach this path — confirm against callers.
void Amd64Encoder::EncodeSub(Reg dst, Reg src, Imm imm)
{
    if (dst.IsFloat()) {
        SetFalseResult();
        return;
    }

    auto immVal = -imm.GetAsInt();
    // lea needs at least 32-bit operands.
    auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
    if (ImmFitsSize(immVal, size)) {
        GetMasm()->lea(ArchReg(dst, size), asmjit::x86::ptr(ArchReg(src, size), immVal));
    } else {
        if (dst.GetId() != src.GetId()) {
            GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
            GetMasm()->add(ArchReg(dst), ArchReg(src));
        } else {
            // dst aliases src: stage the (negated) immediate in a temporary so
            // the source value is not clobbered before the addition.
            ScopedTmpReg tmpReg(this, dst.GetType());
            GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
            GetMasm()->add(ArchReg(dst), ArchReg(tmpReg));
        }
    }
}
1908 
// dst = src << imm (scalar shift-left by an immediate count).
void Amd64Encoder::EncodeShl(Reg dst, Reg src, Imm imm)
{
    ASSERT(dst.IsScalar());
    EncodeMov(dst, src);
    GetMasm()->shl(ArchReg(dst), ArchImm(imm));
}
1915 
// dst = src >> imm (scalar logical / zero-filling shift by an immediate count).
void Amd64Encoder::EncodeShr(Reg dst, Reg src, Imm imm)
{
    ASSERT(dst.IsScalar());

    EncodeMov(dst, src);
    GetMasm()->shr(ArchReg(dst), ArchImm(imm));
}
1923 
// dst = src >> imm (scalar arithmetic / sign-filling shift by an immediate count).
void Amd64Encoder::EncodeAShr(Reg dst, Reg src, Imm imm)
{
    ASSERT(dst.IsScalar());
    EncodeMov(dst, src);
    GetMasm()->sar(ArchReg(dst), ArchImm(imm));
}
1930 
// dst = src & imm. For sub-64-bit sources, all bits above the source width are
// set in the mask so the AND only clears bits inside the operand's width (and
// the immediate can be encoded in sign-extended form); the widened mask is
// then clipped back to the destination width.
void Amd64Encoder::EncodeAnd(Reg dst, Reg src, Imm imm)
{
    ASSERT(dst.IsScalar());
    auto immVal = ImmToUnsignedInt(imm);

    // Fill the bits above the source width with ones.
    switch (src.GetSize()) {
        case BYTE_SIZE:
            immVal |= ~uint64_t(0xFF);  // NOLINT
            break;
        case HALF_SIZE:
            immVal |= ~uint64_t(0xFFFF);  // NOLINT
            break;
        case WORD_SIZE:
            immVal |= ~uint64_t(0xFFFFFFFF);  // NOLINT
            break;
        default:
            break;
    }

    // Drop mask bits the destination cannot hold.
    if (dst.GetSize() != DOUBLE_WORD_SIZE) {
        // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult)
        immVal &= (uint64_t(1) << dst.GetSize()) - 1;
    }

    if (ImmFitsSize(immVal, dst.GetSize())) {
        EncodeMov(dst, src);
        GetMasm()->and_(ArchReg(dst), immVal);
    } else {
        if (dst.GetId() != src.GetId()) {
            GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
            GetMasm()->and_(ArchReg(dst), ArchReg(src));
        } else {
            // dst aliases src: stage the immediate in a temporary so the source
            // value is not clobbered before the AND.
            ScopedTmpReg tmpReg(this, dst.GetType());
            GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
            GetMasm()->and_(ArchReg(dst), ArchReg(tmpReg));
        }
    }
}
1969 
EncodeOr(Reg dst,Reg src,Imm imm)1970 void Amd64Encoder::EncodeOr(Reg dst, Reg src, Imm imm)
1971 {
1972     ASSERT(dst.IsScalar());
1973     auto immVal = ImmToUnsignedInt(imm);
1974     if (ImmFitsSize(immVal, dst.GetSize())) {
1975         EncodeMov(dst, src);
1976         GetMasm()->or_(ArchReg(dst), immVal);
1977     } else {
1978         if (dst.GetId() != src.GetId()) {
1979             GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
1980             GetMasm()->or_(ArchReg(dst), ArchReg(src));
1981         } else {
1982             ScopedTmpReg tmpReg(this, dst.GetType());
1983             GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
1984             GetMasm()->or_(ArchReg(dst), ArchReg(tmpReg));
1985         }
1986     }
1987 }
1988 
EncodeXor(Reg dst,Reg src,Imm imm)1989 void Amd64Encoder::EncodeXor(Reg dst, Reg src, Imm imm)
1990 {
1991     ASSERT(dst.IsScalar());
1992     auto immVal = ImmToUnsignedInt(imm);
1993     if (ImmFitsSize(immVal, dst.GetSize())) {
1994         EncodeMov(dst, src);
1995         GetMasm()->xor_(ArchReg(dst), immVal);
1996     } else {
1997         if (dst.GetId() != src.GetId()) {
1998             GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
1999             GetMasm()->xor_(ArchReg(dst), ArchReg(src));
2000         } else {
2001             ScopedTmpReg tmpReg(this, dst.GetType());
2002             GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
2003             GetMasm()->xor_(ArchReg(dst), ArchReg(tmpReg));
2004         }
2005     }
2006 }
2007 
EncodeMov(Reg dst,Imm src)2008 void Amd64Encoder::EncodeMov(Reg dst, Imm src)
2009 {
2010     if (dst.IsScalar()) {
2011         if (dst.GetSize() < WORD_SIZE) {
2012             GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
2013         }
2014         GetMasm()->mov(ArchReg(dst), ArchImm(src));
2015         return;
2016     }
2017 
2018     if (dst.GetType() == FLOAT32_TYPE) {
2019         ScopedTmpRegU32 tmpReg(this);
2020         auto val = bit_cast<uint32_t>(src.GetAsFloat());
2021         GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(val));
2022         GetMasm()->movd(ArchVReg(dst), ArchReg(tmpReg));
2023     } else {
2024         ScopedTmpRegU64 tmpReg(this);
2025         auto val = bit_cast<uint64_t>(src.GetAsDouble());
2026         GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(val));
2027         GetMasm()->movq(ArchVReg(dst), ArchReg(tmpReg));
2028     }
2029 }
2030 
// Load from memory into dst. FP loads use movss/movsd; integer loads narrower
// than 64 bits are widened according to dstSigned (movsx/movsxd vs movzx, the
// latter relying on the implicit zero-extension of 32-bit writes).
void Amd64Encoder::EncodeLdr(Reg dst, bool dstSigned, MemRef mem)
{
    auto m = ArchMem(mem).Prepare(GetMasm());

    if (dst.GetType() == FLOAT32_TYPE) {
        GetMasm()->movss(ArchVReg(dst), m);
        return;
    }
    if (dst.GetType() == FLOAT64_TYPE) {
        GetMasm()->movsd(ArchVReg(dst), m);
        return;
    }

    m.setSize(dst.GetSize() / BITS_PER_BYTE);

    if (dstSigned && dst.GetSize() < DOUBLE_WORD_SIZE) {
        // movsxd handles the 32->64 case; movsx covers 8/16-bit sources.
        if (dst.GetSize() == WORD_SIZE) {
            GetMasm()->movsxd(ArchReg(dst, DOUBLE_WORD_SIZE), m);
        } else {
            GetMasm()->movsx(ArchReg(dst, DOUBLE_WORD_SIZE), m);
        }
        return;
    }
    if (!dstSigned && dst.GetSize() < WORD_SIZE) {
        // 32-bit destination write zero-extends to the full 64-bit register.
        GetMasm()->movzx(ArchReg(dst, WORD_SIZE), m);
        return;
    }

    GetMasm()->mov(ArchReg(dst), m);
}
2061 
// Load with acquire semantics. On amd64 an ordinary load already has acquire
// ordering, so this is just a plain load.
void Amd64Encoder::EncodeLdrAcquire(Reg dst, bool dstSigned, MemRef mem)
{
    EncodeLdr(dst, dstSigned, mem);
    // LoadLoad and LoadStore barrier should be here, but this is no-op in amd64 memory model
}
2067 
// Store src to memory. FP stores use movss/movsd; integer stores write exactly
// src's width (the memory operand size is set explicitly).
void Amd64Encoder::EncodeStr(Reg src, MemRef mem)
{
    auto m = ArchMem(mem).Prepare(GetMasm());

    if (src.GetType() == FLOAT32_TYPE) {
        GetMasm()->movss(m, ArchVReg(src));
        return;
    }
    if (src.GetType() == FLOAT64_TYPE) {
        GetMasm()->movsd(m, ArchVReg(src));
        return;
    }

    m.setSize(src.GetSize() / BITS_PER_BYTE);
    GetMasm()->mov(m, ArchReg(src));
}
2084 
// Store with release (sequentially-consistent) semantics: the plain store is
// followed by a locked RMW on the stack slot, which acts as a full fence
// without requiring mfence.
void Amd64Encoder::EncodeStrRelease(Reg src, MemRef mem)
{
    // StoreStore barrier should be here, but this is no-op in amd64 memory model
    EncodeStr(src, mem);
    // this is StoreLoad barrier (which is also full memory barrier in amd64 memory model)
    GetMasm()->lock().add(asmjit::x86::dword_ptr(asmjit::x86::rsp), asmjit::imm(0));
}
2092 
// Store src zero-extended to a full 64-bit memory slot. Narrow scalars are
// staged through a cleared temporary; FLOAT32 values are widened via a zeroed
// XMM temporary (movss into a zeroed register leaves the upper bits zero).
void Amd64Encoder::EncodeStrz(Reg src, MemRef mem)
{
    if (src.IsScalar()) {
        if (src.GetSize() == DOUBLE_WORD_SIZE) {
            GetMasm()->mov(ArchMem(mem).Prepare(GetMasm()), ArchReg(src));
        } else {
            // Clear the temporary, copy the narrow value into its low bits,
            // then store all 64 bits.
            ScopedTmpRegU64 tmpReg(this);
            GetMasm()->xor_(ArchReg(tmpReg), ArchReg(tmpReg));
            GetMasm()->mov(ArchReg(tmpReg, src.GetSize()), ArchReg(src));
            GetMasm()->mov(ArchMem(mem).Prepare(GetMasm()), ArchReg(tmpReg));
        }
    } else {
        if (src.GetType() == FLOAT64_TYPE) {
            GetMasm()->movsd(ArchMem(mem).Prepare(GetMasm()), ArchVReg(src));
        } else {
            ScopedTmpRegF64 tmpReg(this);

            // movss reg,reg only writes the low 32 bits, so zero the temporary
            // first to get a zero-extended 64-bit store.
            GetMasm()->xorpd(ArchVReg(tmpReg), ArchVReg(tmpReg));
            GetMasm()->movss(ArchVReg(tmpReg), ArchVReg(src));
            GetMasm()->movsd(ArchMem(mem).Prepare(GetMasm()), ArchVReg(tmpReg));
        }
    }
}
2116 
// Store an integer immediate of srcSizeBytes to memory. Stores of up to
// HALF_WORD_SIZE_BYTES use the mov mem,imm form directly; 8-byte stores do too
// when the immediate is encodable (ImmFitsSize — presumably the sign-extended
// imm32 constraint of mov m64,imm32; confirm), otherwise the value is staged
// through a register.
void Amd64Encoder::EncodeSti(int64_t src, uint8_t srcSizeBytes, MemRef mem)
{
    ASSERT(srcSizeBytes <= 8U);
    auto m = ArchMem(mem).Prepare(GetMasm());
    if (srcSizeBytes <= HALF_WORD_SIZE_BYTES) {
        m.setSize(srcSizeBytes);
        GetMasm()->mov(m, asmjit::imm(src));
    } else {
        m.setSize(DOUBLE_WORD_SIZE_BYTES);

        if (ImmFitsSize(src, DOUBLE_WORD_SIZE)) {
            GetMasm()->mov(m, asmjit::imm(src));
        } else {
            ScopedTmpRegU64 tmpReg(this);
            GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(src));
            GetMasm()->mov(m, ArchReg(tmpReg));
        }
    }
}
2136 
// Store a float immediate by reinterpreting its bits as a 32-bit integer store.
void Amd64Encoder::EncodeSti(float src, MemRef mem)
{
    EncodeSti(bit_cast<int32_t>(src), sizeof(int32_t), mem);
}
2141 
// Store a double immediate by reinterpreting its bits as a 64-bit integer store.
void Amd64Encoder::EncodeSti(double src, MemRef mem)
{
    EncodeSti(bit_cast<int64_t>(src), sizeof(int64_t), mem);
}
2146 
EncodeMemCopy(MemRef memFrom,MemRef memTo,size_t size)2147 void Amd64Encoder::EncodeMemCopy(MemRef memFrom, MemRef memTo, size_t size)
2148 {
2149     ScopedTmpRegU64 tmpReg(this);
2150     GetMasm()->mov(ArchReg(tmpReg, size), ArchMem(memFrom).Prepare(GetMasm()));
2151     GetMasm()->mov(ArchMem(memTo).Prepare(GetMasm()), ArchReg(tmpReg, size));
2152 }
2153 
EncodeMemCopyz(MemRef memFrom,MemRef memTo,size_t size)2154 void Amd64Encoder::EncodeMemCopyz(MemRef memFrom, MemRef memTo, size_t size)
2155 {
2156     ScopedTmpRegU64 tmpReg(this);
2157     if (size < DOUBLE_WORD_SIZE) {
2158         GetMasm()->xor_(ArchReg(tmpReg), ArchReg(tmpReg));
2159     }
2160     GetMasm()->mov(ArchReg(tmpReg, size), ArchMem(memFrom).Prepare(GetMasm()));
2161     GetMasm()->mov(ArchMem(memTo).Prepare(GetMasm()), ArchReg(tmpReg));
2162 }
2163 
// dst = (src0 cc src1) ? 1 : 0. Scalars use cmp + setcc (the zeroing mov does
// not affect flags). FP operands use ucomis*, which reports unordered (NaN)
// through PF: when cc is a condition that holds for NaN, the parity path sets
// dst; otherwise the unordered case just leaves the zeroed dst untouched.
void Amd64Encoder::EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc)
{
    if (src0.IsScalar()) {
        GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
    } else {
        if (src0.GetType() == FLOAT32_TYPE) {
            GetMasm()->ucomiss(ArchVReg(src0), ArchVReg(src1));
        } else {
            GetMasm()->ucomisd(ArchVReg(src0), ArchVReg(src1));
        }
    }
    GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(0));

    if (src0.IsScalar()) {
        GetMasm()->set(ArchCc(cc), ArchReg(dst, BYTE_SIZE));
        return;
    }

    auto end = GetMasm()->newLabel();

    // NaN handling: PF=1 means unordered.
    if (CcMatchesNan(cc)) {
        GetMasm()->setp(ArchReg(dst, BYTE_SIZE));
    }
    GetMasm()->jp(end);
    GetMasm()->set(ArchCc(cc, true), ArchReg(dst, BYTE_SIZE));

    GetMasm()->bind(end);
}
2192 
// dst = ((src0 & src1) cc 0) ? 1 : 0, using test + setcc (scalar only; the
// zeroing mov does not affect flags).
void Amd64Encoder::EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc)
{
    ASSERT(src0.IsScalar());

    GetMasm()->test(ArchReg(src0), ArchReg(src1));

    GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(0));
    GetMasm()->set(ArchCcTest(cc), ArchReg(dst, BYTE_SIZE));
}
2202 
// Atomically OR the low byte of `value` into the byte at [addr] using a locked
// RMW. fastEncoding is irrelevant here: the lock-prefixed form is already the
// single-instruction encoding on amd64.
void Amd64Encoder::EncodeAtomicByteOr(Reg addr, Reg value, [[maybe_unused]] bool fastEncoding)
{
    GetMasm()->lock().or_(asmjit::x86::byte_ptr(ArchReg(addr)), ArchReg(value, ark::compiler::BYTE_SIZE));
}
2207 
// Three-way compare: dst = -1 / 0 / +1 for src0 <, ==, > src1. For FP inputs
// cc encodes NaN ordering: LT makes NaN compare "less" (result -1), MI makes
// it compare "greater" (result +1); the jp takes the preloaded value on an
// unordered compare. The ordered FP result is then classified with the
// unsigned-below condition (ucomis* sets CF like an unsigned compare).
void Amd64Encoder::EncodeCmp(Reg dst, Reg src0, Reg src1, Condition cc)
{
    auto end = GetMasm()->newLabel();

    if (src0.IsFloat()) {
        ASSERT(src1.IsFloat());
        ASSERT(cc == Condition::MI || cc == Condition::LT);

        if (src0.GetType() == FLOAT32_TYPE) {
            GetMasm()->ucomiss(ArchVReg(src0), ArchVReg(src1));
        } else {
            GetMasm()->ucomisd(ArchVReg(src0), ArchVReg(src1));
        }

        // Preload the NaN result; it is kept only if jp (unordered) is taken.
        GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), cc == Condition::LT ? asmjit::imm(-1) : asmjit::imm(1));
        cc = Condition::LO;

        GetMasm()->jp(end);
    } else {
        ASSERT(src0.IsScalar() && src1.IsScalar());
        ASSERT(cc == Condition::LO || cc == Condition::LT);
        GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
    }
    // dst = (src0 != src1) ? 1 : 0, then negate to -1 when "less" holds.
    GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(0));
    GetMasm()->setne(ArchReg(dst, BYTE_SIZE));

    GetMasm()->j(asmjit::x86::Condition::negate(ArchCc(cc)), end);
    GetMasm()->neg(ArchReg(dst));

    GetMasm()->bind(end);
}
2239 
// dst = (src2 cc src3) ? src0 : src1, via cmov. FP comparands use comis*
// (NOTE(review): unlike EncodeCompare this is the signaling compare — confirm
// that is intentional); NaN results are routed through PF. If dst aliases
// src0, the selection runs in a temporary so the initial mov of src1 does not
// clobber src0.
void Amd64Encoder::EncodeSelect(ArgsSelect &&args)
{
    auto [dst, src0, src1, src2, src3, cc] = args;
    ASSERT(!src0.IsFloat() && !src1.IsFloat());
    if (src2.IsScalar()) {
        GetMasm()->cmp(ArchReg(src2), ArchReg(src3));
    } else if (src2.GetType() == FLOAT32_TYPE) {
        GetMasm()->comiss(ArchVReg(src2), ArchVReg(src3));
    } else {
        GetMasm()->comisd(ArchVReg(src2), ArchVReg(src3));
    }

    // cmov needs at least 32-bit operands.
    auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
    bool dstAliased = dst.GetId() == src0.GetId();
    ScopedTmpReg tmpReg(this, dst.GetType());
    auto dstReg = dstAliased ? ArchReg(tmpReg, size) : ArchReg(dst, size);

    GetMasm()->mov(dstReg, ArchReg(src1, size));

    if (src2.IsScalar()) {
        GetMasm()->cmov(ArchCc(cc), dstReg, ArchReg(src0, size));
    } else if (CcMatchesNan(cc)) {
        // Condition holds for NaN: select src0 on unordered (PF) as well.
        GetMasm()->cmovp(dstReg, ArchReg(src0, size));
        GetMasm()->cmov(ArchCc(cc, src2.IsFloat()), dstReg, ArchReg(src0, size));
    } else {
        // Condition fails for NaN: skip the cmov entirely on unordered.
        auto end = GetMasm()->newLabel();

        GetMasm()->jp(end);
        GetMasm()->cmov(ArchCc(cc, src2.IsFloat()), dstReg, ArchReg(src0, size));

        GetMasm()->bind(end);
    }
    if (dstAliased) {
        EncodeMov(dst, tmpReg);
    }
}
2276 
// dst = (src2 cc imm) ? src0 : src1, via cmov (scalar operands only). The
// immediate is staged through a temporary if it cannot be encoded in cmp. If
// dst aliases src0, the selection runs in a temporary so the initial mov of
// src1 does not clobber src0.
void Amd64Encoder::EncodeSelect(ArgsSelectImm &&args)
{
    auto [dst, src0, src1, src2, imm, cc] = args;
    ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());

    auto immVal = imm.GetAsInt();
    if (ImmFitsSize(immVal, src2.GetSize())) {
        GetMasm()->cmp(ArchReg(src2), asmjit::imm(immVal));
    } else {
        // Immediate not encodable: compare via a register (scoped to this branch).
        ScopedTmpReg tmpReg(this, src2.GetType());
        GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
        GetMasm()->cmp(ArchReg(src2), ArchReg(tmpReg));
    }

    ScopedTmpReg tmpReg(this, dst.GetType());
    // cmov needs at least 32-bit operands.
    auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
    bool dstAliased = dst.GetId() == src0.GetId();
    auto dstReg = dstAliased ? ArchReg(tmpReg, size) : ArchReg(dst, size);

    GetMasm()->mov(dstReg, ArchReg(src1, size));
    GetMasm()->cmov(ArchCc(cc), dstReg, ArchReg(src0, size));
    if (dstAliased) {
        EncodeMov(dst, tmpReg);
    }
}
2302 
// dst = ((src2 & src3) cc 0) ? src0 : src1, using test + cmov (scalar only).
// If dst aliases src0, the selection runs in a temporary so the initial mov of
// src1 does not clobber src0.
void Amd64Encoder::EncodeSelectTest(ArgsSelect &&args)
{
    auto [dst, src0, src1, src2, src3, cc] = args;
    ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());

    GetMasm()->test(ArchReg(src2), ArchReg(src3));

    ScopedTmpReg tmpReg(this, dst.GetType());
    // cmov needs at least 32-bit operands.
    auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
    bool dstAliased = dst.GetId() == src0.GetId();
    auto dstReg = dstAliased ? ArchReg(tmpReg, size) : ArchReg(dst, size);

    GetMasm()->mov(dstReg, ArchReg(src1, size));
    GetMasm()->cmov(ArchCcTest(cc), dstReg, ArchReg(src0, size));
    if (dstAliased) {
        EncodeMov(dst, tmpReg);
    }
}
2321 
// dst = ((src2 & imm) cc 0) ? src0 : src1, using test + cmov (scalar only).
// The immediate is staged through a temporary when it cannot be encoded in
// test. If dst aliases src0, the selection runs in a temporary so the initial
// mov of src1 does not clobber src0.
void Amd64Encoder::EncodeSelectTest(ArgsSelectImm &&args)
{
    auto [dst, src0, src1, src2, imm, cc] = args;
    ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());

    auto immVal = imm.GetAsInt();
    if (ImmFitsSize(immVal, src2.GetSize())) {
        GetMasm()->test(ArchReg(src2), asmjit::imm(immVal));
    } else {
        // Immediate not encodable: test via a register (scoped to this branch).
        ScopedTmpReg tmpReg(this, src2.GetType());
        GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
        GetMasm()->test(ArchReg(src2), ArchReg(tmpReg));
    }

    ScopedTmpReg tmpReg(this, dst.GetType());
    // cmov needs at least 32-bit operands.
    auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
    bool dstAliased = dst.GetId() == src0.GetId();
    auto dstReg = dstAliased ? ArchReg(tmpReg, size) : ArchReg(dst, size);

    GetMasm()->mov(dstReg, ArchReg(src1, size));
    GetMasm()->cmov(ArchCcTest(cc), dstReg, ArchReg(src0, size));
    if (dstAliased) {
        EncodeMov(dst, tmpReg);
    }
}
2347 
// Load a register pair from consecutive memory slots (dst0 from the lower
// address, dst1 from the next slot of the same width). Signed 32-bit pairs are
// sign-extended to 64 bits with movsxd.
void Amd64Encoder::EncodeLdp(Reg dst0, Reg dst1, bool dstSigned, MemRef mem)
{
    ASSERT(dst0.IsFloat() == dst1.IsFloat());
    ASSERT(dst0.GetSize() == dst1.GetSize());

    auto m = ArchMem(mem).Prepare(GetMasm());

    if (dst0.IsFloat()) {
        if (dst0.GetType() == FLOAT32_TYPE) {
            GetMasm()->movss(ArchVReg(dst0), m);

            m.addOffset(WORD_SIZE_BYTES);
            GetMasm()->movss(ArchVReg(dst1), m);
        } else {
            GetMasm()->movsd(ArchVReg(dst0), m);

            m.addOffset(DOUBLE_WORD_SIZE_BYTES);
            GetMasm()->movsd(ArchVReg(dst1), m);
        }
        return;
    }

    if (dstSigned && dst0.GetSize() == WORD_SIZE) {
        m.setSize(WORD_SIZE_BYTES);
        GetMasm()->movsxd(ArchReg(dst0, DOUBLE_WORD_SIZE), m);

        m.addOffset(WORD_SIZE_BYTES);
        GetMasm()->movsxd(ArchReg(dst1, DOUBLE_WORD_SIZE), m);
        return;
    }

    GetMasm()->mov(ArchReg(dst0), m);

    m.addOffset(dst0.GetSize() / BITS_PER_BYTE);
    GetMasm()->mov(ArchReg(dst1), m);
}
2384 
// Store a register pair to consecutive memory slots (src0 to the lower
// address, src1 to the next slot of the same width).
void Amd64Encoder::EncodeStp(Reg src0, Reg src1, MemRef mem)
{
    ASSERT(src0.IsFloat() == src1.IsFloat());
    ASSERT(src0.GetSize() == src1.GetSize());

    auto m = ArchMem(mem).Prepare(GetMasm());

    if (src0.IsFloat()) {
        if (src0.GetType() == FLOAT32_TYPE) {
            GetMasm()->movss(m, ArchVReg(src0));

            m.addOffset(WORD_SIZE_BYTES);
            GetMasm()->movss(m, ArchVReg(src1));
        } else {
            GetMasm()->movsd(m, ArchVReg(src0));

            m.addOffset(DOUBLE_WORD_SIZE_BYTES);
            GetMasm()->movsd(m, ArchVReg(src1));
        }
        return;
    }

    GetMasm()->mov(m, ArchReg(src0));

    m.addOffset(src0.GetSize() / BITS_PER_BYTE);
    GetMasm()->mov(m, ArchReg(src1));
}
2412 
// Reverse the byte order of src into dst. 16-bit values swap their two bytes
// with a rotate; wider values use bswap (which has no 16-bit form).
void Amd64Encoder::EncodeReverseBytes(Reg dst, Reg src)
{
    ASSERT(src.GetSize() > BYTE_SIZE);
    ASSERT(src.GetSize() == dst.GetSize());
    ASSERT(src.IsValid());
    ASSERT(dst.IsValid());

    if (src != dst) {
        GetMasm()->mov(ArchReg(dst), ArchReg(src));
    }

    if (src.GetSize() == HALF_SIZE) {
        // rol by 8 swaps the two bytes of the 16-bit value; the result is then
        // sign-extended to 32 bits (presumably Java-short semantics — confirm).
        GetMasm()->rol(ArchReg(dst), BYTE_SIZE);
        GetMasm()->movsx(ArchReg(dst, WORD_SIZE), ArchReg(dst));
    } else {
        GetMasm()->bswap(ArchReg(dst));
    }
}
2431 
// Vector op: zero-extend the packed bytes of src into packed 16-bit words in
// dst (pmovzxbw, SSE4.1); operates on XMM registers, not GPRs.
void Amd64Encoder::EncodeUnsignedExtendBytesToShorts(Reg dst, Reg src)
{
    GetMasm()->pmovzxbw(ArchVReg(dst), ArchVReg(src));
}
2436 
/* Attention: the encoder below operates on vector registers, not GPRs */
// Reverse the order of the four 16-bit lanes in the low 64 bits of src
// (pshuflw with shuffle control 0b00'01'10'11 picks lanes 3,2,1,0).
void Amd64Encoder::EncodeReverseHalfWords(Reg dst, Reg src)
{
    ASSERT(src.GetSize() == dst.GetSize());
    ASSERT(src.IsValid());
    ASSERT(dst.IsValid());

    constexpr unsigned MASK = 0x1b;  // reverse mask: 00 01 10 11
    GetMasm()->pshuflw(ArchVReg(dst), ArchVReg(src), MASK);
}
2447 
// An immediate can be used in add/sub/cmp iff it is encodable at the operand
// size; x86 has no separate signed/unsigned encoding constraint, so the
// signedCompare hint is unused here.
bool Amd64Encoder::CanEncodeImmAddSubCmp(int64_t imm, uint32_t size, [[maybe_unused]] bool signedCompare)
{
    return ImmFitsSize(imm, size);
}
2452 
// dst0 = number of set bits in src0, via the popcnt instruction. The popcnt
// operand width follows the source; the 32-bit destination is what callers expect.
void Amd64Encoder::EncodeBitCount(Reg dst0, Reg src0)
{
    ASSERT(src0.GetSize() == WORD_SIZE || src0.GetSize() == DOUBLE_WORD_SIZE);
    ASSERT(dst0.GetSize() == WORD_SIZE);
    ASSERT(src0.IsScalar() && dst0.IsScalar());

    GetMasm()->popcnt(ArchReg(dst0, src0.GetSize()), ArchReg(src0));
}
2461 
// dst = count of leading zero bits in src. bsr yields the index of the highest
// set bit; xor-ing with size-1 (size is a power of two) converts that index to
// (size-1) - index, i.e. the leading-zero count. bsr leaves its destination
// undefined for a zero input, so zero is branched out and mapped to `size`.
void Amd64Encoder::EncodeCountLeadingZeroBits(Reg dst, Reg src)
{
    auto end = CreateLabel();
    auto zero = CreateLabel();
    EncodeJump(zero, src, Condition::EQ);
    GetMasm()->bsr(ArchReg(dst), ArchReg(src));
    GetMasm()->xor_(ArchReg(dst), asmjit::imm(dst.GetSize() - 1));
    EncodeJump(end);

    // clz(0) == operand width.
    BindLabel(zero);
    GetMasm()->mov(ArchReg(dst), asmjit::imm(dst.GetSize()));

    BindLabel(end);
}
2476 
// dst = count of trailing zero bits in src. bsf sets ZF (and leaves its result
// undefined) when src == 0; mov does not touch flags, so the fallback `size`
// is preloaded and then overwritten by the bsf result via cmovne when src != 0.
void Amd64Encoder::EncodeCountTrailingZeroBits(Reg dst, Reg src)
{
    ScopedTmpReg tmp(this, src.GetType());
    GetMasm()->bsf(ArchReg(tmp), ArchReg(src));
    GetMasm()->mov(ArchReg(dst), asmjit::imm(dst.GetSize()));
    GetMasm()->cmovne(ArchReg(dst), ArchReg(tmp));
}
2484 
/// Ceil for doubles: ROUNDSD with immediate 2 = round toward +infinity.
void Amd64Encoder::EncodeCeil(Reg dst, Reg src)
{
    // NOLINTNEXTLINE(readability-magic-numbers)
    GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(2_I));
}
2490 
/// Floor for doubles: ROUNDSD with immediate 1 = round toward -infinity.
void Amd64Encoder::EncodeFloor(Reg dst, Reg src)
{
    GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(1));
}
2495 
/// Rint for doubles: ROUNDSD with immediate 0 = round to nearest, ties to even.
void Amd64Encoder::EncodeRint(Reg dst, Reg src)
{
    GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(0));
}
2500 
/// Trunc for doubles: ROUNDSD with immediate 3 = round toward zero (truncate).
void Amd64Encoder::EncodeTrunc(Reg dst, Reg src)
{
    GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(3_I));
}
2505 
/**
 * Round a double half-away-from-zero: dst = trunc(src + copysign(C, src))
 * where C = 0x3fdfffffffffffff = 0.49999999999999994, the largest double
 * strictly below 0.5. Using exactly 0.5 would mis-round inputs such as
 * 0.49999999999999994 (src + 0.5 rounds up to 1.0 before truncation).
 */
void Amd64Encoder::EncodeRoundAway(Reg dst, Reg src)
{
    ASSERT(src.GetType() == FLOAT64_TYPE);
    ASSERT(dst.GetType() == FLOAT64_TYPE);

    ScopedTmpReg tv(this, src.GetType());
    ScopedTmpReg tv1(this, src.GetType());
    ScopedTmpRegU64 ti(this);
    auto dest = dst;

    // If dst aliases src, work in a temporary so src is still intact for addsd below.
    auto shared = src == dst;

    if (shared) {
        dest = tv1.GetReg();
    }
    GetMasm()->movapd(ArchVReg(dest), ArchVReg(src));

    // Isolate the sign bit of src in dest.
    constexpr auto SIGN_BIT_MASK = 0x8000000000000000ULL;
    GetMasm()->mov(ArchReg(ti), asmjit::imm(SIGN_BIT_MASK));
    GetMasm()->movq(ArchVReg(tv), ArchReg(ti));
    GetMasm()->andpd(ArchVReg(dest), ArchVReg(tv));

    // OR in the magnitude: dest = copysign(0.49999999999999994, src).
    constexpr auto DOUBLE_POINT_FIVE = 0x3fdfffffffffffffULL;  // .49999999999999994
    GetMasm()->mov(ArchReg(ti), asmjit::imm(DOUBLE_POINT_FIVE));
    GetMasm()->movq(ArchVReg(tv), ArchReg(ti));
    GetMasm()->orpd(ArchVReg(dest), ArchVReg(tv));

    // Bias toward the sign of src, then truncate (ROUNDSD imm 3 = toward zero).
    GetMasm()->addsd(ArchVReg(dest), ArchVReg(src));
    GetMasm()->roundsd(ArchVReg(dest), ArchVReg(dest), asmjit::imm(3_I));
    if (shared) {
        GetMasm()->movapd(ArchVReg(dst), ArchVReg(dest));
    }
}
2539 
/**
 * Round a float half-up (toward +infinity on ties) and convert to int32:
 *   t1 = floor(src); if (src - t1 >= 0.5) t1 += 1.0;
 *   dst = (t1 >= INT32_MAX) ? INT32_MAX : isnan -> 0 : (int32)t1;
 * The flag usage is delicate: COMISS sets CF for "below" and PF for
 * unordered; the MOVs in between do not alter flags.
 */
void Amd64Encoder::EncodeRoundToPInfFloat(Reg dst, Reg src)
{
    ScopedTmpReg t1(this, src.GetType());
    ScopedTmpReg t2(this, src.GetType());
    ScopedTmpReg t3(this, src.GetType());
    ScopedTmpReg t4(this, dst.GetType());

    auto skipIncrId = CreateLabel();
    auto doneId = CreateLabel();

    auto skipIncr = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(skipIncrId);
    auto done = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(doneId);

    // t1 = floor(src) (ROUNDSS imm 1 = toward -inf); t2 = fractional part.
    GetMasm()->movss(ArchVReg(t2), ArchVReg(src));
    GetMasm()->roundss(ArchVReg(t1), ArchVReg(src), asmjit::imm(1));
    GetMasm()->subss(ArchVReg(t2), ArchVReg(t1));
    // NOLINTNEXTLINE(readability-magic-numbers)
    GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int32_t, float>(0.5F)));
    GetMasm()->movd(ArchVReg(t3), ArchReg(t4));
    GetMasm()->comiss(ArchVReg(t2), ArchVReg(t3));
    // Fraction < 0.5 -> keep the floor; otherwise bump by 1.0.
    GetMasm()->j(asmjit::x86::Condition::Code::kB, *skipIncr);
    // NOLINTNEXTLINE(readability-magic-numbers)
    GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int32_t, float>(1.0F)));
    GetMasm()->movd(ArchVReg(t3), ArchReg(t4));
    GetMasm()->addss(ArchVReg(t1), ArchVReg(t3));
    BindLabel(skipIncrId);

    // Saturate: preload INT32_MAX into dst and compare the rounded value against it.
    // NOLINTNEXTLINE(readability-magic-numbers)
    GetMasm()->mov(ArchReg(dst), asmjit::imm(0x7FFFFFFF));
    GetMasm()->cvtsi2ss(ArchVReg(t2), ArchReg(dst));
    GetMasm()->comiss(ArchVReg(t1), ArchVReg(t2));
    GetMasm()->j(asmjit::x86::Condition::Code::kAE,
                 *done);                           // clipped to max (already in dst), does not jump on unordered
    GetMasm()->mov(ArchReg(dst), asmjit::imm(0));  // does not change flags
    GetMasm()->j(asmjit::x86::Condition::Code::kParityEven, *done);  // NaN mapped to 0 (just moved in dst)
    GetMasm()->cvttss2si(ArchReg(dst), ArchVReg(t1));
    BindLabel(doneId);
}
2578 
/**
 * Round a double half-up (toward +infinity on ties) and convert to int64:
 *   t1 = floor(src); if (src - t1 >= 0.5) t1 += 1.0;
 *   dst = (t1 >= INT64_MAX) ? INT64_MAX : isnan -> 0 : (int64)t1;
 * Mirrors EncodeRoundToPInfFloat with the double-precision instructions.
 */
void Amd64Encoder::EncodeRoundToPInfDouble(Reg dst, Reg src)
{
    ScopedTmpReg t1(this, src.GetType());
    ScopedTmpReg t2(this, src.GetType());
    ScopedTmpReg t3(this, src.GetType());
    ScopedTmpReg t4(this, dst.GetType());

    auto skipIncrId = CreateLabel();
    auto doneId = CreateLabel();

    auto skipIncr = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(skipIncrId);
    auto done = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(doneId);

    // t1 = floor(src) (ROUNDSD imm 1 = toward -inf); t2 = fractional part.
    GetMasm()->movsd(ArchVReg(t2), ArchVReg(src));
    GetMasm()->roundsd(ArchVReg(t1), ArchVReg(src), asmjit::imm(1));
    GetMasm()->subsd(ArchVReg(t2), ArchVReg(t1));
    // NOTE(review): the literal is 0.5F but is converted to double before
    // bit_cast, and 0.5f == 0.5 exactly — value is correct; plain 0.5 would
    // read better. Confirm and normalize when touching this code.
    // NOLINTNEXTLINE(readability-magic-numbers)
    GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int64_t, double>(0.5F)));
    GetMasm()->movq(ArchVReg(t3), ArchReg(t4));
    GetMasm()->comisd(ArchVReg(t2), ArchVReg(t3));
    // Fraction < 0.5 -> keep the floor; otherwise bump by 1.0.
    GetMasm()->j(asmjit::x86::Condition::Code::kB, *skipIncr);
    // NOLINTNEXTLINE(readability-magic-numbers)
    GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int64_t, double>(1.0)));
    GetMasm()->movq(ArchVReg(t3), ArchReg(t4));
    GetMasm()->addsd(ArchVReg(t1), ArchVReg(t3));
    BindLabel(skipIncrId);

    // Saturate: preload INT64_MAX into dst and compare the rounded value against it.
    // NOLINTNEXTLINE(readability-magic-numbers)
    GetMasm()->mov(ArchReg(dst), asmjit::imm(0x7FFFFFFFFFFFFFFFL));
    GetMasm()->cvtsi2sd(ArchVReg(t2), ArchReg(dst));
    GetMasm()->comisd(ArchVReg(t1), ArchVReg(t2));
    GetMasm()->j(asmjit::x86::Condition::Code::kAE,
                 *done);                           // clipped to max (already in dst), does not jump on unordered
    GetMasm()->mov(ArchReg(dst), asmjit::imm(0));  // does not change flags
    GetMasm()->j(asmjit::x86::Condition::Code::kParityEven, *done);  // NaN mapped to 0 (just moved in dst)
    GetMasm()->cvttsd2si(ArchReg(dst), ArchVReg(t1));
    BindLabel(doneId);
}
2617 
EncodeRoundToPInf(Reg dst,Reg src)2618 void Amd64Encoder::EncodeRoundToPInf(Reg dst, Reg src)
2619 {
2620     if (src.GetType() == FLOAT32_TYPE) {
2621         EncodeRoundToPInfFloat(dst, src);
2622     } else if (src.GetType() == FLOAT64_TYPE) {
2623         EncodeRoundToPInfDouble(dst, src);
2624     } else {
2625         UNREACHABLE();
2626     }
2627 }
2628 
/**
 * Reverse the bit order of a register using the classic divide-and-conquer
 * swap: rounds 0..2 exchange adjacent 1-, 2- and 4-bit groups (masks
 * 0x55.., 0x33.., 0x0f..), then BSWAP reverses the bytes.
 * T must be the unsigned integer type matching the register width.
 */
template <typename T>
void Amd64Encoder::EncodeReverseBitsImpl(Reg dst0, Reg src0)
{
    ASSERT(std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed);
    [[maybe_unused]] constexpr auto IMM_8 = 8;
    ASSERT(sizeof(T) * IMM_8 == dst0.GetSize());
    // NOLINTNEXTLINE(modernize-avoid-c-arrays)
    static constexpr T MASKS[] = {static_cast<T>(UINT64_C(0x5555555555555555)),
                                  static_cast<T>(UINT64_C(0x3333333333333333)),
                                  static_cast<T>(UINT64_C(0x0f0f0f0f0f0f0f0f))};

    ScopedTmpReg tmp(this, dst0.GetType());
    ScopedTmpReg immHolder(this, dst0.GetType());
    auto immHolderReg = ArchReg(immHolder);

    GetMasm()->mov(ArchReg(dst0), ArchReg(src0));
    GetMasm()->mov(ArchReg(tmp), ArchReg(src0));
    constexpr auto MAX_ROUNDS = 3;
    for (uint64_t round = 0; round < MAX_ROUNDS; round++) {
        auto shift = 1U << round;  // group width swapped this round: 1, 2, 4 bits
        auto mask = asmjit::imm(MASKS[round]);
        GetMasm()->shr(ArchReg(dst0), shift);
        if (dst0.GetSize() == DOUBLE_WORD_SIZE) {
            // 64-bit AND cannot take a 64-bit immediate; stage it in a register.
            GetMasm()->mov(immHolderReg, mask);
            GetMasm()->and_(ArchReg(tmp), immHolderReg);
            GetMasm()->and_(ArchReg(dst0), immHolderReg);
        } else {
            GetMasm()->and_(ArchReg(tmp), mask);
            GetMasm()->and_(ArchReg(dst0), mask);
        }
        GetMasm()->shl(ArchReg(tmp), shift);
        GetMasm()->or_(ArchReg(dst0), ArchReg(tmp));
        constexpr auto ROUND_2 = 2;
        // tmp must mirror dst0 for the next round; skip the copy after the last one.
        if (round != ROUND_2) {
            GetMasm()->mov(ArchReg(tmp), ArchReg(dst0));
        }
    }

    GetMasm()->bswap(ArchReg(dst0));
}
2669 
EncodeReverseBits(Reg dst0,Reg src0)2670 void Amd64Encoder::EncodeReverseBits(Reg dst0, Reg src0)
2671 {
2672     ASSERT(src0.GetSize() == WORD_SIZE || src0.GetSize() == DOUBLE_WORD_SIZE);
2673     ASSERT(src0.GetSize() == dst0.GetSize());
2674 
2675     if (src0.GetSize() == WORD_SIZE) {
2676         EncodeReverseBitsImpl<uint32_t>(dst0, src0);
2677         return;
2678     }
2679 
2680     EncodeReverseBitsImpl<uint64_t>(dst0, src0);
2681 }
2682 
CanEncodeScale(uint64_t imm,uint32_t size)2683 bool Amd64Encoder::CanEncodeScale(uint64_t imm, [[maybe_unused]] uint32_t size)
2684 {
2685     return imm <= 3U;
2686 }
2687 
/**
 * Whether `imm` can be used directly in a logical (and/or/xor) instruction of
 * the given operand size; on amd64 this reduces to the immediate fitting the
 * operand width.
 */
bool Amd64Encoder::CanEncodeImmLogical(uint64_t imm, uint32_t size)
{
#ifndef NDEBUG
    if (size < DOUBLE_WORD_SIZE) {
        // Test if the highest part is consistent:
        // bits above `size` must be all zeros or all ones (a sign-extension).
        ASSERT(((imm >> size) == 0) || (((~imm) >> size) == 0));
    }
#endif  // NDEBUG
    return ImmFitsSize(imm, size);
}
2698 
/// POPCNT is not part of baseline x86-64; only offer EncodeBitCount when the host CPU supports it.
bool Amd64Encoder::CanEncodeBitCount()
{
    return asmjit::CpuInfo::host().hasFeature(asmjit::x86::Features::kPOPCNT);
}
2703 
/// Whether div/mod by the constant `imm` can be strength-reduced; defers to the arch-independent check.
bool Amd64Encoder::CanOptimizeImmDivMod(uint64_t imm, bool isSigned) const
{
    return CanOptimizeImmDivModCommon(imm, isSigned);
}
2708 
/**
 * dst = (src == +inf || src == -inf) ? 1 : 0.
 *
 * The float bits are moved to a GPR and shifted left by one to discard the
 * sign, then compared against the exponent-all-ones/mantissa-zero pattern
 * (also pre-shifted by one). SETE writes the byte-sized result; the full
 * destination register was zeroed up front.
 */
void Amd64Encoder::EncodeIsInf(Reg dst, Reg src)
{
    ASSERT(dst.IsScalar() && src.IsFloat());

    GetMasm()->xor_(ArchReg(dst, DOUBLE_WORD_SIZE), ArchReg(dst, DOUBLE_WORD_SIZE));

    if (src.GetSize() == WORD_SIZE) {
        // 0x7f800000 is the float +inf bit pattern; shifted to match the sign-stripped value.
        constexpr auto INF_MASK = uint32_t(0x7f800000) << 1U;

        ScopedTmpRegU32 tmpReg(this);
        ScopedTmpRegU32 tmp1Reg(this);
        auto tmp = ArchReg(tmpReg);
        auto tmp1 = ArchReg(tmp1Reg);

        GetMasm()->movd(tmp1, ArchVReg(src));
        GetMasm()->shl(tmp1, 1);  // drop the sign bit
        GetMasm()->mov(tmp, INF_MASK);
        GetMasm()->cmp(tmp, tmp1);
    } else {
        // 0x7ff0000000000000 is the double +inf bit pattern.
        constexpr auto INF_MASK = uint64_t(0x7ff0000000000000) << 1U;

        ScopedTmpRegU64 tmpReg(this);
        ScopedTmpRegU64 tmp1Reg(this);
        auto tmp = ArchReg(tmpReg);
        auto tmp1 = ArchReg(tmp1Reg);

        GetMasm()->movq(tmp1, ArchVReg(src));
        GetMasm()->shl(tmp1, 1);  // drop the sign bit

        GetMasm()->mov(tmp, INF_MASK);
        GetMasm()->cmp(tmp, tmp1);
    }

    GetMasm()->sete(ArchReg(dst, BYTE_SIZE));
}
2744 
/**
 * Emit flags for "fabs(src - trunc(src)) <= epsilon", i.e. "src has a
 * negligible fractional part". Leaves the UCOMISS/UCOMISD flags for the
 * caller to branch on; PF is set if src is NaN (unordered). Infinity also
 * fails the comparison via NaN from inf - inf.
 */
void Amd64Encoder::EncodeCmpFracWithDelta(Reg src)
{
    ASSERT(src.IsFloat());
    ASSERT(src.GetType() == FLOAT32_TYPE || src.GetType() == FLOAT64_TYPE);

    // Rounding control bits: Truncated (aka Round to Zero)
    constexpr uint8_t RND_CTL_TRUNCATED = 0b00000011;

    // Encode (fabs(src - trunc(src)) <= DELTA)
    if (src.GetType() == FLOAT32_TYPE) {
        ScopedTmpRegF32 tmp(this);
        ScopedTmpRegF32 delta(this);
        GetMasm()->roundss(ArchVReg(tmp), ArchVReg(src), asmjit::imm(RND_CTL_TRUNCATED));
        EncodeSub(tmp, src, tmp);
        EncodeAbs(tmp, tmp);
        EncodeMov(delta, Imm(std::numeric_limits<float>::epsilon()));
        GetMasm()->ucomiss(ArchVReg(tmp), ArchVReg(delta));
    } else {
        ScopedTmpRegF64 tmp(this);
        ScopedTmpRegF64 delta(this);
        GetMasm()->roundsd(ArchVReg(tmp), ArchVReg(src), asmjit::imm(RND_CTL_TRUNCATED));
        EncodeSub(tmp, src, tmp);
        EncodeAbs(tmp, tmp);
        EncodeMov(delta, Imm(std::numeric_limits<double>::epsilon()));
        GetMasm()->ucomisd(ArchVReg(tmp), ArchVReg(delta));
    }
}
2772 
/**
 * dst = 1 if src is a finite value with (near-)zero fractional part, else 0.
 * EncodeCmpFracWithDelta leaves PF set for NaN/Inf, in which case dst keeps
 * the pre-set 0; otherwise SETLE captures frac <= epsilon.
 */
void Amd64Encoder::EncodeIsInteger(Reg dst, Reg src)
{
    ASSERT(dst.IsScalar() && src.IsFloat());
    ASSERT(src.GetType() == FLOAT32_TYPE || src.GetType() == FLOAT64_TYPE);

    auto labelExit = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());

    GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
    EncodeCmpFracWithDelta(src);
    GetMasm()->jp(*labelExit);  // Inf or NaN
    GetMasm()->set(ArchCc(Condition::LE, true), ArchReg(dst, BYTE_SIZE));
    GetMasm()->bind(*labelExit);
}
2786 
/**
 * dst = 1 if src is an integral value that is also "safe", i.e. its magnitude
 * does not exceed the largest integer exactly representable in the source's
 * floating-point format; else 0 (including NaN/Inf).
 */
void Amd64Encoder::EncodeIsSafeInteger(Reg dst, Reg src)
{
    ASSERT(dst.IsScalar() && src.IsFloat());
    ASSERT(src.GetType() == FLOAT32_TYPE || src.GetType() == FLOAT64_TYPE);

    auto labelExit = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());

    GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));

    // Check if IsInteger
    EncodeCmpFracWithDelta(src);
    GetMasm()->jp(*labelExit);  // Inf or NaN
    GetMasm()->j(ArchCc(Condition::GT, true), *labelExit);  // fractional part too large -> not integral

    // Check if it is safe, i.e. src can be represented in float/double without losing precision
    if (src.GetType() == FLOAT32_TYPE) {
        ScopedTmpRegF32 tmp1(this);
        ScopedTmpRegF32 tmp2(this);
        EncodeAbs(tmp1, src);
        EncodeMov(tmp2, Imm(MaxIntAsExactFloat()));
        GetMasm()->ucomiss(ArchVReg(tmp1), ArchVReg(tmp2));
    } else {
        ScopedTmpRegF64 tmp1(this);
        ScopedTmpRegF64 tmp2(this);
        EncodeAbs(tmp1, src);
        EncodeMov(tmp2, Imm(MaxIntAsExactDouble()));
        GetMasm()->ucomisd(ArchVReg(tmp1), ArchVReg(tmp2));
    }
    GetMasm()->set(ArchCc(Condition::LE, true), ArchReg(dst, BYTE_SIZE));
    GetMasm()->bind(*labelExit);
}
2818 
/* Since NaNs have to be canonicalized we compare the
 * input with itself, if it is NaN the comparison will
 * set the parity flag (PF) */
/**
 * Move the raw bit pattern of a float/double into an integer register,
 * canonicalizing every NaN payload to the single quiet-NaN pattern
 * (0x7fc00000 / 0x7ff8000000000000) via CMOVPE on the UCOMI parity flag.
 */
void Amd64Encoder::EncodeFpToBits(Reg dst, Reg src)
{
    ASSERT(dst.IsScalar() && src.IsFloat());

    if (dst.GetType() == INT32_TYPE) {
        ASSERT(src.GetSize() == WORD_SIZE);

        constexpr auto FLOAT_NAN = uint32_t(0x7fc00000);

        ScopedTmpRegU32 tmp(this);

        GetMasm()->ucomiss(ArchVReg(src), ArchVReg(src));  // sets PF iff src is NaN
        GetMasm()->mov(ArchReg(tmp), FLOAT_NAN);
        GetMasm()->movd(ArchReg(dst), ArchVReg(src));
        GetMasm()->cmovpe(ArchReg(dst), ArchReg(tmp));
    } else {
        ASSERT(src.GetSize() == DOUBLE_WORD_SIZE);

        constexpr auto DOUBLE_NAN = uint64_t(0x7ff8000000000000);
        ScopedTmpRegU64 tmp(this);

        GetMasm()->ucomisd(ArchVReg(src), ArchVReg(src));  // sets PF iff src is NaN
        GetMasm()->mov(ArchReg(tmp), DOUBLE_NAN);
        GetMasm()->movq(ArchReg(dst), ArchVReg(src));
        GetMasm()->cmovpe(ArchReg(dst), ArchReg(tmp));
    }
}
2849 
EncodeMoveBitsRaw(Reg dst,Reg src)2850 void Amd64Encoder::EncodeMoveBitsRaw(Reg dst, Reg src)
2851 {
2852     ASSERT((dst.IsFloat() && src.IsScalar()) || (src.IsFloat() && dst.IsScalar()));
2853     if (src.IsScalar()) {
2854         ASSERT((dst.GetSize() == src.GetSize()));
2855         if (src.GetSize() == WORD_SIZE) {
2856             GetMasm()->movd(ArchVReg(dst), ArchReg(src));
2857         } else {
2858             GetMasm()->movq(ArchVReg(dst), ArchReg(src));
2859         }
2860     } else {
2861         ASSERT((src.GetSize() == dst.GetSize()));
2862         if (dst.GetSize() == WORD_SIZE) {
2863             GetMasm()->movd(ArchReg(dst), ArchVReg(src));
2864         } else {
2865             GetMasm()->movq(ArchReg(dst), ArchVReg(src));
2866         }
2867     }
2868 }
2869 
2870 /* Unsafe intrinsics */
/**
 * Atomic compare-and-swap at [obj] or [obj + offset]:
 *   dst = ([addr] == val) ? (store newval, 1) : 0
 * via LOCK CMPXCHG, which implicitly uses [er]ax as the expected value and
 * SETE for the boolean result. rax is saved/restored around the operation
 * unless it is the destination itself.
 * NOTE(review): dst is passed to sete() with its own width; presumably dst is
 * a byte-sized boolean here — confirm against callers.
 */
void Amd64Encoder::EncodeCompareAndSwap(Reg dst, Reg obj, const Reg *offset, Reg val, Reg newval)
{
    /*
     * movl    old, %eax
     * lock    cmpxchgl   new, addr
     * sete    %al
     */
    ScopedTmpRegU64 tmp1(this);
    ScopedTmpRegU64 tmp2(this);
    ScopedTmpRegU64 tmp3(this);
    Reg newvalue = newval;
    auto addr = ArchMem(MemRef(tmp2)).Prepare(GetMasm());
    auto addrReg = ArchReg(tmp2);
    Reg rax(ConvertRegNumber(asmjit::x86::rax.id()), INT64_TYPE);

    /* NOTE(ayodkev) this is a workaround for the failure of
     * jsr166.ScheduledExecutorTest, have to figure out if there
     * is less crude way to avoid this */
    if (newval.GetId() == rax.GetId()) {
        SetFalseResult();
        return;
    }

    // Materialize the effective address in tmp2 (addr operand dereferences it).
    if (offset != nullptr) {
        GetMasm()->lea(addrReg, asmjit::x86::ptr(ArchReg(obj), ArchReg(*offset)));
    } else {
        GetMasm()->mov(addrReg, ArchReg(obj));
    }

    /* the [er]ax register will be overwritten by cmpxchg instruction
     * save it unless it is set as a destination register */
    if (dst.GetId() != rax.GetId()) {
        GetMasm()->mov(ArchReg(tmp1), asmjit::x86::rax);
    }

    /* if the new value comes in [er]ax register we have to use a
     * different register as [er]ax will contain the current value */
    // NOTE(review): unreachable while the early-return workaround above is in
    // place (newval == rax already bailed out); kept for when it is removed.
    if (newval.GetId() == rax.GetId()) {
        GetMasm()->mov(ArchReg(tmp3, newval.GetSize()), ArchReg(newval));
        newvalue = tmp3;
    }

    // CMPXCHG compares against rax; load the expected value unless it is already there.
    if (val.GetId() != rax.GetId()) {
        GetMasm()->mov(asmjit::x86::rax, ArchReg(val).r64());
    }

    GetMasm()->lock().cmpxchg(addr, ArchReg(newvalue));
    GetMasm()->sete(ArchReg(dst));  // ZF set iff the exchange happened

    // Restore the caller's rax (sete above has already captured the result).
    if (dst.GetId() != rax.GetId()) {
        GetMasm()->mov(asmjit::x86::rax, ArchReg(tmp1));
    }
}
2924 
/// CAS at [obj + offset]; forwards to the pointer-offset overload.
void Amd64Encoder::EncodeCompareAndSwap(Reg dst, Reg obj, Reg offset, Reg val, Reg newval)
{
    EncodeCompareAndSwap(dst, obj, &offset, val, newval);
}
2929 
/// CAS directly at [addr] (no offset); forwards to the pointer-offset overload.
void Amd64Encoder::EncodeCompareAndSwap(Reg dst, Reg addr, Reg val, Reg newval)
{
    EncodeCompareAndSwap(dst, addr, nullptr, val, newval);
}
2934 
/**
 * Atomically exchange [obj + offset] with val; dst receives the previous
 * value. Implemented with LOCK XCHG after copying val into dst, since XCHG
 * swaps its register operand in place.
 */
void Amd64Encoder::EncodeUnsafeGetAndSet(Reg dst, Reg obj, Reg offset, Reg val)
{
    ScopedTmpRegU64 tmp(this);
    auto addrReg = ArchReg(tmp);
    auto addr = ArchMem(MemRef(tmp)).Prepare(GetMasm());
    GetMasm()->lea(addrReg, asmjit::x86::ptr(ArchReg(obj), ArchReg(offset)));
    GetMasm()->mov(ArchReg(dst), ArchReg(val));
    GetMasm()->lock().xchg(addr, ArchReg(dst));
}
2944 
/**
 * Atomically add val to [obj + offset]; dst receives the previous value.
 * Implemented with LOCK XADD, which stores old+val to memory and leaves the
 * old value in the register operand. The extra `tmp` parameter is unused on
 * this target.
 */
void Amd64Encoder::EncodeUnsafeGetAndAdd(Reg dst, Reg obj, Reg offset, Reg val, [[maybe_unused]] Reg tmp)
{
    ScopedTmpRegU64 tmp1(this);
    auto addrReg = ArchReg(tmp1);
    auto addr = ArchMem(MemRef(tmp1)).Prepare(GetMasm());
    GetMasm()->lea(addrReg, asmjit::x86::ptr(ArchReg(obj), ArchReg(offset)));
    GetMasm()->mov(ArchReg(dst), ArchReg(val));
    GetMasm()->lock().xadd(addr, ArchReg(dst));
}
2954 
/**
 * Emit a memory barrier. Only a FULL barrier needs an instruction on x86
 * (loads/stores are otherwise strongly ordered); a locked no-op RMW on the
 * stack is used instead of MFENCE for speed.
 */
void Amd64Encoder::EncodeMemoryBarrier(memory_order::Order order)
{
    if (order == memory_order::FULL) {
        /* does the same as mfence but faster, not applicable for NT-writes, though */
        GetMasm()->lock().add(asmjit::x86::dword_ptr(asmjit::x86::rsp), asmjit::imm(0));
    }
}
2962 
/**
 * Probe [sp + offset] with a TEST so that an overflowing stack faults here
 * (deterministically) rather than at an arbitrary later access. TEST reads
 * memory without writing it or any GP register.
 */
void Amd64Encoder::EncodeStackOverflowCheck(ssize_t offset)
{
    MemRef mem(GetTarget().GetStackReg(), offset);
    auto m = ArchMem(mem).Prepare(GetMasm());
    GetMasm()->test(m, ArchReg(GetTarget().GetParamReg(0)));
}
2969 
/// Current emission offset (in bytes) within the assembler's code buffer.
size_t Amd64Encoder::GetCursorOffset() const
{
    // NOLINTNEXTLINE(readability-identifier-naming)
    return GetMasm()->offset();
}
2975 
/// Reposition the assembler's emission cursor to the given buffer offset.
void Amd64Encoder::SetCursorOffset(size_t offset)
{
    // NOLINTNEXTLINE(readability-identifier-naming)
    GetMasm()->setOffset(offset);
}
2981 
AcquireScratchRegister(TypeInfo type)2982 Reg Amd64Encoder::AcquireScratchRegister(TypeInfo type)
2983 {
2984     return (static_cast<Amd64RegisterDescription *>(GetRegfile()))->AcquireScratchRegister(type);
2985 }
2986 
AcquireScratchRegister(Reg reg)2987 void Amd64Encoder::AcquireScratchRegister(Reg reg)
2988 {
2989     (static_cast<Amd64RegisterDescription *>(GetRegfile()))->AcquireScratchRegister(reg);
2990 }
2991 
ReleaseScratchRegister(Reg reg)2992 void Amd64Encoder::ReleaseScratchRegister(Reg reg)
2993 {
2994     (static_cast<Amd64RegisterDescription *>(GetRegfile()))->ReleaseScratchRegister(reg);
2995 }
2996 
IsScratchRegisterReleased(Reg reg) const2997 bool Amd64Encoder::IsScratchRegisterReleased(Reg reg) const
2998 {
2999     return (static_cast<Amd64RegisterDescription *>(GetRegfile()))->IsScratchRegisterReleased(reg);
3000 }
3001 
GetScratchRegistersMask() const3002 RegMask Amd64Encoder::GetScratchRegistersMask() const
3003 {
3004     return (static_cast<const Amd64RegisterDescription *>(GetRegfile()))->GetScratchRegistersMask();
3005 }
3006 
GetScratchFpRegistersMask() const3007 RegMask Amd64Encoder::GetScratchFpRegistersMask() const
3008 {
3009     return (static_cast<const Amd64RegisterDescription *>(GetRegfile()))->GetScratchFpRegistersMask();
3010 }
3011 
GetAvailableScratchRegisters() const3012 RegMask Amd64Encoder::GetAvailableScratchRegisters() const
3013 {
3014     auto regfile = static_cast<const Amd64RegisterDescription *>(GetRegfile());
3015     return RegMask(regfile->GetScratchRegisters().GetMask());
3016 }
3017 
GetAvailableScratchFpRegisters() const3018 VRegMask Amd64Encoder::GetAvailableScratchFpRegisters() const
3019 {
3020     auto regfile = static_cast<const Amd64RegisterDescription *>(GetRegfile());
3021     return VRegMask(regfile->GetScratchFPRegisters().GetMask());
3022 }
3023 
/// Managed references are 64-bit integers on amd64.
TypeInfo Amd64Encoder::GetRefType()
{
    return INT64_TYPE;
}
3028 
/// Pointer to the start of the assembler's code buffer.
void *Amd64Encoder::BufferData() const
{
    // NOLINTNEXTLINE(readability-identifier-naming)
    return GetMasm()->bufferData();
}
3034 
/// Number of code bytes emitted so far (the current cursor offset, not the buffer capacity).
size_t Amd64Encoder::BufferSize() const
{
    // NOLINTNEXTLINE(readability-identifier-naming)
    return GetMasm()->offset();
}
3040 
/**
 * Call a two-argument floating-point library routine at `entryPoint`,
 * marshalling src0/src1 into the xmm0/xmm1 argument registers and the result
 * from xmm0 into dst. A temporary holds src1 during the shuffle so the
 * sequence is safe even when src0/src1 already alias xmm0/xmm1 (in either
 * order). Non-float operands set the encoder's failure flag instead.
 */
void Amd64Encoder::MakeLibCall(Reg dst, Reg src0, Reg src1, void *entryPoint)
{
    if (!dst.IsFloat()) {
        SetFalseResult();
        return;
    }

    if (dst.GetType() == FLOAT32_TYPE) {
        if (!src0.IsFloat() || !src1.IsFloat()) {
            SetFalseResult();
            return;
        }

        // Shuffle only when the args are not already in the ABI registers.
        if (src0.GetId() != asmjit::x86::xmm0.id() || src1.GetId() != asmjit::x86::xmm1.id()) {
            ScopedTmpRegF32 tmp(this);
            GetMasm()->movss(ArchVReg(tmp), ArchVReg(src1));  // save src1 first: the next mov may clobber it
            GetMasm()->movss(asmjit::x86::xmm0, ArchVReg(src0));
            GetMasm()->movss(asmjit::x86::xmm1, ArchVReg(tmp));
        }

        MakeCall(entryPoint);

        if (dst.GetId() != asmjit::x86::xmm0.id()) {
            GetMasm()->movss(ArchVReg(dst), asmjit::x86::xmm0);
        }
    } else if (dst.GetType() == FLOAT64_TYPE) {
        if (!src0.IsFloat() || !src1.IsFloat()) {
            SetFalseResult();
            return;
        }

        // Shuffle only when the args are not already in the ABI registers.
        if (src0.GetId() != asmjit::x86::xmm0.id() || src1.GetId() != asmjit::x86::xmm1.id()) {
            ScopedTmpRegF64 tmp(this);
            GetMasm()->movsd(ArchVReg(tmp), ArchVReg(src1));  // save src1 first: the next mov may clobber it
            GetMasm()->movsd(asmjit::x86::xmm0, ArchVReg(src0));
            GetMasm()->movsd(asmjit::x86::xmm1, ArchVReg(tmp));
        }

        MakeCall(entryPoint);

        if (dst.GetId() != asmjit::x86::xmm0.id()) {
            GetMasm()->movsd(ArchVReg(dst), asmjit::x86::xmm0);
        }
    } else {
        UNREACHABLE();
    }
}
3088 
/**
 * Spill (IS_STORE) or fill registers selected by `registers` to/from
 * rsp-relative stack slots. Register index i maps to the slot at
 * rsp + (slot + i - startReg) * 8. FP registers move through MOVSD,
 * GP registers as full 64-bit quantities.
 */
template <bool IS_STORE>
void Amd64Encoder::LoadStoreRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
{
    for (size_t i {0}; i < registers.size(); ++i) {
        if (!registers.test(i)) {
            continue;
        }

        asmjit::x86::Mem mem = asmjit::x86::ptr(asmjit::x86::rsp, (slot + i - startReg) * DOUBLE_WORD_SIZE_BYTES);

        if constexpr (IS_STORE) {  // NOLINT
            if (isFp) {
                GetMasm()->movsd(mem, asmjit::x86::xmm(i));
            } else {
                GetMasm()->mov(mem, asmjit::x86::gpq(ConvertRegNumber(i)));
            }
        } else {  // NOLINT
            if (isFp) {
                GetMasm()->movsd(asmjit::x86::xmm(i), mem);
            } else {
                GetMasm()->mov(asmjit::x86::gpq(ConvertRegNumber(i)), mem);
            }
        }
    }
}
3114 
/**
 * Spill (IS_STORE) or fill registers selected by `registers` to/from slots
 * relative to `base`. When `mask` is non-empty, slots are packed: only
 * mask-selected registers occupy consecutive slots, starting at `slot` for
 * the lowest masked register. With an empty mask every selected register
 * advances the slot index.
 */
template <bool IS_STORE>
void Amd64Encoder::LoadStoreRegisters(RegMask registers, bool isFp, int32_t slot, Reg base, RegMask mask)
{
    auto baseReg = ArchReg(base);
    bool hasMask = mask.any();
    int32_t index = hasMask ? static_cast<int32_t>(mask.GetMinRegister()) : 0;
    // Re-bias so that the first processed register lands exactly at `slot`.
    slot -= index;
    for (size_t i = index; i < registers.size(); ++i) {
        if (hasMask) {
            if (!mask.test(i)) {
                continue;
            }
            index++;
        }
        if (!registers.test(i)) {
            continue;
        }

        if (!hasMask) {
            index++;
        }

        // `-1` because we've incremented `index` in advance
        asmjit::x86::Mem mem = asmjit::x86::ptr(baseReg, (slot + index - 1) * DOUBLE_WORD_SIZE_BYTES);

        if constexpr (IS_STORE) {  // NOLINT
            if (isFp) {
                GetMasm()->movsd(mem, asmjit::x86::xmm(i));
            } else {
                GetMasm()->mov(mem, asmjit::x86::gpq(ConvertRegNumber(i)));
            }
        } else {  // NOLINT
            if (isFp) {
                GetMasm()->movsd(asmjit::x86::xmm(i), mem);
            } else {
                GetMasm()->mov(asmjit::x86::gpq(ConvertRegNumber(i)), mem);
            }
        }
    }
}
3155 
/// Store the selected registers to rsp-relative slots (see LoadStoreRegisters<true>).
void Amd64Encoder::SaveRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
{
    LoadStoreRegisters<true>(registers, slot, startReg, isFp);
}
3160 
/// Load the selected registers from rsp-relative slots (see LoadStoreRegisters<false>).
void Amd64Encoder::LoadRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
{
    LoadStoreRegisters<false>(registers, slot, startReg, isFp);
}
3165 
/// Store the selected registers to base-relative (optionally mask-packed) slots.
void Amd64Encoder::SaveRegisters(RegMask registers, bool isFp, ssize_t slot, Reg base, RegMask mask)
{
    LoadStoreRegisters<true>(registers, isFp, slot, base, mask);
}
3170 
/// Load the selected registers from base-relative (optionally mask-packed) slots.
void Amd64Encoder::LoadRegisters(RegMask registers, bool isFp, ssize_t slot, Reg base, RegMask mask)
{
    LoadStoreRegisters<false>(registers, isFp, slot, base, mask);
}
3175 
PushRegisters(RegMask registers,bool isFp)3176 void Amd64Encoder::PushRegisters(RegMask registers, bool isFp)
3177 {
3178     for (size_t i = 0; i < registers.size(); i++) {
3179         if (registers[i]) {
3180             if (isFp) {
3181                 GetMasm()->sub(asmjit::x86::rsp, DOUBLE_WORD_SIZE_BYTES);
3182                 GetMasm()->movsd(asmjit::x86::ptr(asmjit::x86::rsp), ArchVReg(Reg(i, FLOAT64_TYPE)));
3183             } else {
3184                 GetMasm()->push(asmjit::x86::gpq(ConvertRegNumber(i)));
3185             }
3186         }
3187     }
3188 }
3189 
PopRegisters(RegMask registers,bool isFp)3190 void Amd64Encoder::PopRegisters(RegMask registers, bool isFp)
3191 {
3192     for (ssize_t i = registers.size() - 1; i >= 0; i--) {
3193         if (registers[i]) {
3194             if (isFp) {
3195                 GetMasm()->movsd(ArchVReg(Reg(i, FLOAT64_TYPE)), asmjit::x86::ptr(asmjit::x86::rsp));
3196                 GetMasm()->add(asmjit::x86::rsp, DOUBLE_WORD_SIZE_BYTES);
3197             } else {
3198                 GetMasm()->pop(asmjit::x86::gpq(ConvertRegNumber(i)));
3199             }
3200         }
3201     }
3202 }
3203 
/// The underlying asmjit assembler; must have been initialized before use.
asmjit::x86::Assembler *Amd64Encoder::GetMasm() const
{
    ASSERT(masm_ != nullptr);
    return masm_;
}
3209 
/// Absolute address of a bound label: code base address plus the label's offset.
size_t Amd64Encoder::GetLabelAddress(LabelHolder::LabelId label)
{
    auto code = GetMasm()->code();
    ASSERT(code->isLabelBound(label));
    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    return code->baseAddress() + code->labelOffset(label);
}
3217 
/// Whether any emitted instruction still references (links to) this label.
bool Amd64Encoder::LabelHasLinks(LabelHolder::LabelId label)
{
    auto code = GetMasm()->code();
    auto entry = code->labelEntry(label);
    return entry->links() != nullptr;
}
3224 
3225 template <typename T, size_t N>
CopyArrayToXmm(Reg xmm,const std::array<T,N> & arr)3226 void Amd64Encoder::CopyArrayToXmm(Reg xmm, const std::array<T, N> &arr)
3227 {
3228     static constexpr auto SIZE {N * sizeof(T)};
3229     static_assert((SIZE == DOUBLE_WORD_SIZE_BYTES) || (SIZE == 2U * DOUBLE_WORD_SIZE_BYTES));
3230     ASSERT(xmm.GetType() == FLOAT64_TYPE);
3231 
3232     auto data {reinterpret_cast<const uint64_t *>(arr.data())};
3233 
3234     ScopedTmpRegU64 tmpGpr(this);
3235     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
3236     GetMasm()->mov(ArchReg(tmpGpr), asmjit::imm(data[0]));
3237     GetMasm()->movq(ArchVReg(xmm), ArchReg(tmpGpr));
3238 
3239     if constexpr (SIZE == 2U * DOUBLE_WORD_SIZE_BYTES) {
3240         ScopedTmpRegF64 tmpXmm(this);
3241         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
3242         GetMasm()->mov(ArchReg(tmpGpr), asmjit::imm(data[1]));
3243         GetMasm()->movq(ArchVReg(tmpXmm), ArchReg(tmpGpr));
3244         GetMasm()->unpcklpd(ArchVReg(xmm), ArchVReg(tmpXmm));
3245     }
3246 }
3247 
/**
 * Load the bit pattern of a 4- or 8-byte immediate into the xmm register,
 * staging it through a GPR (MOVD for 32-bit, MOVQ for 64-bit).
 */
template <typename T>
void Amd64Encoder::CopyImmToXmm(Reg xmm, T imm)
{
    static_assert((sizeof(imm) == WORD_SIZE_BYTES) || (sizeof(imm) == DOUBLE_WORD_SIZE_BYTES));
    ASSERT(xmm.GetSize() == BYTE_SIZE * sizeof(imm));

    if constexpr (sizeof(imm) == WORD_SIZE_BYTES) {  // NOLINT
        ScopedTmpRegU32 tmpGpr(this);
        GetMasm()->mov(ArchReg(tmpGpr), asmjit::imm(bit_cast<uint32_t>(imm)));
        GetMasm()->movd(ArchVReg(xmm), ArchReg(tmpGpr));
    } else {  // NOLINT
        ScopedTmpRegU64 tmpGpr(this);
        GetMasm()->mov(ArchReg(tmpGpr), asmjit::imm(bit_cast<uint64_t>(imm)));
        GetMasm()->movq(ArchVReg(xmm), ArchReg(tmpGpr));
    }
}
3264 
/// Disassembles one instruction at offset `pc` in the emitted code buffer
/// using Zydis (AT&T syntax) and prints it to `stream`.
/// A negative `codeOffset` means "standalone dump": the encoder is finalized
/// first and only the mnemonic text is printed; otherwise the line is
/// prefixed with the 8-digit hex address `pc + codeOffset`.
/// Returns the offset of the next instruction (`pc` + decoded length).
size_t Amd64Encoder::DisasmInstr(std::ostream &stream, size_t pc, ssize_t codeOffset) const
{
    if (codeOffset < 0) {
        // Finalize() mutates the masm buffer, but disassembly needs the final
        // bytes; const_cast is the escape hatch for this const method.
        (const_cast<Amd64Encoder *>(this))->Finalize();
    }
    // NOLINTNEXTLINE(readability-identifier-naming)
    Span code(GetMasm()->bufferData(), GetMasm()->offset());

    // Cap the decode window so Zydis never reads past the buffer end.
    // Both are [[maybe_unused]] only because release builds compile ASSERT out.
    [[maybe_unused]] size_t dataLeft = code.Size() - pc;
    [[maybe_unused]] constexpr size_t LENGTH = ZYDIS_MAX_INSTRUCTION_LENGTH;  // 15 bytes is max inst length in amd64

    // Initialize decoder context
    ZydisDecoder decoder;
    [[maybe_unused]] bool res =
        ZYAN_SUCCESS(ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_ADDRESS_WIDTH_64));

    // Initialize formatter (AT&T syntax, branch targets shown as relative)
    ZydisFormatter formatter;
    res &= ZYAN_SUCCESS(ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_ATT));
    ZydisFormatterSetProperty(&formatter, ZYDIS_FORMATTER_PROP_FORCE_RELATIVE_BRANCHES, 1);
    ASSERT(res);

    ZydisDecodedInstruction instruction;

    res &= ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&decoder, &code[pc], std::min(LENGTH, dataLeft), &instruction));

    // Format & print the binary instruction structure to human readable format
    char buffer[256];  // NOLINT (modernize-avoid-c-arrays, readability-identifier-naming, readability-magic-numbers)
    res &= ZYAN_SUCCESS(
        ZydisFormatterFormatInstruction(&formatter, &instruction, buffer, sizeof(buffer), uintptr_t(&code[pc])));

    ASSERT(res);

    // Print disassembly
    if (codeOffset < 0) {
        stream << buffer;
    } else {
        // Zero-padded 8-digit hex address prefix; stream fill/base state is
        // restored (setfill(' '), std::dec) before the mnemonic is written.
        stream << std::setw(0x8) << std::right << std::setfill('0') << std::hex << pc + codeOffset << std::dec
               << std::setfill(' ') << ": " << buffer;
    }

    return pc + instruction.length;
}
3308 }  // namespace ark::compiler::amd64
3309