1 /*
2  * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16 Encoder (implementation of math and memory low-level emitters)
17 */
18 
19 #include <iomanip>
20 
21 #include "libpandabase/utils/utils.h"
22 #include "compiler/optimizer/code_generator/relocations.h"
23 #include "compiler/optimizer/code_generator/fast_divisor.h"
24 #include "operands.h"
25 #include "scoped_tmp_reg.h"
26 #include "target/amd64/target.h"
27 
28 #include "lib_helpers.inl"
29 
30 #include "Zydis/Zydis.h"
31 
32 #ifndef PANDA_TARGET_MACOS
33 #include "elf.h"
34 #endif  // PANDA_TARGET_MACOS
35 
36 namespace ark::compiler::amd64 {
37 
38 static auto ArchCcInt(Condition cc)
39 {
40     switch (cc) {
41         case Condition::EQ:
42             return asmjit::x86::Condition::Code::kEqual;
43         case Condition::NE:
44             return asmjit::x86::Condition::Code::kNotEqual;
45         case Condition::LT:
46             return asmjit::x86::Condition::Code::kSignedLT;
47         case Condition::GT:
48             return asmjit::x86::Condition::Code::kSignedGT;
49         case Condition::LE:
50             return asmjit::x86::Condition::Code::kSignedLE;
51         case Condition::GE:
52             return asmjit::x86::Condition::Code::kSignedGE;
53         case Condition::LO:
54             return asmjit::x86::Condition::Code::kUnsignedLT;
55         case Condition::LS:
56             return asmjit::x86::Condition::Code::kUnsignedLE;
57         case Condition::HI:
58             return asmjit::x86::Condition::Code::kUnsignedGT;
59         case Condition::HS:
60             return asmjit::x86::Condition::Code::kUnsignedGE;
61         // NOTE(igorban) : Remove them
62         case Condition::MI:
63             return asmjit::x86::Condition::Code::kNegative;
64         case Condition::PL:
65             return asmjit::x86::Condition::Code::kPositive;
66         case Condition::VS:
67             return asmjit::x86::Condition::Code::kOverflow;
68         case Condition::VC:
69             return asmjit::x86::Condition::Code::kNotOverflow;
70         case Condition::AL:
71         case Condition::NV:
72         default:
73             UNREACHABLE();
74             return asmjit::x86::Condition::Code::kEqual;
75     }
76 }
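// Floating-point compares below are emitted with comiss/comisd, which set ZF/CF/PF in the
// same way as an unsigned integer compare, so signed conditions are mapped to their unsigned
// counterparts here.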
77 static auto ArchCcFloat(Condition cc)
78 {
79     switch (cc) {
80         case Condition::EQ:
81             return asmjit::x86::Condition::Code::kEqual;
82         case Condition::NE:
83             return asmjit::x86::Condition::Code::kNotEqual;
84         case Condition::LT:
85             return asmjit::x86::Condition::Code::kUnsignedLT;
86         case Condition::GT:
87             return asmjit::x86::Condition::Code::kUnsignedGT;
88         case Condition::LE:
89             return asmjit::x86::Condition::Code::kUnsignedLE;
90         case Condition::GE:
91             return asmjit::x86::Condition::Code::kUnsignedGE;
92         case Condition::LO:
93             return asmjit::x86::Condition::Code::kUnsignedLT;
94         case Condition::LS:
95             return asmjit::x86::Condition::Code::kUnsignedLE;
96         case Condition::HI:
97             return asmjit::x86::Condition::Code::kUnsignedGT;
98         case Condition::HS:
99             return asmjit::x86::Condition::Code::kUnsignedGE;
100         // NOTE(igorban) : Remove them
101         case Condition::MI:
102             return asmjit::x86::Condition::Code::kNegative;
103         case Condition::PL:
104             return asmjit::x86::Condition::Code::kPositive;
105         case Condition::VS:
106             return asmjit::x86::Condition::Code::kOverflow;
107         case Condition::VC:
108             return asmjit::x86::Condition::Code::kNotOverflow;
109         case Condition::AL:
110         case Condition::NV:
111         default:
112             UNREACHABLE();
113             return asmjit::x86::Condition::Code::kEqual;
114     }
115 }
116 /// Converters
117 static asmjit::x86::Condition::Code ArchCc(Condition cc, bool isFloat = false)
118 {
119     return isFloat ? ArchCcFloat(cc) : ArchCcInt(cc);
120 }
121 
122 static asmjit::x86::Condition::Code ArchCcTest(Condition cc)
123 {
124     ASSERT(cc == Condition::TST_EQ || cc == Condition::TST_NE);
125     return cc == Condition::TST_EQ ? asmjit::x86::Condition::Code::kEqual : asmjit::x86::Condition::Code::kNotEqual;
126 }
127 
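// comiss/comisd signal an unordered (NaN) operand through PF. The conditions listed here are
// the ones for which the branch must also be taken when an operand is NaN, so the jump
// emitters additionally branch on parity for them (see EncodeJump below).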
128 static bool CcMatchesNan(Condition cc)
129 {
130     switch (cc) {
131         case Condition::NE:
132         case Condition::LT:
133         case Condition::LE:
134         case Condition::HI:
135         case Condition::HS:
136             return true;
137 
138         default:
139             return false;
140     }
141 }
142 
143 /// Converters
144 static asmjit::x86::Gp ArchReg(Reg reg, uint8_t size = 0)
145 {
146     ASSERT(reg.IsValid());
147     if (reg.IsScalar()) {
148         size_t regSize = size == 0 ? reg.GetSize() : size;
149         auto archId = ConvertRegNumber(reg.GetId());
150 
151         asmjit::x86::Gp archReg;
152         switch (regSize) {
153             case DOUBLE_WORD_SIZE:
154                 archReg = asmjit::x86::Gp(asmjit::x86::Gpq::kSignature, archId);
155                 break;
156             case WORD_SIZE:
157                 archReg = asmjit::x86::Gp(asmjit::x86::Gpd::kSignature, archId);
158                 break;
159             case HALF_SIZE:
160                 archReg = asmjit::x86::Gp(asmjit::x86::Gpw::kSignature, archId);
161                 break;
162             case BYTE_SIZE:
163                 archReg = asmjit::x86::Gp(asmjit::x86::GpbLo::kSignature, archId);
164                 break;
165 
166             default:
167                 UNREACHABLE();
168         }
169 
170         ASSERT(archReg.isValid());
171         return archReg;
172     }
173     if (reg.GetId() == ConvertRegNumber(asmjit::x86::rsp.id())) {
174         return asmjit::x86::rsp;
175     }
176 
177     // Invalid register type
178     UNREACHABLE();
179     return asmjit::x86::rax;
180 }
181 
182 static asmjit::x86::Xmm ArchVReg(Reg reg)
183 {
184     ASSERT(reg.IsValid() && reg.IsFloat());
185     auto archVreg = asmjit::x86::xmm(reg.GetId());
186     return archVreg;
187 }
188 
189 static asmjit::Imm ArchImm(Imm imm)
190 {
191     ASSERT(imm.GetType() == INT64_TYPE);
192     return asmjit::imm(imm.GetAsInt());
193 }
194 
195 static uint64_t ImmToUnsignedInt(Imm imm)
196 {
197     ASSERT(imm.GetType() == INT64_TYPE);
198     return uint64_t(imm.GetAsInt());
199 }
200 
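// x86-64 instructions encode at most a 32-bit immediate that is sign-extended to 64 bits,
// so 64-bit operands are checked against the 32-bit signed range.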
201 static bool ImmFitsSize(int64_t imm, uint8_t size)
202 {
203     if (size == DOUBLE_WORD_SIZE) {
204         size = WORD_SIZE;
205     }
206 
207     // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult)
208     int64_t max = (uint64_t(1) << (size - 1U)) - 1U;
209     int64_t min = ~uint64_t(max);
210     ASSERT(min < 0);
211     ASSERT(max > 0);
212 
213     return imm >= min && imm <= max;
214 }
215 
216 LabelHolder::LabelId Amd64LabelHolder::CreateLabel()
217 {
218     ++id_;
219 
220     auto masm = (static_cast<Amd64Encoder *>(GetEncoder()))->GetMasm();
221     auto label = masm->newLabel();
222 
223     auto allocator = GetEncoder()->GetAllocator();
224     labels_.push_back(allocator->New<LabelType>(std::move(label)));
225     ASSERT(labels_.size() == id_);
226     return id_ - 1;
227 }
228 
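// Translate a generic MemRef (base + index << scale + displacement) into an asmjit memory
// operand. The x86 SIB byte only encodes scales up to 3 (i.e. *8), so a larger scale is
// remembered in bigShift_ and applied later in Prepare() by shifting the index register;
// Prepare() also sign-extends a 32-bit index when the base register is 64-bit.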
229 ArchMem::ArchMem(MemRef mem)
230 {
231     bool base = mem.HasBase();
232     bool regoffset = mem.HasIndex();
233     bool shift = mem.HasScale();
234     bool offset = mem.HasDisp();
235 
236     if (base && !regoffset && !shift) {
237         // Default memory - base + offset
238         mem_ = asmjit::x86::ptr(ArchReg(mem.GetBase()), mem.GetDisp());
239     } else if (base && regoffset && !offset) {
240         auto baseSize = mem.GetBase().GetSize();
241         auto indexSize = mem.GetIndex().GetSize();
242 
243         ASSERT(baseSize >= indexSize);
244         ASSERT(indexSize >= WORD_SIZE);
245 
246         if (baseSize > indexSize) {
247             needExtendIndex_ = true;
248         }
249 
250         if (mem.GetScale() == 0) {
251             mem_ = asmjit::x86::ptr(ArchReg(mem.GetBase()), ArchReg(mem.GetIndex(), baseSize));
252         } else {
253             auto scale = mem.GetScale();
254             if (scale <= 3U) {
255                 mem_ = asmjit::x86::ptr(ArchReg(mem.GetBase()), ArchReg(mem.GetIndex(), baseSize), scale);
256             } else {
257                 mem_ = asmjit::x86::ptr(ArchReg(mem.GetBase()), ArchReg(mem.GetIndex(), baseSize));
258                 bigShift_ = scale;
259             }
260         }
261     } else {
262         // Wrong memRef
263         UNREACHABLE();
264     }
265 }
266 
267 asmjit::x86::Mem ArchMem::Prepare(asmjit::x86::Assembler *masm)
268 {
269     if (isPrepared_) {
270         return mem_;
271     }
272 
273     if (bigShift_ != 0) {
274         ASSERT(!mem_.hasOffset() && mem_.hasIndex() && bigShift_ > 3U);
275         masm->shl(mem_.indexReg().as<asmjit::x86::Gp>(), asmjit::imm(bigShift_));
276     }
277 
278     if (needExtendIndex_) {
279         ASSERT(mem_.hasIndex());
280         auto qIndex = mem_.indexReg().as<asmjit::x86::Gp>();
281         auto dIndex {qIndex};
282         dIndex.setSignature(asmjit::x86::Gpd::kSignature);
283         masm->movsxd(qIndex, dIndex);
284     }
285 
286     isPrepared_ = true;
287     return mem_;
288 }
289 
290 AsmJitErrorHandler::AsmJitErrorHandler(Encoder *encoder) : encoder_(encoder)
291 {
292     ASSERT(encoder != nullptr);
293 }
294 
295 void AsmJitErrorHandler::handleError([[maybe_unused]] asmjit::Error err, [[maybe_unused]] const char *message,
296                                      [[maybe_unused]] asmjit::BaseEmitter *origin)
297 {
298     encoder_->SetFalseResult();
299 }
300 
301 void Amd64LabelHolder::CreateLabels(LabelId max)
302 {
303     for (LabelId i = 0; i < max; ++i) {
304         CreateLabel();
305     }
306 }
307 
308 Amd64LabelHolder::LabelType *Amd64LabelHolder::GetLabel(LabelId id)
309 {
310     ASSERT(labels_.size() > id);
311     return labels_[id];
312 }
313 
314 Amd64LabelHolder::LabelId Amd64LabelHolder::Size()
315 {
316     return labels_.size();
317 }
318 
319 void Amd64LabelHolder::BindLabel(LabelId id)
320 {
321     static_cast<Amd64Encoder *>(GetEncoder())->GetMasm()->bind(*labels_[id]);
322 }
323 
324 Amd64Encoder::Amd64Encoder(ArenaAllocator *allocator) : Encoder(allocator, Arch::X86_64, false) {}
325 
326 Amd64Encoder::~Amd64Encoder()
327 {
328     if (masm_ != nullptr) {
329         masm_->~Assembler();
330         masm_ = nullptr;
331     }
332 
333     if (codeHolder_ != nullptr) {
334         codeHolder_->~CodeHolder();
335         codeHolder_ = nullptr;
336     }
337 
338     if (errorHandler_ != nullptr) {
339         errorHandler_->~ErrorHandler();
340         errorHandler_ = nullptr;
341     }
342 
343     if (labels_ != nullptr) {
344         labels_->~Amd64LabelHolder();
345         labels_ = nullptr;
346     }
347 }
348 
349 LabelHolder *Amd64Encoder::GetLabels() const
350 {
351     ASSERT(labels_ != nullptr);
352     return labels_;
353 }
354 
355 bool Amd64Encoder::IsValid() const
356 {
357     return true;
358 }
359 
360 constexpr auto Amd64Encoder::GetTarget()
361 {
362     return ark::compiler::Target(Arch::X86_64);
363 }
364 
365 bool Amd64Encoder::InitMasm()
366 {
367     if (masm_ == nullptr) {
368         labels_ = GetAllocator()->New<Amd64LabelHolder>(this);
369         if (labels_ == nullptr) {
370             SetFalseResult();
371             return false;
372         }
373 
374         asmjit::Environment env;
375         env.setArch(asmjit::Environment::kArchX64);
376 
377         codeHolder_ = GetAllocator()->New<asmjit::CodeHolder>(GetAllocator());
378         if (codeHolder_ == nullptr) {
379             SetFalseResult();
380             return false;
381         }
382         codeHolder_->init(env, 0U);
383 
384         masm_ = GetAllocator()->New<asmjit::x86::Assembler>(codeHolder_);
385         if (masm_ == nullptr) {
386             SetFalseResult();
387             return false;
388         }
389 
390         // Enable strict validation.
391         masm_->addValidationOptions(asmjit::BaseEmitter::kValidationOptionAssembler);
392         errorHandler_ = GetAllocator()->New<AsmJitErrorHandler>(this);
393         if (errorHandler_ == nullptr) {
394             SetFalseResult();
395             return false;
396         }
397         masm_->setErrorHandler(errorHandler_);
398 
399         // Make sure that the compiler uses the same scratch registers as the assembler
400         CHECK_EQ(compiler::arch_info::x86_64::TEMP_REGS, GetTarget().GetTempRegsMask());
401         CHECK_EQ(compiler::arch_info::x86_64::TEMP_FP_REGS, GetTarget().GetTempVRegsMask());
402     }
403     return true;
404 }
405 
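// Resolve labels and unresolved links, relocate the code to the address of the arena-allocated
// buffer that will hold it, and copy the flattened bytes into that buffer.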
406 void Amd64Encoder::Finalize()
407 {
408     auto code = GetMasm()->code();
409     auto codeSize = code->codeSize();
410 
411     code->flatten();
412     code->resolveUnresolvedLinks();
413 
414     auto codeBuffer = GetAllocator()->Alloc(codeSize);
415 
416     code->relocateToBase(reinterpret_cast<uintptr_t>(codeBuffer));
417     code->copyFlattenedData(codeBuffer, codeSize, asmjit::CodeHolder::kCopyPadSectionBuffer);
418 }
419 
420 void Amd64Encoder::EncodeJump(LabelHolder::LabelId id)
421 {
422     auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
423     GetMasm()->jmp(*label);
424 }
425 
426 void Amd64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
427 {
428     if (src0.IsScalar()) {
429         if (src0.GetSize() == src1.GetSize()) {
430             GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
431         } else if (src0.GetSize() > src1.GetSize()) {
432             ScopedTmpReg tmpReg(this, src0.GetType());
433             EncodeCast(tmpReg, false, src1, false);
434             GetMasm()->cmp(ArchReg(src0), ArchReg(tmpReg));
435         } else {
436             ScopedTmpReg tmpReg(this, src1.GetType());
437             EncodeCast(tmpReg, false, src0, false);
438             GetMasm()->cmp(ArchReg(tmpReg), ArchReg(src1));
439         }
440     } else if (src0.GetType() == FLOAT32_TYPE) {
441         GetMasm()->comiss(ArchVReg(src0), ArchVReg(src1));
442     } else {
443         GetMasm()->comisd(ArchVReg(src0), ArchVReg(src1));
444     }
445 
446     auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
447     if (src0.IsScalar()) {
448         GetMasm()->j(ArchCc(cc, src0.IsFloat()), *label);
449         return;
450     }
451 
452     if (CcMatchesNan(cc)) {
453         GetMasm()->jp(*label);
454         GetMasm()->j(ArchCc(cc, src0.IsFloat()), *label);
455     } else {
456         auto end = GetMasm()->newLabel();
457 
458         GetMasm()->jp(end);
459         GetMasm()->j(ArchCc(cc, src0.IsFloat()), *label);
460         GetMasm()->bind(end);
461     }
462 }
463 
464 void Amd64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
465 {
466     ASSERT(src.IsScalar());
467 
468     auto immVal = imm.GetAsInt();
469     if (immVal == 0) {
470         EncodeJump(id, src, cc);
471         return;
472     }
473 
474     if (ImmFitsSize(immVal, src.GetSize())) {
475         auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
476 
477         GetMasm()->cmp(ArchReg(src), asmjit::imm(immVal));
478         GetMasm()->j(ArchCc(cc), *label);
479     } else {
480         ScopedTmpReg tmpReg(this, src.GetType());
481         GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
482         EncodeJump(id, src, tmpReg, cc);
483     }
484 }
485 
486 void Amd64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
487 {
488     ASSERT(src0.IsScalar());
489     if (src0.GetSize() == src1.GetSize()) {
490         GetMasm()->test(ArchReg(src0), ArchReg(src1));
491     } else if (src0.GetSize() > src1.GetSize()) {
492         ScopedTmpReg tmpReg(this, src0.GetType());
493         EncodeCast(tmpReg, false, src1, false);
494         GetMasm()->test(ArchReg(src0), ArchReg(tmpReg));
495     } else {
496         ScopedTmpReg tmpReg(this, src1.GetType());
497         EncodeCast(tmpReg, false, src0, false);
498         GetMasm()->test(ArchReg(tmpReg), ArchReg(src1));
499     }
500 
501     auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
502     GetMasm()->j(ArchCcTest(cc), *label);
503 }
504 
505 void Amd64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
506 {
507     ASSERT(src.IsScalar());
508 
509     auto immVal = imm.GetAsInt();
510     if (ImmFitsSize(immVal, src.GetSize())) {
511         auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
512 
513         GetMasm()->test(ArchReg(src), asmjit::imm(immVal));
514         GetMasm()->j(ArchCcTest(cc), *label);
515     } else {
516         ScopedTmpReg tmpReg(this, src.GetType());
517         GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
518         EncodeJumpTest(id, src, tmpReg, cc);
519     }
520 }
521 
522 void Amd64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Condition cc)
523 {
524     if (src.IsScalar()) {
525         auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
526 
527         GetMasm()->cmp(ArchReg(src), asmjit::imm(0));
528         GetMasm()->j(ArchCc(cc), *label);
529         return;
530     }
531 
532     ScopedTmpReg tmpReg(this, src.GetType());
533     if (src.GetType() == FLOAT32_TYPE) {
534         GetMasm()->xorps(ArchVReg(tmpReg), ArchVReg(tmpReg));
535     } else {
536         GetMasm()->xorpd(ArchVReg(tmpReg), ArchVReg(tmpReg));
537     }
538     EncodeJump(id, src, tmpReg, cc);
539 }
540 
541 void Amd64Encoder::EncodeJump(Reg dst)
542 {
543     GetMasm()->jmp(ArchReg(dst));
544 }
545 
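// Emit a 5-byte "jmp rel32" with a zeroed displacement and describe it with an R_X86_64_PLT32
// relocation: the offset points at the rel32 field (4 bytes before the cursor) and the -4
// addend accounts for the displacement being counted from the end of the instruction.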
546 void Amd64Encoder::EncodeJump(RelocationInfo *relocation)
547 {
548 #ifdef PANDA_TARGET_MACOS
549     LOG(FATAL, COMPILER) << "Not supported in Macos build";
550 #else
551     // NOLINTNEXTLINE(readability-magic-numbers)
552     std::array<uint8_t, 5U> data = {0xe9, 0, 0, 0, 0};
553     GetMasm()->embed(data.data(), data.size());
554 
555     constexpr int ADDEND = 4;
556     relocation->offset = GetCursorOffset() - ADDEND;
557     relocation->addend = -ADDEND;
558     relocation->type = R_X86_64_PLT32;
559 #endif
560 }
561 
562 void Amd64Encoder::EncodeBitTestAndBranch(LabelHolder::LabelId id, compiler::Reg reg, uint32_t bitPos, bool bitValue)
563 {
564     ASSERT(reg.IsScalar() && reg.GetSize() > bitPos);
565     auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
566     if (reg.GetSize() == DOUBLE_WORD_SIZE) {
567         ScopedTmpRegU64 tmpReg(this);
568         GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(static_cast<uint64_t>(1) << bitPos));
569         GetMasm()->test(ArchReg(reg), ArchReg(tmpReg));
570     } else {
571         GetMasm()->test(ArchReg(reg), asmjit::imm(1U << bitPos));
572     }
573     if (bitValue) {
574         GetMasm()->j(ArchCc(Condition::NE), *label);
575     } else {
576         GetMasm()->j(ArchCc(Condition::EQ), *label);
577     }
578 }
579 
580 void Amd64Encoder::MakeCall([[maybe_unused]] compiler::RelocationInfo *relocation)
581 {
582 #ifdef PANDA_TARGET_MACOS
583     LOG(FATAL, COMPILER) << "Not supported in Macos build";
584 #else
585     // NOLINTNEXTLINE(readability-magic-numbers)
586     std::array<uint8_t, 5U> data = {0xe8, 0, 0, 0, 0};
587     GetMasm()->embed(data.data(), data.size());
588 
589     relocation->offset = GetCursorOffset() - 4_I;
590     relocation->addend = -4_I;
591     relocation->type = R_X86_64_PLT32;
592 #endif
593 }
594 
595 void Amd64Encoder::MakeCall(LabelHolder::LabelId id)
596 {
597     auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
598     GetMasm()->call(*label);
599 }
600 
601 void Amd64Encoder::MakeCall(const void *entryPoint)
602 {
603     ScopedTmpRegU64 tmpReg(this);
604     GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(entryPoint));
605     GetMasm()->call(ArchReg(tmpReg));
606 }
607 
608 void Amd64Encoder::MakeCall(Reg reg)
609 {
610     GetMasm()->call(ArchReg(reg));
611 }
612 
613 void Amd64Encoder::MakeCall(MemRef entryPoint)
614 {
615     ScopedTmpRegU64 tmpReg(this);
616     EncodeLdr(tmpReg, false, entryPoint);
617     GetMasm()->call(ArchReg(tmpReg));
618 }
619 
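// A RIP-relative displacement is counted from the end of the instruction, while the caller
// passes an offset relative to the current cursor. Encode the instruction twice: the first
// pass measures its length, then the cursor is rewound and the instruction is re-emitted with
// the displacement reduced by that length.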
620 template <typename Func>
621 void Amd64Encoder::EncodeRelativePcMov(Reg reg, intptr_t offset, Func encodeInstruction)
622 {
623     // NOLINTNEXTLINE(readability-identifier-naming)
624     auto pos = GetMasm()->offset();
625     encodeInstruction(reg, offset);
626     // NOLINTNEXTLINE(readability-identifier-naming)
627     offset -= (GetMasm()->offset() - pos);
628     // NOLINTNEXTLINE(readability-identifier-naming)
629     GetMasm()->setOffset(pos);
630     encodeInstruction(reg, offset);
631 }
632 
633 void Amd64Encoder::MakeCallAot(intptr_t offset)
634 {
635     ScopedTmpRegU64 tmpReg(this);
636     EncodeRelativePcMov(tmpReg, offset, [this](Reg reg, intptr_t offset) {
637         GetMasm()->long_().mov(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
638     });
639     GetMasm()->call(ArchReg(tmpReg));
640 }
641 
642 bool Amd64Encoder::CanMakeCallByOffset(intptr_t offset)
643 {
644     return offset == static_cast<intptr_t>(static_cast<int32_t>(offset));
645 }
646 
647 void Amd64Encoder::MakeCallByOffset(intptr_t offset)
648 {
649     GetMasm()->call(GetCursorOffset() + int32_t(offset));
650 }
651 
652 void Amd64Encoder::MakeLoadAotTable(intptr_t offset, Reg reg)
653 {
654     EncodeRelativePcMov(reg, offset, [this](Reg reg, intptr_t offset) {
655         GetMasm()->long_().mov(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
656     });
657 }
658 
659 void Amd64Encoder::MakeLoadAotTableAddr([[maybe_unused]] intptr_t offset, [[maybe_unused]] Reg addr,
660                                         [[maybe_unused]] Reg val)
661 {
662     EncodeRelativePcMov(addr, offset, [this](Reg reg, intptr_t offset) {
663         GetMasm()->long_().lea(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
664     });
665     GetMasm()->mov(ArchReg(val), asmjit::x86::ptr(ArchReg(addr)));
666 }
667 
668 void Amd64Encoder::EncodeAbort()
669 {
670     GetMasm()->int3();
671 }
672 
673 void Amd64Encoder::EncodeReturn()
674 {
675     GetMasm()->ret();
676 }
677 
678 void Amd64Encoder::EncodeMul([[maybe_unused]] Reg dst, [[maybe_unused]] Reg src, [[maybe_unused]] Imm imm)
679 {
680     SetFalseResult();
681 }
682 
683 void Amd64Encoder::EncodeNop()
684 {
685     GetMasm()->nop();
686 }
687 
688 void Amd64Encoder::EncodeMov(Reg dst, Reg src)
689 {
690     if (dst == src) {
691         return;
692     }
693 
694     if (dst.IsFloat() != src.IsFloat()) {
695         ASSERT(src.GetSize() == dst.GetSize());
696         if (dst.GetSize() == WORD_SIZE) {
697             if (dst.IsFloat()) {
698                 GetMasm()->movd(ArchVReg(dst), ArchReg(src));
699             } else {
700                 GetMasm()->movd(ArchReg(dst), ArchVReg(src));
701             }
702         } else {
703             ASSERT(dst.GetSize() == DOUBLE_WORD_SIZE);
704             if (dst.IsFloat()) {
705                 GetMasm()->movq(ArchVReg(dst), ArchReg(src));
706             } else {
707                 GetMasm()->movq(ArchReg(dst), ArchVReg(src));
708             }
709         }
710         return;
711     }
712 
713     if (dst.IsFloat()) {
714         ASSERT(src.IsFloat());
715         if (dst.GetType() == FLOAT32_TYPE) {
716             GetMasm()->movss(ArchVReg(dst), ArchVReg(src));
717         } else {
718             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
719         }
720         return;
721     }
722 
723     if (dst.GetSize() < WORD_SIZE && dst.GetSize() == src.GetSize()) {
724         GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
725     }
726 
727     if (dst.GetSize() == src.GetSize()) {
728         GetMasm()->mov(ArchReg(dst), ArchReg(src));
729     } else {
730         EncodeCast(dst, false, src, false);
731     }
732 }
733 
734 void Amd64Encoder::EncodeNeg(Reg dst, Reg src)
735 {
736     if (dst.IsScalar()) {
737         EncodeMov(dst, src);
738         GetMasm()->neg(ArchReg(dst));
739         return;
740     }
741 
742     if (dst.GetType() == FLOAT32_TYPE) {
743         ScopedTmpRegF32 tmp(this);
744         CopyImmToXmm(tmp, -0.0F);
745 
746         if (dst.GetId() != src.GetId()) {
747             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
748         }
749         GetMasm()->xorps(ArchVReg(dst), ArchVReg(tmp));
750     } else {
751         ScopedTmpRegF64 tmp(this);
752         CopyImmToXmm(tmp, -0.0);
753 
754         if (dst.GetId() != src.GetId()) {
755             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
756         }
757         GetMasm()->xorps(ArchVReg(dst), ArchVReg(tmp));
758     }
759 }
760 
761 void Amd64Encoder::EncodeAbs(Reg dst, Reg src)
762 {
763     if (dst.IsScalar()) {
764         auto size = std::max<uint8_t>(src.GetSize(), WORD_SIZE);
765 
766         if (dst.GetId() != src.GetId()) {
767             GetMasm()->mov(ArchReg(dst), ArchReg(src));
768             GetMasm()->neg(ArchReg(dst));
769             GetMasm()->cmovl(ArchReg(dst, size), ArchReg(src, size));
770         } else if (GetScratchRegistersCount() > 0) {
771             ScopedTmpReg tmpReg(this, dst.GetType());
772 
773             GetMasm()->mov(ArchReg(tmpReg), ArchReg(src));
774             GetMasm()->neg(ArchReg(tmpReg));
775 
776             GetMasm()->cmovl(ArchReg(tmpReg, size), ArchReg(src, size));
777             GetMasm()->mov(ArchReg(dst), ArchReg(tmpReg));
778         } else {
779             auto end = GetMasm()->newLabel();
780 
781             GetMasm()->test(ArchReg(dst), ArchReg(dst));
782             GetMasm()->jns(end);
783 
784             GetMasm()->neg(ArchReg(dst));
785             GetMasm()->bind(end);
786         }
787         return;
788     }
789 
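    // Floating-point abs: clear the sign bit by and-ing with 0x7fffffff (float32) or
    // 0x7fffffffffffffff (float64) loaded into a temporary xmm register.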
790     if (dst.GetType() == FLOAT32_TYPE) {
791         ScopedTmpRegF32 tmp(this);
792         // NOLINTNEXTLINE(readability-magic-numbers)
793         CopyImmToXmm(tmp, uint32_t(0x7fffffff));
794 
795         if (dst.GetId() != src.GetId()) {
796             GetMasm()->movss(ArchVReg(dst), ArchVReg(src));
797         }
798         GetMasm()->andps(ArchVReg(dst), ArchVReg(tmp));
799     } else {
800         ScopedTmpRegF64 tmp(this);
801         // NOLINTNEXTLINE(readability-magic-numbers)
802         CopyImmToXmm(tmp, uint64_t(0x7fffffffffffffff));
803 
804         if (dst.GetId() != src.GetId()) {
805             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
806         }
807         GetMasm()->andps(ArchVReg(dst), ArchVReg(tmp));
808     }
809 }
810 
811 void Amd64Encoder::EncodeNot(Reg dst, Reg src)
812 {
813     ASSERT(dst.IsScalar());
814 
815     EncodeMov(dst, src);
816     GetMasm()->not_(ArchReg(dst));
817 }
818 
819 void Amd64Encoder::EncodeSqrt(Reg dst, Reg src)
820 {
821     ASSERT(dst.IsFloat());
822     if (src.GetType() == FLOAT32_TYPE) {
823         GetMasm()->sqrtps(ArchVReg(dst), ArchVReg(src));
824     } else {
825         GetMasm()->sqrtpd(ArchVReg(dst), ArchVReg(src));
826     }
827 }
828 
829 void Amd64Encoder::EncodeCastFloatToScalar(Reg dst, bool dstSigned, Reg src)
830 {
831     // We DO NOT support casts from float32/64 to int8/16 or bool: such a cast is not defined in
832     // other languages or architectures, so we do not know what its behavior should be.
833     ASSERT(dst.GetSize() >= WORD_SIZE);
834     auto end = GetMasm()->newLabel();
835 
836     // if src is NaN, then dst = 0
837     EncodeCastFloatCheckNan(dst, src, end);
838 
839     if (dstSigned) {
840         EncodeCastFloatSignCheckRange(dst, src, end);
841     } else {
842         EncodeCastFloatUnsignCheckRange(dst, src, end);
843     }
844 
845     if (src.GetType() == FLOAT32_TYPE) {
846         if (dst.GetSize() == DOUBLE_WORD_SIZE) {
847             EncodeCastFloat32ToUint64(dst, src);
848         } else {
849             GetMasm()->cvttss2si(ArchReg(dst, DOUBLE_WORD_SIZE), ArchVReg(src));
850         }
851     } else {
852         if (dst.GetSize() == DOUBLE_WORD_SIZE) {
853             EncodeCastFloat64ToUint64(dst, src);
854         } else {
855             GetMasm()->cvttsd2si(ArchReg(dst, DOUBLE_WORD_SIZE), ArchVReg(src));
856         }
857     }
858 
859     GetMasm()->bind(end);
860 }
861 
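// cvttss2si/cvttsd2si only produce signed results, so values >= 2^63 are converted by first
// subtracting 2^63 and then setting the top bit of the integer result back (xor with
// 0x8000000000000000). The constants 0x5F000000 and 0x43E0000000000000 below are 2^63 encoded
// as float and double respectively.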
862 void Amd64Encoder::EncodeCastFloat32ToUint64(Reg dst, Reg src)
863 {
864     auto bigNumberLabel = GetMasm()->newLabel();
865     auto endLabel = GetMasm()->newLabel();
866     ScopedTmpReg tmpReg(this, src.GetType());
867     ScopedTmpReg tmpNum(this, dst.GetType());
868 
869     // 2^63 encoded as a float: the first value that no longer fits into a signed int64
870     // NOLINTNEXTLINE (readability-magic-numbers)
871     GetMasm()->mov(ArchReg(dst, WORD_SIZE), asmjit::imm(0x5F000000));
872     GetMasm()->movd(ArchVReg(tmpReg), ArchReg(dst, WORD_SIZE));
873     GetMasm()->comiss(ArchVReg(src), ArchVReg(tmpReg));
874     GetMasm()->jnb(bigNumberLabel);
875 
876     GetMasm()->cvttss2si(ArchReg(dst), ArchVReg(src));
877     GetMasm()->jmp(endLabel);
878 
879     GetMasm()->bind(bigNumberLabel);
880     GetMasm()->subss(ArchVReg(src), ArchVReg(tmpReg));
881     GetMasm()->cvttss2si(ArchReg(dst), ArchVReg(src));
882     // NOLINTNEXTLINE (readability-magic-numbers)
883     GetMasm()->mov(ArchReg(tmpNum), asmjit::imm(0x8000000000000000));
884     GetMasm()->xor_(ArchReg(dst), ArchReg(tmpNum));
885     GetMasm()->bind(endLabel);
886 }
887 
888 void Amd64Encoder::EncodeCastFloat64ToUint64(Reg dst, Reg src)
889 {
890     auto bigNumberLabel = GetMasm()->newLabel();
891     auto endLabel = GetMasm()->newLabel();
892     ScopedTmpReg tmpReg(this, src.GetType());
893     ScopedTmpReg tmpNum(this, dst.GetType());
894 
895     // 2^63 encoded as a double: the first value that no longer fits into a signed int64
896     // NOLINTNEXTLINE (readability-magic-numbers)
897     GetMasm()->mov(ArchReg(dst), asmjit::imm(0x43E0000000000000));
898     GetMasm()->movq(ArchVReg(tmpReg), ArchReg(dst));
899     GetMasm()->comisd(ArchVReg(src), ArchVReg(tmpReg));
900     GetMasm()->jnb(bigNumberLabel);
901 
902     GetMasm()->cvttsd2si(ArchReg(dst), ArchVReg(src));
903     GetMasm()->jmp(endLabel);
904 
905     GetMasm()->bind(bigNumberLabel);
906     GetMasm()->subsd(ArchVReg(src), ArchVReg(tmpReg));
907     GetMasm()->cvttsd2si(ArchReg(dst), ArchVReg(src));
908     // NOLINTNEXTLINE (readability-magic-numbers)
909     GetMasm()->mov(ArchReg(tmpNum), asmjit::imm(0x8000000000000000));
910     GetMasm()->xor_(ArchReg(dst), ArchReg(tmpNum));
911     GetMasm()->bind(endLabel);
912 }
913 
914 void Amd64Encoder::EncodeCastFloatCheckNan(Reg dst, Reg src, const asmjit::Label &end)
915 {
916     GetMasm()->xor_(ArchReg(dst, DOUBLE_WORD_SIZE), ArchReg(dst, DOUBLE_WORD_SIZE));
917     if (src.GetType() == FLOAT32_TYPE) {
918         GetMasm()->ucomiss(ArchVReg(src), ArchVReg(src));
919     } else {
920         GetMasm()->ucomisd(ArchVReg(src), ArchVReg(src));
921     }
922     GetMasm()->jp(end);
923 }
924 
925 void Amd64Encoder::EncodeCastFloatSignCheckRange(Reg dst, Reg src, const asmjit::Label &end)
926 {
927     // if src < INT_MIN, then dst = INT_MIN
928     // if src >= (INT_MAX + 1), then dst = INT_MAX
929     if (dst.GetSize() == DOUBLE_WORD_SIZE) {
930         EncodeCastFloatCheckRange(dst, src, end, INT64_MIN, INT64_MAX);
931     } else {
932         EncodeCastFloatCheckRange(dst, src, end, INT32_MIN, INT32_MAX);
933     }
934 }
935 
936 void Amd64Encoder::EncodeCastFloatCheckRange(Reg dst, Reg src, const asmjit::Label &end, const int64_t minValue,
937                                              const uint64_t maxValue)
938 {
939     ScopedTmpReg cmpReg(this, src.GetType());
940     ScopedTmpReg tmpReg(this, src.GetType() == FLOAT64_TYPE ? INT64_TYPE : INT32_TYPE);
941 
942     GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(minValue));
943     if (src.GetType() == FLOAT32_TYPE) {
944         GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(bit_cast<uint32_t>(float(minValue))));
945         GetMasm()->movd(ArchVReg(cmpReg), ArchReg(tmpReg));
946         GetMasm()->ucomiss(ArchVReg(src), ArchVReg(cmpReg));
947     } else {
948         GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(bit_cast<uint64_t>(double(minValue))));
949         GetMasm()->movq(ArchVReg(cmpReg), ArchReg(tmpReg));
950         GetMasm()->ucomisd(ArchVReg(src), ArchVReg(cmpReg));
951     }
952     GetMasm()->jb(end);
953 
954     GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(maxValue));
955     if (src.GetType() == FLOAT32_TYPE) {
956         GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(bit_cast<uint32_t>(float(maxValue) + 1U)));
957         GetMasm()->movd(ArchVReg(cmpReg), ArchReg(tmpReg));
958         GetMasm()->ucomiss(ArchVReg(src), ArchVReg(cmpReg));
959     } else {
960         GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(bit_cast<uint64_t>(double(maxValue) + 1U)));
961         GetMasm()->movq(ArchVReg(cmpReg), ArchReg(tmpReg));
962         GetMasm()->ucomisd(ArchVReg(src), ArchVReg(cmpReg));
963     }
964     GetMasm()->jae(end);
965 }
966 
967 void Amd64Encoder::EncodeCastFloatUnsignCheckRange(Reg dst, Reg src, const asmjit::Label &end)
968 {
969     // if src < 0, then dst = 0
970     // if src >= (UINT_MAX + 1), then dst = UINT_MAX
971     if (dst.GetSize() == DOUBLE_WORD_SIZE) {
972         EncodeCastFloatCheckRange(dst, src, end, 0, UINT64_MAX);
973     } else {
974         EncodeCastFloatCheckRange(dst, src, end, 0, UINT32_MAX);
975     }
976 }
977 
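// Unsigned 64-bit integer to floating point. For float32, a value with the top bit set is
// halved (shifted right by one with the dropped low bit or-ed back in to keep rounding
// correct), converted, and then doubled. For float64, the usual SSE2 trick is used: the low
// and high 32-bit halves are combined with the exponents 2^52 and 2^84 (ARR1/ARR2), the biases
// are subtracted, and the two partial doubles are added together.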
978 void Amd64Encoder::EncodeCastScalarToFloatUnsignDouble(Reg dst, Reg src)
979 {
980     if (dst.GetType() == FLOAT32_TYPE) {
981         ScopedTmpRegU64 int1Reg(this);
982         ScopedTmpRegU64 int2Reg(this);
983 
984         auto sgn = GetMasm()->newLabel();
985         auto end = GetMasm()->newLabel();
986 
987         GetMasm()->test(ArchReg(src), ArchReg(src));
988         GetMasm()->js(sgn);
989         GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(src));
990         GetMasm()->jmp(end);
991 
992         GetMasm()->bind(sgn);
993         GetMasm()->mov(ArchReg(int1Reg), ArchReg(src));
994         GetMasm()->mov(ArchReg(int2Reg), ArchReg(src));
995         GetMasm()->shr(ArchReg(int2Reg), asmjit::imm(1));
996         GetMasm()->and_(ArchReg(int1Reg, WORD_SIZE), asmjit::imm(1));
997         GetMasm()->or_(ArchReg(int1Reg), ArchReg(int2Reg));
998         GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(int1Reg));
999         GetMasm()->addss(ArchVReg(dst), ArchVReg(dst));
1000 
1001         GetMasm()->bind(end);
1002     } else {
1003         static constexpr std::array<uint32_t, 4> ARR1 = {uint32_t(0x43300000), uint32_t(0x45300000), 0x0, 0x0};
1004         static constexpr std::array<uint64_t, 2> ARR2 = {uint64_t(0x4330000000000000), uint64_t(0x4530000000000000)};
1005 
1006         ScopedTmpReg float1Reg(this, dst.GetType());
1007         ScopedTmpRegF64 tmp(this);
1008 
1009         GetMasm()->movq(ArchVReg(float1Reg), ArchReg(src));
1010         CopyArrayToXmm(tmp, ARR1);
1011         GetMasm()->punpckldq(ArchVReg(float1Reg), ArchVReg(tmp));
1012         CopyArrayToXmm(tmp, ARR2);
1013         GetMasm()->subpd(ArchVReg(float1Reg), ArchVReg(tmp));
1014         GetMasm()->movapd(ArchVReg(dst), ArchVReg(float1Reg));
1015         GetMasm()->unpckhpd(ArchVReg(dst), ArchVReg(float1Reg));
1016         GetMasm()->addsd(ArchVReg(dst), ArchVReg(float1Reg));
1017     }
1018 }
1019 
1020 void Amd64Encoder::EncodeCastScalarToFloat(Reg dst, Reg src, bool srcSigned)
1021 {
1022     if (!srcSigned && src.GetSize() == DOUBLE_WORD_SIZE) {
1023         EncodeCastScalarToFloatUnsignDouble(dst, src);
1024         return;
1025     }
1026 
1027     if (src.GetSize() < WORD_SIZE || (srcSigned && src.GetSize() == WORD_SIZE)) {
1028         if (dst.GetType() == FLOAT32_TYPE) {
1029             GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(src, WORD_SIZE));
1030         } else {
1031             GetMasm()->cvtsi2sd(ArchVReg(dst), ArchReg(src, WORD_SIZE));
1032         }
1033         return;
1034     }
1035 
1036     if (!srcSigned && src.GetSize() == WORD_SIZE) {
1037         ScopedTmpRegU64 int1Reg(this);
1038 
1039         GetMasm()->mov(ArchReg(int1Reg, WORD_SIZE), ArchReg(src, WORD_SIZE));
1040         if (dst.GetType() == FLOAT32_TYPE) {
1041             GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(int1Reg));
1042         } else {
1043             GetMasm()->cvtsi2sd(ArchVReg(dst), ArchReg(int1Reg));
1044         }
1045         return;
1046     }
1047 
1048     ASSERT(srcSigned && src.GetSize() == DOUBLE_WORD_SIZE);
1049     if (dst.GetType() == FLOAT32_TYPE) {
1050         GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(src));
1051     } else {
1052         GetMasm()->cvtsi2sd(ArchVReg(dst), ArchReg(src));
1053     }
1054 }
1055 
1056 void Amd64Encoder::EncodeCastToBool(Reg dst, Reg src)
1057 {
1058     // The ISA says that we only support these casts:
1059     // i32tou1, i64tou1, u32tou1, u64tou1
1060     ASSERT(src.IsScalar());
1061     ASSERT(dst.IsScalar());
1062 
1063     // In our ISA the minimal type is 32-bit, so bool is represented as a 32-bit value
1064     GetMasm()->test(ArchReg(src), ArchReg(src));
1065     // One "mov" will be better, then 2 jump. Else other instructions will overwrite the flags.
1066     GetMasm()->mov(ArchReg(dst, WORD_SIZE), asmjit::imm(0));
1067     GetMasm()->setne(ArchReg(dst));
1068 }
1069 
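// cvttsd2si returns the "integer indefinite" value INT64_MIN for infinities and values that do
// not fit into int64. "cmp dst, 1" overflows only when dst == INT64_MIN, so the jo below sends
// exactly those cases to the slow path.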
1070 void Amd64Encoder::EncodeFastPathDynamicCast(Reg dst, Reg src, LabelHolder::LabelId slow)
1071 {
1072     ASSERT(IsLabelValid(slow));
1073     ASSERT(src.IsFloat() && dst.IsScalar());
1074 
1075     CHECK_EQ(src.GetSize(), BITS_PER_UINT64);
1076     CHECK_EQ(dst.GetSize(), BITS_PER_UINT32);
1077 
1078     auto end {GetMasm()->newLabel()};
1079 
1080     // if src is NaN, then dst = 0
1081     EncodeCastFloatCheckNan(dst, src, end);
1082 
1083     // infinite and big numbers will overflow here to INT64_MIN
1084     GetMasm()->cvttsd2si(ArchReg(dst, DOUBLE_WORD_SIZE), ArchVReg(src));
1085     // check INT64_MIN
1086     GetMasm()->cmp(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(1));
1087     auto slowLabel {static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(slow)};
1088     // jump to slow path in case of overflow
1089     GetMasm()->jo(*slowLabel);
1090 
1091     GetMasm()->bind(end);
1092 }
1093 
1094 void Amd64Encoder::EncodeJsDoubleToCharCast(Reg dst, Reg src, Reg tmp, uint32_t failureResult)
1095 {
1096     ASSERT(src.IsFloat() && dst.IsScalar());
1097 
1098     CHECK_EQ(src.GetSize(), BITS_PER_UINT64);
1099     CHECK_EQ(dst.GetSize(), BITS_PER_UINT32);
1100 
1101     // Infinities and too-large values overflow here to INT64_MIN; a NaN source also yields INT64_MIN (the integer indefinite value).
1102     GetMasm()->cvttsd2si(ArchReg(dst, DOUBLE_WORD_SIZE), ArchVReg(src));
1103     // save the result to tmp
1104     GetMasm()->mov(ArchReg(tmp, DOUBLE_WORD_SIZE), ArchReg(dst, DOUBLE_WORD_SIZE));
1105     // 'and' the result with 0xffff
1106     constexpr uint32_t UTF16_CHAR_MASK = 0xffff;
1107     GetMasm()->and_(ArchReg(dst), asmjit::imm(UTF16_CHAR_MASK));
1108     // check INT64_MIN
1109     GetMasm()->cmp(ArchReg(tmp, DOUBLE_WORD_SIZE), asmjit::imm(1));
1110     // 'mov' never affects the flags
1111     GetMasm()->mov(ArchReg(tmp, DOUBLE_WORD_SIZE), failureResult);
1112     // ... so the failureResult is conditionally moved into dst only on overflow
1113     GetMasm()->cmovo(ArchReg(dst), ArchReg(tmp));
1114 }
1115 
1116 void Amd64Encoder::EncodeCast(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1117 {
1118     if (src.IsFloat() && dst.IsScalar()) {
1119         EncodeCastFloatToScalar(dst, dstSigned, src);
1120         return;
1121     }
1122 
1123     if (src.IsScalar() && dst.IsFloat()) {
1124         EncodeCastScalarToFloat(dst, src, srcSigned);
1125         return;
1126     }
1127 
1128     if (src.IsFloat() && dst.IsFloat()) {
1129         if (src.GetSize() != dst.GetSize()) {
1130             if (src.GetType() == FLOAT32_TYPE) {
1131                 GetMasm()->cvtss2sd(ArchVReg(dst), ArchVReg(src));
1132             } else {
1133                 GetMasm()->cvtsd2ss(ArchVReg(dst), ArchVReg(src));
1134             }
1135             return;
1136         }
1137 
1138         if (src.GetType() == FLOAT32_TYPE) {
1139             GetMasm()->movss(ArchVReg(dst), ArchVReg(src));
1140         } else {
1141             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
1142         }
1143         return;
1144     }
1145 
1146     ASSERT(src.IsScalar() && dst.IsScalar());
1147     EncodeCastScalar(dst, dstSigned, src, srcSigned);
1148 }
1149 
1150 void Amd64Encoder::EncodeCastScalar(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1151 {
1152     auto extendTo32bit = [this](Reg reg, bool isSigned) {
1153         if (reg.GetSize() < WORD_SIZE) {
1154             if (isSigned) {
1155                 GetMasm()->movsx(ArchReg(reg, WORD_SIZE), ArchReg(reg));
1156             } else {
1157                 GetMasm()->movzx(ArchReg(reg, WORD_SIZE), ArchReg(reg));
1158             }
1159         }
1160     };
1161 
1162     if (src.GetSize() >= dst.GetSize()) {
1163         if (dst.GetId() != src.GetId()) {
1164             GetMasm()->mov(ArchReg(dst), ArchReg(src, dst.GetSize()));
1165         }
1166         extendTo32bit(dst, dstSigned);
1167         return;
1168     }
1169 
1170     if (srcSigned) {
1171         if (dst.GetSize() < DOUBLE_WORD_SIZE) {
1172             GetMasm()->movsx(ArchReg(dst), ArchReg(src));
1173             extendTo32bit(dst, dstSigned);
1174         } else if (src.GetSize() == WORD_SIZE) {
1175             GetMasm()->movsxd(ArchReg(dst), ArchReg(src));
1176         } else {
1177             GetMasm()->movsx(ArchReg(dst, WORD_SIZE), ArchReg(src));
1178             GetMasm()->movsxd(ArchReg(dst), ArchReg(dst, WORD_SIZE));
1179         }
1180         return;
1181     }
1182 
1183     if (src.GetSize() == WORD_SIZE) {
1184         GetMasm()->mov(ArchReg(dst, WORD_SIZE), ArchReg(src));
1185     } else if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1186         GetMasm()->movzx(ArchReg(dst, WORD_SIZE), ArchReg(src));
1187     } else {
1188         GetMasm()->movzx(ArchReg(dst), ArchReg(src));
1189         extendTo32bit(dst, dstSigned);
1190     }
1191 }
1192 
1193 Reg Amd64Encoder::MakeShift(Shift shift)
1194 {
1195     Reg reg = shift.GetBase();
1196     ASSERT(reg.IsValid());
1197     if (reg.IsScalar()) {
1198         ASSERT(shift.GetType() != ShiftType::INVALID_SHIFT);
1199         switch (shift.GetType()) {
1200             case ShiftType::LSL:
1201                 GetMasm()->shl(ArchReg(reg), asmjit::imm(shift.GetScale()));
1202                 break;
1203             case ShiftType::LSR:
1204                 GetMasm()->shr(ArchReg(reg), asmjit::imm(shift.GetScale()));
1205                 break;
1206             case ShiftType::ASR:
1207                 GetMasm()->sar(ArchReg(reg), asmjit::imm(shift.GetScale()));
1208                 break;
1209             case ShiftType::ROR:
1210                 GetMasm()->ror(ArchReg(reg), asmjit::imm(shift.GetScale()));
1211                 break;
1212             default:
1213                 UNREACHABLE();
1214         }
1215 
1216         return reg;
1217     }
1218 
1219     // Invalid register type
1220     UNREACHABLE();
1221 }
1222 
1223 void Amd64Encoder::EncodeAdd(Reg dst, Reg src0, Shift src1)
1224 {
1225     if (dst.IsFloat()) {
1226         SetFalseResult();
1227         return;
1228     }
1229 
1230     ASSERT(dst.GetSize() >= src0.GetSize());
1231 
1232     auto shiftReg = MakeShift(src1);
1233 
1234     if (src0.GetSize() < WORD_SIZE) {
1235         EncodeAdd(dst, src0, shiftReg);
1236         return;
1237     }
1238 
1239     if (src0.GetSize() == DOUBLE_WORD_SIZE && shiftReg.GetSize() < DOUBLE_WORD_SIZE) {
1240         GetMasm()->movsxd(ArchReg(shiftReg, DOUBLE_WORD_SIZE), ArchReg(shiftReg));
1241     }
1242 
1243     GetMasm()->lea(ArchReg(dst), asmjit::x86::ptr(ArchReg(src0), ArchReg(shiftReg, src0.GetSize())));
1244 }
1245 
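// For scalars, lea acts as a non-destructive three-operand add: it clobbers neither source and
// does not modify the flags.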
1246 void Amd64Encoder::EncodeAdd(Reg dst, Reg src0, Reg src1)
1247 {
1248     if (dst.IsScalar()) {
1249         auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
1250         GetMasm()->lea(ArchReg(dst, size), asmjit::x86::ptr(ArchReg(src0, size), ArchReg(src1, size)));
1251         return;
1252     }
1253 
1254     if (dst.GetType() == FLOAT32_TYPE) {
1255         if (dst.GetId() == src0.GetId()) {
1256             GetMasm()->addss(ArchVReg(dst), ArchVReg(src1));
1257         } else if (dst.GetId() == src1.GetId()) {
1258             GetMasm()->addss(ArchVReg(dst), ArchVReg(src0));
1259         } else {
1260             GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
1261             GetMasm()->addss(ArchVReg(dst), ArchVReg(src1));
1262         }
1263     } else {
1264         if (dst.GetId() == src0.GetId()) {
1265             GetMasm()->addsd(ArchVReg(dst), ArchVReg(src1));
1266         } else if (dst.GetId() == src1.GetId()) {
1267             GetMasm()->addsd(ArchVReg(dst), ArchVReg(src0));
1268         } else {
1269             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
1270             GetMasm()->addsd(ArchVReg(dst), ArchVReg(src1));
1271         }
1272     }
1273 }
1274 
1275 void Amd64Encoder::EncodeSub(Reg dst, Reg src0, Reg src1)
1276 {
1277     if (dst.IsScalar()) {
1278         if (dst.GetId() == src0.GetId()) {
1279             GetMasm()->sub(ArchReg(dst), ArchReg(src1));
1280         } else if (dst.GetId() == src1.GetId()) {
1281             GetMasm()->sub(ArchReg(dst), ArchReg(src0));
1282             GetMasm()->neg(ArchReg(dst));
1283         } else {
1284             EncodeMov(dst, src0);
1285             GetMasm()->sub(ArchReg(dst), ArchReg(src1));
1286         }
1287         return;
1288     }
1289 
1290     if (dst.GetType() == FLOAT32_TYPE) {
1291         if (dst.GetId() == src0.GetId()) {
1292             GetMasm()->subss(ArchVReg(dst), ArchVReg(src1));
1293         } else if (dst.GetId() != src1.GetId()) {
1294             GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
1295             GetMasm()->subss(ArchVReg(dst), ArchVReg(src1));
1296         } else {
1297             ScopedTmpReg tmpReg(this, dst.GetType());
1298             GetMasm()->movss(ArchVReg(tmpReg), ArchVReg(src0));
1299             GetMasm()->subss(ArchVReg(tmpReg), ArchVReg(src1));
1300             GetMasm()->movss(ArchVReg(dst), ArchVReg(tmpReg));
1301         }
1302     } else {
1303         if (dst.GetId() == src0.GetId()) {
1304             GetMasm()->subsd(ArchVReg(dst), ArchVReg(src1));
1305         } else if (dst.GetId() != src1.GetId()) {
1306             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
1307             GetMasm()->subsd(ArchVReg(dst), ArchVReg(src1));
1308         } else {
1309             ScopedTmpReg tmpReg(this, dst.GetType());
1310             GetMasm()->movsd(ArchVReg(tmpReg), ArchVReg(src0));
1311             GetMasm()->subsd(ArchVReg(tmpReg), ArchVReg(src1));
1312             GetMasm()->movsd(ArchVReg(dst), ArchVReg(tmpReg));
1313         }
1314     }
1315 }
1316 
1317 void Amd64Encoder::EncodeMul(Reg dst, Reg src0, Reg src1)
1318 {
1319     if (dst.IsScalar()) {
1320         auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
1321 
1322         if (dst.GetId() == src0.GetId()) {
1323             GetMasm()->imul(ArchReg(dst, size), ArchReg(src1, size));
1324         } else if (dst.GetId() == src1.GetId()) {
1325             GetMasm()->imul(ArchReg(dst, size), ArchReg(src0, size));
1326         } else {
1327             GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
1328             GetMasm()->imul(ArchReg(dst, size), ArchReg(src1, size));
1329         }
1330         return;
1331     }
1332 
1333     if (dst.GetType() == FLOAT32_TYPE) {
1334         if (dst.GetId() == src0.GetId()) {
1335             GetMasm()->mulss(ArchVReg(dst), ArchVReg(src1));
1336         } else if (dst.GetId() == src1.GetId()) {
1337             GetMasm()->mulss(ArchVReg(dst), ArchVReg(src0));
1338         } else {
1339             GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
1340             GetMasm()->mulss(ArchVReg(dst), ArchVReg(src1));
1341         }
1342     } else {
1343         if (dst.GetId() == src0.GetId()) {
1344             GetMasm()->mulsd(ArchVReg(dst), ArchVReg(src1));
1345         } else if (dst.GetId() == src1.GetId()) {
1346             GetMasm()->mulsd(ArchVReg(dst), ArchVReg(src0));
1347         } else {
1348             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
1349             GetMasm()->mulsd(ArchVReg(dst), ArchVReg(src1));
1350         }
1351     }
1352 }
1353 
1354 void Amd64Encoder::EncodeAddOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1355 {
1356     ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1357     ASSERT(cc == Condition::VS || cc == Condition::VC);
1358     auto size = dst.GetSize();
1359     if (dst.GetId() == src0.GetId()) {
1360         GetMasm()->add(ArchReg(dst, size), ArchReg(src1, size));
1361     } else if (dst.GetId() == src1.GetId()) {
1362         GetMasm()->add(ArchReg(dst, size), ArchReg(src0, size));
1363     } else {
1364         GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
1365         GetMasm()->add(ArchReg(dst, size), ArchReg(src1, size));
1366     }
1367     auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
1368     GetMasm()->j(ArchCc(cc, false), *label);
1369 }
1370 
1371 void Amd64Encoder::EncodeSubOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1372 {
1373     ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1374     ASSERT(cc == Condition::VS || cc == Condition::VC);
1375     auto size = dst.GetSize();
1376     if (dst.GetId() == src0.GetId()) {
1377         GetMasm()->sub(ArchReg(dst, size), ArchReg(src1, size));
1378     } else if (dst.GetId() == src1.GetId()) {
1379         ScopedTmpReg tmpReg(this, dst.GetType());
1380         GetMasm()->mov(ArchReg(tmpReg, size), ArchReg(src1, size));
1381         GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
1382         GetMasm()->sub(ArchReg(dst, size), ArchReg(tmpReg, size));
1383     } else {
1384         GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
1385         GetMasm()->sub(ArchReg(dst, size), ArchReg(src1, size));
1386     }
1387     auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
1388     GetMasm()->j(ArchCc(cc, false), *label);
1389 }
1390 
1391 void Amd64Encoder::EncodeNegOverflowAndZero(compiler::LabelHolder::LabelId id, Reg dst, Reg src)
1392 {
1393     ASSERT(!dst.IsFloat() && !src.IsFloat());
1394     auto size = dst.GetSize();
1395     // NOLINTNEXTLINE(readability-magic-numbers)
1396     EncodeJumpTest(id, src, Imm(0x7fffffff), Condition::TST_EQ);
1397     EncodeMov(dst, src);
1398     GetMasm()->neg(ArchReg(dst, size));
1399 }
1400 
1401 void Amd64Encoder::EncodeDivFloat(Reg dst, Reg src0, Reg src1)
1402 {
1403     ASSERT(dst.IsFloat());
1404     if (dst.GetType() == FLOAT32_TYPE) {
1405         if (dst.GetId() == src0.GetId()) {
1406             GetMasm()->divss(ArchVReg(dst), ArchVReg(src1));
1407         } else if (dst.GetId() != src1.GetId()) {
1408             GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
1409             GetMasm()->divss(ArchVReg(dst), ArchVReg(src1));
1410         } else {
1411             ScopedTmpRegF32 tmp(this);
1412             GetMasm()->movss(ArchVReg(tmp), ArchVReg(src0));
1413             GetMasm()->divss(ArchVReg(tmp), ArchVReg(src1));
1414             GetMasm()->movss(ArchVReg(dst), ArchVReg(tmp));
1415         }
1416     } else {
1417         if (dst.GetId() == src0.GetId()) {
1418             GetMasm()->divsd(ArchVReg(dst), ArchVReg(src1));
1419         } else if (dst.GetId() != src1.GetId()) {
1420             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
1421             GetMasm()->divsd(ArchVReg(dst), ArchVReg(src1));
1422         } else {
1423             ScopedTmpRegF64 tmp(this);
1424             GetMasm()->movsd(ArchVReg(tmp), ArchVReg(src0));
1425             GetMasm()->divsd(ArchVReg(tmp), ArchVReg(src1));
1426             GetMasm()->movsd(ArchVReg(dst), ArchVReg(tmp));
1427         }
1428     }
1429 }
1430 
1431 static void EncodeDivSpillDst(asmjit::x86::Assembler *masm, Reg dst)
1432 {
1433     if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
1434         masm->push(asmjit::x86::rdx);
1435     }
1436     if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1437         masm->push(asmjit::x86::rax);
1438     }
1439 }
1440 
1441 static void EncodeDivFillDst(asmjit::x86::Assembler *masm, Reg dst)
1442 {
1443     if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1444         masm->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::x86::rax);
1445         masm->pop(asmjit::x86::rax);
1446     }
1447 
1448     if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
1449         masm->pop(asmjit::x86::rdx);
1450     }
1451 }
1452 
EncodeDiv(Reg dst,bool dstSigned,Reg src0,Reg src1)1453 void Amd64Encoder::EncodeDiv(Reg dst, bool dstSigned, Reg src0, Reg src1)
1454 {
1455     if (dst.IsFloat()) {
1456         EncodeDivFloat(dst, src0, src1);
1457         return;
1458     }
1459 
1460     auto negPath = GetMasm()->newLabel();
1461     auto crossroad = GetMasm()->newLabel();
1462 
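    // Divisor == -1 gets its own path: idiv raises #DE for INT_MIN / -1, and x / -1 == -x anyway.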
1463     if (dstSigned) {
1464         GetMasm()->cmp(ArchReg(src1), asmjit::imm(-1));
1465         GetMasm()->je(negPath);
1466     }
1467 
1468     EncodeDivSpillDst(GetMasm(), dst);
1469 
1470     ScopedTmpReg tmpReg(this, dst.GetType());
1471     Reg op1 {src1};
1472     if (src1.GetId() == ConvertRegNumber(asmjit::x86::rax.id()) ||
1473         src1.GetId() == ConvertRegNumber(asmjit::x86::rdx.id())) {
1474         GetMasm()->mov(ArchReg(tmpReg), ArchReg(src1));
1475         op1 = Reg(tmpReg);
1476     }
1477 
1478     if (src0.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1479         GetMasm()->mov(asmjit::x86::rax, ArchReg(src0, DOUBLE_WORD_SIZE));
1480     }
1481     if (dstSigned) {
1482         if (dst.GetSize() <= WORD_SIZE) {
1483             GetMasm()->cdq();
1484         } else {
1485             GetMasm()->cqo();
1486         }
1487         GetMasm()->idiv(ArchReg(op1));
1488     } else {
1489         GetMasm()->xor_(asmjit::x86::rdx, asmjit::x86::rdx);
1490         GetMasm()->div(ArchReg(op1));
1491     }
1492 
1493     EncodeDivFillDst(GetMasm(), dst);
1494 
1495     GetMasm()->jmp(crossroad);
1496 
1497     GetMasm()->bind(negPath);
1498     if (dst.GetId() != src0.GetId()) {
1499         GetMasm()->mov(ArchReg(dst), ArchReg(src0));
1500     }
1501     GetMasm()->neg(ArchReg(dst));
1502 
1503     GetMasm()->bind(crossroad);
1504 }
1505 
EncodeSignedDiv(Reg dst,Reg src0,Imm imm)1506 void Amd64Encoder::EncodeSignedDiv(Reg dst, Reg src0, Imm imm)
1507 {
1508     int64_t divisor = imm.GetAsInt();
1509 
1510     Reg ax(ConvertRegNumber(asmjit::x86::rax.id()), dst.GetType());
1511     Reg dx(ConvertRegNumber(asmjit::x86::rdx.id()), dst.GetType());
1512 
1513     if (dst != ax) {
1514         GetMasm()->push(ArchReg(ax, DOUBLE_WORD_SIZE));
1515     }
1516     if (dst != dx) {
1517         GetMasm()->push(ArchReg(dx, DOUBLE_WORD_SIZE));
1518     }
1519 
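    // Signed division by a constant via multiplication (Hacker's Delight style): imul leaves the high
    // half of src0 * magic in rdx, a correction is applied when divisor and magic have opposite signs,
    // the product is shifted arithmetically, and the sign bit is added so negative quotients round
    // toward zero.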
1520     FastConstSignedDivisor fastDivisor(divisor, dst.GetSize());
1521     int64_t magic = fastDivisor.GetMagic();
1522 
1523     ScopedTmpReg tmp(this, dst.GetType());
1524     EncodeMov(tmp, src0);
1525     EncodeMov(ax, src0);
1526     EncodeMov(dx, Imm(magic));
1527     GetMasm()->imul(ArchReg(dx));
1528 
1529     if (divisor > 0 && magic < 0) {
1530         EncodeAdd(dx, dx, tmp);
1531     } else if (divisor < 0 && magic > 0) {
1532         EncodeSub(dx, dx, tmp);
1533     }
1534 
1535     int64_t shift = fastDivisor.GetShift();
1536     EncodeAShr(dst, dx, Imm(shift));
1537 
1538     // result = (result < 0 ? result + 1 : result)
1539     EncodeShr(tmp, dst, Imm(dst.GetSize() - 1U));
1540     EncodeAdd(dst, dst, tmp);
1541 
1542     if (dst != dx) {
1543         GetMasm()->pop(ArchReg(dx, DOUBLE_WORD_SIZE));
1544     }
1545     if (dst != ax) {
1546         GetMasm()->pop(ArchReg(ax, DOUBLE_WORD_SIZE));
1547     }
1548 }
1549 
EncodeUnsignedDiv(Reg dst,Reg src0,Imm imm)1550 void Amd64Encoder::EncodeUnsignedDiv(Reg dst, Reg src0, Imm imm)
1551 {
1552     auto divisor = bit_cast<uint64_t>(imm.GetAsInt());
1553 
1554     Reg ax(ConvertRegNumber(asmjit::x86::rax.id()), dst.GetType());
1555     Reg dx(ConvertRegNumber(asmjit::x86::rdx.id()), dst.GetType());
1556 
1557     if (dst != ax) {
1558         GetMasm()->push(ArchReg(ax, DOUBLE_WORD_SIZE));
1559     }
1560     if (dst != dx) {
1561         GetMasm()->push(ArchReg(dx, DOUBLE_WORD_SIZE));
1562     }
1563 
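    // Unsigned division by a constant: mul leaves the high half of src0 * magic in rdx. When GetAdd()
    // is set the high half alone is not enough, and the add-and-shift fixup
    // q = (((src0 - hi) >> 1) + hi) >> (shift - 1) is applied to avoid overflowing the intermediate.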
1564     FastConstUnsignedDivisor fastDivisor(divisor, dst.GetSize());
1565     uint64_t magic = fastDivisor.GetMagic();
1566 
1567     ScopedTmpReg tmp(this, dst.GetType());
1568     if (fastDivisor.GetAdd()) {
1569         EncodeMov(tmp, src0);
1570     }
1571     EncodeMov(ax, src0);
1572     EncodeMov(dx, Imm(magic));
1573     GetMasm()->mul(ArchReg(dx));
1574 
1575     uint64_t shift = fastDivisor.GetShift();
1576     if (!fastDivisor.GetAdd()) {
1577         EncodeShr(dst, dx, Imm(shift));
1578     } else {
1579         ASSERT(shift >= 1U);
1580         EncodeSub(tmp, tmp, dx);
1581         EncodeShr(tmp, tmp, Imm(1U));
1582         EncodeAdd(tmp, tmp, dx);
1583         EncodeShr(dst, tmp, Imm(shift - 1U));
1584     }
1585 
1586     if (dst != dx) {
1587         GetMasm()->pop(ArchReg(dx, DOUBLE_WORD_SIZE));
1588     }
1589     if (dst != ax) {
1590         GetMasm()->pop(ArchReg(ax, DOUBLE_WORD_SIZE));
1591     }
1592 }
1593 
EncodeDiv(Reg dst,Reg src0,Imm imm,bool isSigned)1594 void Amd64Encoder::EncodeDiv(Reg dst, Reg src0, Imm imm, bool isSigned)
1595 {
1596     ASSERT(dst.IsScalar() && dst.GetSize() >= WORD_SIZE);
1597     ASSERT(CanOptimizeImmDivMod(bit_cast<uint64_t>(imm.GetAsInt()), isSigned));
1598     if (isSigned) {
1599         EncodeSignedDiv(dst, src0, imm);
1600     } else {
1601         EncodeUnsignedDiv(dst, src0, imm);
1602     }
1603 }
1604 
EncodeMod(Reg dst,Reg src0,Imm imm,bool isSigned)1605 void Amd64Encoder::EncodeMod(Reg dst, Reg src0, Imm imm, bool isSigned)
1606 {
1607     ASSERT(dst.IsScalar() && dst.GetSize() >= WORD_SIZE);
1608     ASSERT(CanOptimizeImmDivMod(bit_cast<uint64_t>(imm.GetAsInt()), isSigned));
1609 
1610     // dst = src0 - imm * (src0 / imm)
1611     ScopedTmpReg tmp(this, dst.GetType());
1612     EncodeDiv(tmp, src0, imm, isSigned);
1613     if (dst.GetSize() == WORD_SIZE) {
1614         GetMasm()->imul(ArchReg(tmp), ArchReg(tmp), asmjit::imm(imm.GetAsInt()));
1615     } else {
1616         ScopedTmpRegU64 immReg(this);
1617         EncodeMov(immReg, imm);
1618         EncodeMul(tmp, tmp, immReg);
1619     }
1620     EncodeSub(dst, src0, tmp);
1621 }
1622 
EncodeModFloat(Reg dst,Reg src0,Reg src1)1623 void Amd64Encoder::EncodeModFloat(Reg dst, Reg src0, Reg src1)
1624 {
1625     ASSERT(dst.IsFloat());
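    // There is no SSE remainder instruction, so floating-point modulo is lowered to a libm call.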
1626     if (dst.GetType() == FLOAT32_TYPE) {
1627         using Fp = float (*)(float, float);
1628         MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<Fp>(fmodf)));
1629     } else {
1630         using Fp = double (*)(double, double);
1631         MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<Fp>(fmod)));
1632     }
1633 }
1634 
EncodeMod(Reg dst,bool dstSigned,Reg src0,Reg src1)1635 void Amd64Encoder::EncodeMod(Reg dst, bool dstSigned, Reg src0, Reg src1)
1636 {
1637     if (dst.IsFloat()) {
1638         EncodeModFloat(dst, src0, src1);
1639         return;
1640     }
1641 
1642     auto zeroPath = GetMasm()->newLabel();
1643     auto crossroad = GetMasm()->newLabel();
1644 
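    // x % -1 is always 0; take the zero path early so idiv never sees INT_MIN % -1 (which would raise #DE).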
1645     if (dstSigned) {
1646         GetMasm()->cmp(ArchReg(src1), asmjit::imm(-1));
1647         GetMasm()->je(zeroPath);
1648     }
1649 
1650     if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1651         GetMasm()->push(asmjit::x86::rax);
1652     }
1653     if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
1654         GetMasm()->push(asmjit::x86::rdx);
1655     }
1656 
1657     ScopedTmpReg tmpReg(this, dst.GetType());
1658     Reg op1 {src1};
1659     if (src1.GetId() == ConvertRegNumber(asmjit::x86::rax.id()) ||
1660         src1.GetId() == ConvertRegNumber(asmjit::x86::rdx.id())) {
1661         GetMasm()->mov(ArchReg(tmpReg), ArchReg(src1));
1662         op1 = Reg(tmpReg);
1663     }
1664 
1665     if (src0.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1666         GetMasm()->mov(asmjit::x86::rax, ArchReg(src0, DOUBLE_WORD_SIZE));
1667     }
1668 
1669     if (dstSigned) {
1670         if (dst.GetSize() <= WORD_SIZE) {
1671             GetMasm()->cdq();
1672         } else {
1673             GetMasm()->cqo();
1674         }
1675         GetMasm()->idiv(ArchReg(op1));
1676     } else {
1677         GetMasm()->xor_(asmjit::x86::rdx, asmjit::x86::rdx);
1678         GetMasm()->div(ArchReg(op1));
1679     }
1680 
1681     if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
1682         GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::x86::rdx);
1683         GetMasm()->pop(asmjit::x86::rdx);
1684     }
1685 
1686     if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1687         GetMasm()->pop(asmjit::x86::rax);
1688     }
1689     GetMasm()->jmp(crossroad);
1690 
1691     GetMasm()->bind(zeroPath);
1692     GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
1693 
1694     GetMasm()->bind(crossroad);
1695 }
1696 
EncodeMin(Reg dst,bool dstSigned,Reg src0,Reg src1)1697 void Amd64Encoder::EncodeMin(Reg dst, bool dstSigned, Reg src0, Reg src1)
1698 {
1699     if (dst.IsScalar()) {
1700         ScopedTmpReg tmpReg(this, dst.GetType());
1701         GetMasm()->mov(ArchReg(tmpReg), ArchReg(src1));
1702         GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
1703 
1704         auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
1705         if (dstSigned) {
1706             GetMasm()->cmovle(ArchReg(tmpReg, size), ArchReg(src0, size));
1707         } else {
1708             GetMasm()->cmovb(ArchReg(tmpReg, size), ArchReg(src0, size));
1709         }
1710         EncodeMov(dst, tmpReg);
1711         return;
1712     }
1713 
1714     EncodeMinMaxFp<false>(dst, src0, src1);
1715 }
1716 
EncodeMax(Reg dst,bool dstSigned,Reg src0,Reg src1)1717 void Amd64Encoder::EncodeMax(Reg dst, bool dstSigned, Reg src0, Reg src1)
1718 {
1719     if (dst.IsScalar()) {
1720         ScopedTmpReg tmpReg(this, dst.GetType());
1721         GetMasm()->mov(ArchReg(tmpReg), ArchReg(src1));
1722         GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
1723 
1724         auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
1725         if (dstSigned) {
1726             GetMasm()->cmovge(ArchReg(tmpReg, size), ArchReg(src0, size));
1727         } else {
1728             GetMasm()->cmova(ArchReg(tmpReg, size), ArchReg(src0, size));
1729         }
1730         EncodeMov(dst, tmpReg);
1731         return;
1732     }
1733 
1734     EncodeMinMaxFp<true>(dst, src0, src1);
1735 }
1736 
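// Floating-point min/max with IEEE semantics: ucomis sets ZF for equal operands and PF for NaNs.
// Equal operands may still be +0.0 and -0.0, so their sign bits are merged with andp/orp; a NaN
// operand is propagated via por; everything else falls through to the ordinary maxs/mins.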
1737 template <bool IS_MAX>
EncodeMinMaxFp(Reg dst,Reg src0,Reg src1)1738 void Amd64Encoder::EncodeMinMaxFp(Reg dst, Reg src0, Reg src1)
1739 {
1740     auto end = GetMasm()->newLabel();
1741     auto notEqual = GetMasm()->newLabel();
1742     auto gotNan = GetMasm()->newLabel();
1743     auto &srcA = dst.GetId() != src1.GetId() ? src0 : src1;
1744     auto &srcB = srcA.GetId() == src0.GetId() ? src1 : src0;
1745     if (dst.GetType() == FLOAT32_TYPE) {
1746         GetMasm()->movaps(ArchVReg(dst), ArchVReg(srcA));
1747         GetMasm()->ucomiss(ArchVReg(srcB), ArchVReg(srcA));
1748         GetMasm()->jne(notEqual);
1749         GetMasm()->jp(gotNan);
1750         // calculate result for positive/negative zero operands
1751         if (IS_MAX) {
1752             GetMasm()->andps(ArchVReg(dst), ArchVReg(srcB));
1753         } else {
1754             GetMasm()->orps(ArchVReg(dst), ArchVReg(srcB));
1755         }
1756         GetMasm()->jmp(end);
1757         GetMasm()->bind(gotNan);
1758         // if any operand is NaN result is NaN
1759         GetMasm()->por(ArchVReg(dst), ArchVReg(srcB));
1760         GetMasm()->jmp(end);
1761         GetMasm()->bind(notEqual);
1762         if (IS_MAX) {
1763             GetMasm()->maxss(ArchVReg(dst), ArchVReg(srcB));
1764         } else {
1765             GetMasm()->minss(ArchVReg(dst), ArchVReg(srcB));
1766         }
1767         GetMasm()->bind(end);
1768     } else {
1769         GetMasm()->movapd(ArchVReg(dst), ArchVReg(srcA));
1770         GetMasm()->ucomisd(ArchVReg(srcB), ArchVReg(srcA));
1771         GetMasm()->jne(notEqual);
1772         GetMasm()->jp(gotNan);
1773         // calculate result for positive/negative zero operands
1774         if (IS_MAX) {
1775             GetMasm()->andpd(ArchVReg(dst), ArchVReg(srcB));
1776         } else {
1777             GetMasm()->orpd(ArchVReg(dst), ArchVReg(srcB));
1778         }
1779         GetMasm()->jmp(end);
1780         GetMasm()->bind(gotNan);
1781         // if any operand is NaN result is NaN
1782         GetMasm()->por(ArchVReg(dst), ArchVReg(srcB));
1783         GetMasm()->jmp(end);
1784         GetMasm()->bind(notEqual);
1785         if (IS_MAX) {
1786             GetMasm()->maxsd(ArchVReg(dst), ArchVReg(srcB));
1787         } else {
1788             GetMasm()->minsd(ArchVReg(dst), ArchVReg(srcB));
1789         }
1790         GetMasm()->bind(end);
1791     }
1792 }
1793 
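// Variable shifts on x86 take their count in cl, so rcx is preserved around the shift whenever it is
// not already the destination register.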
EncodeShl(Reg dst,Reg src0,Reg src1)1794 void Amd64Encoder::EncodeShl(Reg dst, Reg src0, Reg src1)
1795 {
1796     ASSERT(dst.IsScalar());
1797     ScopedTmpReg tmpReg(this, dst.GetType());
1798     Reg rcx(ConvertRegNumber(asmjit::x86::rcx.id()), dst.GetType());
1799     GetMasm()->mov(ArchReg(tmpReg), ArchReg(src0));
1800     if (dst.GetId() != rcx.GetId()) {
1801         GetMasm()->push(ArchReg(rcx, DOUBLE_WORD_SIZE));
1802     }
1803     GetMasm()->mov(ArchReg(rcx), ArchReg(src1));
1804     GetMasm()->shl(ArchReg(tmpReg), asmjit::x86::cl);
1805     if (dst.GetId() != rcx.GetId()) {
1806         GetMasm()->pop(ArchReg(rcx, DOUBLE_WORD_SIZE));
1807     }
1808     GetMasm()->mov(ArchReg(dst), ArchReg(tmpReg));
1809 }
1810 
EncodeShr(Reg dst,Reg src0,Reg src1)1811 void Amd64Encoder::EncodeShr(Reg dst, Reg src0, Reg src1)
1812 {
1813     ASSERT(dst.IsScalar());
1814     ScopedTmpReg tmpReg(this, dst.GetType());
1815     Reg rcx(ConvertRegNumber(asmjit::x86::rcx.id()), dst.GetType());
1816     GetMasm()->mov(ArchReg(tmpReg), ArchReg(src0));
1817     if (dst.GetId() != rcx.GetId()) {
1818         GetMasm()->push(ArchReg(rcx, DOUBLE_WORD_SIZE));
1819     }
1820     GetMasm()->mov(ArchReg(rcx), ArchReg(src1));
1821     GetMasm()->shr(ArchReg(tmpReg), asmjit::x86::cl);
1822     if (dst.GetId() != rcx.GetId()) {
1823         GetMasm()->pop(ArchReg(rcx, DOUBLE_WORD_SIZE));
1824     }
1825     GetMasm()->mov(ArchReg(dst), ArchReg(tmpReg));
1826 }
1827 
EncodeAShr(Reg dst,Reg src0,Reg src1)1828 void Amd64Encoder::EncodeAShr(Reg dst, Reg src0, Reg src1)
1829 {
1830     ASSERT(dst.IsScalar());
1831     ScopedTmpReg tmpReg(this, dst.GetType());
1832     Reg rcx(ConvertRegNumber(asmjit::x86::rcx.id()), dst.GetType());
1833     GetMasm()->mov(ArchReg(tmpReg), ArchReg(src0));
1834     if (dst.GetId() != rcx.GetId()) {
1835         GetMasm()->push(ArchReg(rcx, DOUBLE_WORD_SIZE));
1836     }
1837     GetMasm()->mov(ArchReg(rcx), ArchReg(src1));
1838     GetMasm()->sar(ArchReg(tmpReg), asmjit::x86::cl);
1839     if (dst.GetId() != rcx.GetId()) {
1840         GetMasm()->pop(ArchReg(rcx, DOUBLE_WORD_SIZE));
1841     }
1842     GetMasm()->mov(ArchReg(dst), ArchReg(tmpReg));
1843 }
1844 
EncodeAnd(Reg dst,Reg src0,Reg src1)1845 void Amd64Encoder::EncodeAnd(Reg dst, Reg src0, Reg src1)
1846 {
1847     ASSERT(dst.IsScalar());
1848     if (dst.GetId() == src0.GetId()) {
1849         GetMasm()->and_(ArchReg(dst), ArchReg(src1));
1850     } else if (dst.GetId() == src1.GetId()) {
1851         GetMasm()->and_(ArchReg(dst), ArchReg(src0));
1852     } else {
1853         EncodeMov(dst, src0);
1854         GetMasm()->and_(ArchReg(dst), ArchReg(src1));
1855     }
1856 }
1857 
EncodeOr(Reg dst,Reg src0,Reg src1)1858 void Amd64Encoder::EncodeOr(Reg dst, Reg src0, Reg src1)
1859 {
1860     ASSERT(dst.IsScalar());
1861     if (dst.GetId() == src0.GetId()) {
1862         GetMasm()->or_(ArchReg(dst), ArchReg(src1));
1863     } else if (dst.GetId() == src1.GetId()) {
1864         GetMasm()->or_(ArchReg(dst), ArchReg(src0));
1865     } else {
1866         EncodeMov(dst, src0);
1867         GetMasm()->or_(ArchReg(dst), ArchReg(src1));
1868     }
1869 }
1870 
EncodeXor(Reg dst,Reg src0,Reg src1)1871 void Amd64Encoder::EncodeXor(Reg dst, Reg src0, Reg src1)
1872 {
1873     ASSERT(dst.IsScalar());
1874     if (dst.GetId() == src0.GetId()) {
1875         GetMasm()->xor_(ArchReg(dst), ArchReg(src1));
1876     } else if (dst.GetId() == src1.GetId()) {
1877         GetMasm()->xor_(ArchReg(dst), ArchReg(src0));
1878     } else {
1879         EncodeMov(dst, src0);
1880         GetMasm()->xor_(ArchReg(dst), ArchReg(src1));
1881     }
1882 }
1883 
EncodeAdd(Reg dst,Reg src,Imm imm)1884 void Amd64Encoder::EncodeAdd(Reg dst, Reg src, Imm imm)
1885 {
1886     if (dst.IsFloat()) {
1887         SetFalseResult();
1888         return;
1889     }
1890 
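    // lea computes dst = src + imm in a single instruction without tying dst to src; immediates that
    // do not fit in 32 bits go through a mov/add sequence instead.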
1891     auto immVal = imm.GetAsInt();
1892     auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
1893     if (ImmFitsSize(immVal, size)) {
1894         GetMasm()->lea(ArchReg(dst, size), asmjit::x86::ptr(ArchReg(src, size), immVal));
1895     } else {
1896         if (dst.GetId() != src.GetId()) {
1897             GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
1898             GetMasm()->add(ArchReg(dst), ArchReg(src));
1899         } else {
1900             ScopedTmpReg tmpReg(this, dst.GetType());
1901             GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
1902             GetMasm()->add(ArchReg(dst), ArchReg(tmpReg));
1903         }
1904     }
1905 }
1906 
EncodeSub(Reg dst,Reg src,Imm imm)1907 void Amd64Encoder::EncodeSub(Reg dst, Reg src, Imm imm)
1908 {
1909     if (dst.IsFloat()) {
1910         SetFalseResult();
1911         return;
1912     }
1913 
1914     auto immVal = -imm.GetAsInt();
1915     auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
1916     if (ImmFitsSize(immVal, size)) {
1917         GetMasm()->lea(ArchReg(dst, size), asmjit::x86::ptr(ArchReg(src, size), immVal));
1918     } else {
1919         if (dst.GetId() != src.GetId()) {
1920             GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
1921             GetMasm()->add(ArchReg(dst), ArchReg(src));
1922         } else {
1923             ScopedTmpReg tmpReg(this, dst.GetType());
1924             GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
1925             GetMasm()->add(ArchReg(dst), ArchReg(tmpReg));
1926         }
1927     }
1928 }
1929 
EncodeShl(Reg dst,Reg src,Imm imm)1930 void Amd64Encoder::EncodeShl(Reg dst, Reg src, Imm imm)
1931 {
1932     ASSERT(dst.IsScalar());
1933     EncodeMov(dst, src);
1934     GetMasm()->shl(ArchReg(dst), ArchImm(imm));
1935 }
1936 
EncodeShr(Reg dst,Reg src,Imm imm)1937 void Amd64Encoder::EncodeShr(Reg dst, Reg src, Imm imm)
1938 {
1939     ASSERT(dst.IsScalar());
1940 
1941     EncodeMov(dst, src);
1942     GetMasm()->shr(ArchReg(dst), ArchImm(imm));
1943 }
1944 
EncodeAShr(Reg dst,Reg src,Imm imm)1945 void Amd64Encoder::EncodeAShr(Reg dst, Reg src, Imm imm)
1946 {
1947     ASSERT(dst.IsScalar());
1948     EncodeMov(dst, src);
1949     GetMasm()->sar(ArchReg(dst), ArchImm(imm));
1950 }
1951 
EncodeAnd(Reg dst,Reg src,Imm imm)1952 void Amd64Encoder::EncodeAnd(Reg dst, Reg src, Imm imm)
1953 {
1954     ASSERT(dst.IsScalar());
1955     auto immVal = ImmToUnsignedInt(imm);
1956 
1957     switch (src.GetSize()) {
1958         case BYTE_SIZE:
1959             immVal |= ~uint64_t(0xFF);  // NOLINT
1960             break;
1961         case HALF_SIZE:
1962             immVal |= ~uint64_t(0xFFFF);  // NOLINT
1963             break;
1964         case WORD_SIZE:
1965             immVal |= ~uint64_t(0xFFFFFFFF);  // NOLINT
1966             break;
1967         default:
1968             break;
1969     }
1970 
1971     if (dst.GetSize() != DOUBLE_WORD_SIZE) {
1972         // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult)
1973         immVal &= (uint64_t(1) << dst.GetSize()) - 1;
1974     }
1975 
1976     if (ImmFitsSize(immVal, dst.GetSize())) {
1977         EncodeMov(dst, src);
1978         GetMasm()->and_(ArchReg(dst), immVal);
1979     } else {
1980         if (dst.GetId() != src.GetId()) {
1981             GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
1982             GetMasm()->and_(ArchReg(dst), ArchReg(src));
1983         } else {
1984             ScopedTmpReg tmpReg(this, dst.GetType());
1985             GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
1986             GetMasm()->and_(ArchReg(dst), ArchReg(tmpReg));
1987         }
1988     }
1989 }
1990 
EncodeOr(Reg dst,Reg src,Imm imm)1991 void Amd64Encoder::EncodeOr(Reg dst, Reg src, Imm imm)
1992 {
1993     ASSERT(dst.IsScalar());
1994     auto immVal = ImmToUnsignedInt(imm);
1995     if (ImmFitsSize(immVal, dst.GetSize())) {
1996         EncodeMov(dst, src);
1997         GetMasm()->or_(ArchReg(dst), immVal);
1998     } else {
1999         if (dst.GetId() != src.GetId()) {
2000             GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
2001             GetMasm()->or_(ArchReg(dst), ArchReg(src));
2002         } else {
2003             ScopedTmpReg tmpReg(this, dst.GetType());
2004             GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
2005             GetMasm()->or_(ArchReg(dst), ArchReg(tmpReg));
2006         }
2007     }
2008 }
2009 
EncodeXor(Reg dst,Reg src,Imm imm)2010 void Amd64Encoder::EncodeXor(Reg dst, Reg src, Imm imm)
2011 {
2012     ASSERT(dst.IsScalar());
2013     auto immVal = ImmToUnsignedInt(imm);
2014     if (ImmFitsSize(immVal, dst.GetSize())) {
2015         EncodeMov(dst, src);
2016         GetMasm()->xor_(ArchReg(dst), immVal);
2017     } else {
2018         if (dst.GetId() != src.GetId()) {
2019             GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
2020             GetMasm()->xor_(ArchReg(dst), ArchReg(src));
2021         } else {
2022             ScopedTmpReg tmpReg(this, dst.GetType());
2023             GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
2024             GetMasm()->xor_(ArchReg(dst), ArchReg(tmpReg));
2025         }
2026     }
2027 }
2028 
EncodeMov(Reg dst,Imm src)2029 void Amd64Encoder::EncodeMov(Reg dst, Imm src)
2030 {
2031     if (dst.IsScalar()) {
2032         if (dst.GetSize() < WORD_SIZE) {
2033             GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
2034         }
2035         GetMasm()->mov(ArchReg(dst), ArchImm(src));
2036         return;
2037     }
2038 
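    // x86 has no move-immediate into an XMM register, so the bit pattern is materialized in a GPR
    // and transferred with movd/movq.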
2039     if (dst.GetType() == FLOAT32_TYPE) {
2040         ScopedTmpRegU32 tmpReg(this);
2041         auto val = bit_cast<uint32_t>(src.GetAsFloat());
2042         GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(val));
2043         GetMasm()->movd(ArchVReg(dst), ArchReg(tmpReg));
2044     } else {
2045         ScopedTmpRegU64 tmpReg(this);
2046         auto val = bit_cast<uint64_t>(src.GetAsDouble());
2047         GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(val));
2048         GetMasm()->movq(ArchVReg(dst), ArchReg(tmpReg));
2049     }
2050 }
2051 
EncodeLdr(Reg dst,bool dstSigned,MemRef mem)2052 void Amd64Encoder::EncodeLdr(Reg dst, bool dstSigned, MemRef mem)
2053 {
2054     auto m = ArchMem(mem).Prepare(GetMasm());
2055 
2056     if (dst.GetType() == FLOAT32_TYPE) {
2057         GetMasm()->movss(ArchVReg(dst), m);
2058         return;
2059     }
2060     if (dst.GetType() == FLOAT64_TYPE) {
2061         GetMasm()->movsd(ArchVReg(dst), m);
2062         return;
2063     }
2064 
2065     m.setSize(dst.GetSize() / BITS_PER_BYTE);
2066 
2067     if (dstSigned && dst.GetSize() < DOUBLE_WORD_SIZE) {
2068         if (dst.GetSize() == WORD_SIZE) {
2069             GetMasm()->movsxd(ArchReg(dst, DOUBLE_WORD_SIZE), m);
2070         } else {
2071             GetMasm()->movsx(ArchReg(dst, DOUBLE_WORD_SIZE), m);
2072         }
2073         return;
2074     }
2075     if (!dstSigned && dst.GetSize() < WORD_SIZE) {
2076         GetMasm()->movzx(ArchReg(dst, WORD_SIZE), m);
2077         return;
2078     }
2079 
2080     GetMasm()->mov(ArchReg(dst), m);
2081 }
2082 
EncodeLdrAcquire(Reg dst,bool dstSigned,MemRef mem)2083 void Amd64Encoder::EncodeLdrAcquire(Reg dst, bool dstSigned, MemRef mem)
2084 {
2085     EncodeLdr(dst, dstSigned, mem);
2086     // LoadLoad and LoadStore barriers would be required here, but both are no-ops in the amd64 memory model
2087 }
2088 
EncodeStr(Reg src,MemRef mem)2089 void Amd64Encoder::EncodeStr(Reg src, MemRef mem)
2090 {
2091     auto m = ArchMem(mem).Prepare(GetMasm());
2092 
2093     if (src.GetType() == FLOAT32_TYPE) {
2094         GetMasm()->movss(m, ArchVReg(src));
2095         return;
2096     }
2097     if (src.GetType() == FLOAT64_TYPE) {
2098         GetMasm()->movsd(m, ArchVReg(src));
2099         return;
2100     }
2101 
2102     m.setSize(src.GetSize() / BITS_PER_BYTE);
2103     GetMasm()->mov(m, ArchReg(src));
2104 }
2105 
EncodeStrRelease(Reg src,MemRef mem)2106 void Amd64Encoder::EncodeStrRelease(Reg src, MemRef mem)
2107 {
2108     // A StoreStore barrier would be required here, but it is a no-op in the amd64 memory model
2109     EncodeStr(src, mem);
2110     // This is a StoreLoad barrier (which is also a full memory barrier in the amd64 memory model)
2111     GetMasm()->lock().add(asmjit::x86::dword_ptr(asmjit::x86::rsp), asmjit::imm(0));
2112 }
2113 
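// Strz writes a full 64-bit slot: narrower scalars are zero-extended through a temporary and
// narrower floats are padded with zero bits before the 64-bit store.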
EncodeStrz(Reg src,MemRef mem)2114 void Amd64Encoder::EncodeStrz(Reg src, MemRef mem)
2115 {
2116     if (src.IsScalar()) {
2117         if (src.GetSize() == DOUBLE_WORD_SIZE) {
2118             GetMasm()->mov(ArchMem(mem).Prepare(GetMasm()), ArchReg(src));
2119         } else {
2120             ScopedTmpRegU64 tmpReg(this);
2121             GetMasm()->xor_(ArchReg(tmpReg), ArchReg(tmpReg));
2122             GetMasm()->mov(ArchReg(tmpReg, src.GetSize()), ArchReg(src));
2123             GetMasm()->mov(ArchMem(mem).Prepare(GetMasm()), ArchReg(tmpReg));
2124         }
2125     } else {
2126         if (src.GetType() == FLOAT64_TYPE) {
2127             GetMasm()->movsd(ArchMem(mem).Prepare(GetMasm()), ArchVReg(src));
2128         } else {
2129             ScopedTmpRegF64 tmpReg(this);
2130 
2131             GetMasm()->xorpd(ArchVReg(tmpReg), ArchVReg(tmpReg));
2132             GetMasm()->movss(ArchVReg(tmpReg), ArchVReg(src));
2133             GetMasm()->movsd(ArchMem(mem).Prepare(GetMasm()), ArchVReg(tmpReg));
2134         }
2135     }
2136 }
2137 
EncodeSti(int64_t src,uint8_t srcSizeBytes,MemRef mem)2138 void Amd64Encoder::EncodeSti(int64_t src, uint8_t srcSizeBytes, MemRef mem)
2139 {
2140     ASSERT(srcSizeBytes <= 8U);
2141     auto m = ArchMem(mem).Prepare(GetMasm());
2142     if (srcSizeBytes <= HALF_WORD_SIZE_BYTES) {
2143         m.setSize(srcSizeBytes);
2144         GetMasm()->mov(m, asmjit::imm(src));
2145     } else {
2146         m.setSize(DOUBLE_WORD_SIZE_BYTES);
2147 
2148         if (ImmFitsSize(src, DOUBLE_WORD_SIZE)) {
2149             GetMasm()->mov(m, asmjit::imm(src));
2150         } else {
2151             ScopedTmpRegU64 tmpReg(this);
2152             GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(src));
2153             GetMasm()->mov(m, ArchReg(tmpReg));
2154         }
2155     }
2156 }
2157 
EncodeSti(float src,MemRef mem)2158 void Amd64Encoder::EncodeSti(float src, MemRef mem)
2159 {
2160     EncodeSti(bit_cast<int32_t>(src), sizeof(int32_t), mem);
2161 }
2162 
EncodeSti(double src,MemRef mem)2163 void Amd64Encoder::EncodeSti(double src, MemRef mem)
2164 {
2165     EncodeSti(bit_cast<int64_t>(src), sizeof(int64_t), mem);
2166 }
2167 
EncodeMemCopy(MemRef memFrom,MemRef memTo,size_t size)2168 void Amd64Encoder::EncodeMemCopy(MemRef memFrom, MemRef memTo, size_t size)
2169 {
2170     ScopedTmpRegU64 tmpReg(this);
2171     GetMasm()->mov(ArchReg(tmpReg, size), ArchMem(memFrom).Prepare(GetMasm()));
2172     GetMasm()->mov(ArchMem(memTo).Prepare(GetMasm()), ArchReg(tmpReg, size));
2173 }
2174 
EncodeMemCopyz(MemRef memFrom,MemRef memTo,size_t size)2175 void Amd64Encoder::EncodeMemCopyz(MemRef memFrom, MemRef memTo, size_t size)
2176 {
2177     ScopedTmpRegU64 tmpReg(this);
2178     if (size < DOUBLE_WORD_SIZE) {
2179         GetMasm()->xor_(ArchReg(tmpReg), ArchReg(tmpReg));
2180     }
2181     GetMasm()->mov(ArchReg(tmpReg, size), ArchMem(memFrom).Prepare(GetMasm()));
2182     GetMasm()->mov(ArchMem(memTo).Prepare(GetMasm()), ArchReg(tmpReg));
2183 }
2184 
EncodeCompare(Reg dst,Reg src0,Reg src1,Condition cc)2185 void Amd64Encoder::EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc)
2186 {
2187     if (src0.IsScalar()) {
2188         GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
2189     } else {
2190         if (src0.GetType() == FLOAT32_TYPE) {
2191             GetMasm()->ucomiss(ArchVReg(src0), ArchVReg(src1));
2192         } else {
2193             GetMasm()->ucomisd(ArchVReg(src0), ArchVReg(src1));
2194         }
2195     }
2196     GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(0));
2197 
2198     if (src0.IsScalar()) {
2199         GetMasm()->set(ArchCc(cc), ArchReg(dst, BYTE_SIZE));
2200         return;
2201     }
2202 
2203     auto end = GetMasm()->newLabel();
2204 
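    // ucomiss/ucomisd set PF on an unordered (NaN) compare: conditions that hold for NaN operands are
    // materialized from the parity flag with setp, and the jp then skips the ordinary set below,
    // leaving dst at 0 for every other condition.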
2205     if (CcMatchesNan(cc)) {
2206         GetMasm()->setp(ArchReg(dst, BYTE_SIZE));
2207     }
2208     GetMasm()->jp(end);
2209     GetMasm()->set(ArchCc(cc, true), ArchReg(dst, BYTE_SIZE));
2210 
2211     GetMasm()->bind(end);
2212 }
2213 
EncodeCompareTest(Reg dst,Reg src0,Reg src1,Condition cc)2214 void Amd64Encoder::EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc)
2215 {
2216     ASSERT(src0.IsScalar());
2217 
2218     GetMasm()->test(ArchReg(src0), ArchReg(src1));
2219 
2220     GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(0));
2221     GetMasm()->set(ArchCcTest(cc), ArchReg(dst, BYTE_SIZE));
2222 }
2223 
EncodeAtomicByteOr(Reg addr,Reg value,bool fastEncoding)2224 void Amd64Encoder::EncodeAtomicByteOr(Reg addr, Reg value, [[maybe_unused]] bool fastEncoding)
2225 {
2226     GetMasm()->lock().or_(asmjit::x86::byte_ptr(ArchReg(addr)), ArchReg(value, ark::compiler::BYTE_SIZE));
2227 }
2228 
EncodeCmp(Reg dst,Reg src0,Reg src1,Condition cc)2229 void Amd64Encoder::EncodeCmp(Reg dst, Reg src0, Reg src1, Condition cc)
2230 {
2231     auto end = GetMasm()->newLabel();
2232 
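    // Three-way compare producing -1, 0 or 1. For floats the value preloaded into dst before the jp
    // is the result used when either operand is NaN (cc selects whether NaN compares as less or greater).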
2233     if (src0.IsFloat()) {
2234         ASSERT(src1.IsFloat());
2235         ASSERT(cc == Condition::MI || cc == Condition::LT);
2236 
2237         if (src0.GetType() == FLOAT32_TYPE) {
2238             GetMasm()->ucomiss(ArchVReg(src0), ArchVReg(src1));
2239         } else {
2240             GetMasm()->ucomisd(ArchVReg(src0), ArchVReg(src1));
2241         }
2242 
2243         GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), cc == Condition::LT ? asmjit::imm(-1) : asmjit::imm(1));
2244         cc = Condition::LO;
2245 
2246         GetMasm()->jp(end);
2247     } else {
2248         ASSERT(src0.IsScalar() && src1.IsScalar());
2249         ASSERT(cc == Condition::LO || cc == Condition::LT);
2250         GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
2251     }
2252     GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(0));
2253     GetMasm()->setne(ArchReg(dst, BYTE_SIZE));
2254 
2255     GetMasm()->j(asmjit::x86::Condition::negate(ArchCc(cc)), end);
2256     GetMasm()->neg(ArchReg(dst));
2257 
2258     GetMasm()->bind(end);
2259 }
2260 
EncodeSelect(ArgsSelect && args)2261 void Amd64Encoder::EncodeSelect(ArgsSelect &&args)
2262 {
2263     auto [dst, src0, src1, src2, src3, cc] = args;
2264     ASSERT(!src0.IsFloat() && !src1.IsFloat());
2265     if (src2.IsScalar()) {
2266         GetMasm()->cmp(ArchReg(src2), ArchReg(src3));
2267     } else if (src2.GetType() == FLOAT32_TYPE) {
2268         GetMasm()->comiss(ArchVReg(src2), ArchVReg(src3));
2269     } else {
2270         GetMasm()->comisd(ArchVReg(src2), ArchVReg(src3));
2271     }
2272 
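    // When dst aliases src0, build the result in a temporary so the unconditional move of src1
    // does not clobber the cmov source.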
2273     auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
2274     bool dstAliased = dst.GetId() == src0.GetId();
2275     ScopedTmpReg tmpReg(this, dst.GetType());
2276     auto dstReg = dstAliased ? ArchReg(tmpReg, size) : ArchReg(dst, size);
2277 
2278     GetMasm()->mov(dstReg, ArchReg(src1, size));
2279 
2280     if (src2.IsScalar()) {
2281         GetMasm()->cmov(ArchCc(cc), dstReg, ArchReg(src0, size));
2282     } else if (CcMatchesNan(cc)) {
2283         GetMasm()->cmovp(dstReg, ArchReg(src0, size));
2284         GetMasm()->cmov(ArchCc(cc, src2.IsFloat()), dstReg, ArchReg(src0, size));
2285     } else {
2286         auto end = GetMasm()->newLabel();
2287 
2288         GetMasm()->jp(end);
2289         GetMasm()->cmov(ArchCc(cc, src2.IsFloat()), dstReg, ArchReg(src0, size));
2290 
2291         GetMasm()->bind(end);
2292     }
2293     if (dstAliased) {
2294         EncodeMov(dst, tmpReg);
2295     }
2296 }
2297 
EncodeSelect(ArgsSelectImm && args)2298 void Amd64Encoder::EncodeSelect(ArgsSelectImm &&args)
2299 {
2300     auto [dst, src0, src1, src2, imm, cc] = args;
2301     ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());
2302 
2303     auto immVal = imm.GetAsInt();
2304     if (ImmFitsSize(immVal, src2.GetSize())) {
2305         GetMasm()->cmp(ArchReg(src2), asmjit::imm(immVal));
2306     } else {
2307         ScopedTmpReg tmpReg(this, src2.GetType());
2308         GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
2309         GetMasm()->cmp(ArchReg(src2), ArchReg(tmpReg));
2310     }
2311 
2312     ScopedTmpReg tmpReg(this, dst.GetType());
2313     auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
2314     bool dstAliased = dst.GetId() == src0.GetId();
2315     auto dstReg = dstAliased ? ArchReg(tmpReg, size) : ArchReg(dst, size);
2316 
2317     GetMasm()->mov(dstReg, ArchReg(src1, size));
2318     GetMasm()->cmov(ArchCc(cc), dstReg, ArchReg(src0, size));
2319     if (dstAliased) {
2320         EncodeMov(dst, tmpReg);
2321     }
2322 }
2323 
EncodeSelectTest(ArgsSelect && args)2324 void Amd64Encoder::EncodeSelectTest(ArgsSelect &&args)
2325 {
2326     auto [dst, src0, src1, src2, src3, cc] = args;
2327     ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());
2328 
2329     GetMasm()->test(ArchReg(src2), ArchReg(src3));
2330 
2331     ScopedTmpReg tmpReg(this, dst.GetType());
2332     auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
2333     bool dstAliased = dst.GetId() == src0.GetId();
2334     auto dstReg = dstAliased ? ArchReg(tmpReg, size) : ArchReg(dst, size);
2335 
2336     GetMasm()->mov(dstReg, ArchReg(src1, size));
2337     GetMasm()->cmov(ArchCcTest(cc), dstReg, ArchReg(src0, size));
2338     if (dstAliased) {
2339         EncodeMov(dst, tmpReg);
2340     }
2341 }
2342 
EncodeSelectTest(ArgsSelectImm && args)2343 void Amd64Encoder::EncodeSelectTest(ArgsSelectImm &&args)
2344 {
2345     auto [dst, src0, src1, src2, imm, cc] = args;
2346     ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());
2347 
2348     auto immVal = imm.GetAsInt();
2349     if (ImmFitsSize(immVal, src2.GetSize())) {
2350         GetMasm()->test(ArchReg(src2), asmjit::imm(immVal));
2351     } else {
2352         ScopedTmpReg tmpReg(this, src2.GetType());
2353         GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
2354         GetMasm()->test(ArchReg(src2), ArchReg(tmpReg));
2355     }
2356 
2357     ScopedTmpReg tmpReg(this, dst.GetType());
2358     auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
2359     bool dstAliased = dst.GetId() == src0.GetId();
2360     auto dstReg = dstAliased ? ArchReg(tmpReg, size) : ArchReg(dst, size);
2361 
2362     GetMasm()->mov(dstReg, ArchReg(src1, size));
2363     GetMasm()->cmov(ArchCcTest(cc), dstReg, ArchReg(src0, size));
2364     if (dstAliased) {
2365         EncodeMov(dst, tmpReg);
2366     }
2367 }
2368 
EncodeLdp(Reg dst0,Reg dst1,bool dstSigned,MemRef mem)2369 void Amd64Encoder::EncodeLdp(Reg dst0, Reg dst1, bool dstSigned, MemRef mem)
2370 {
2371     ASSERT(dst0.IsFloat() == dst1.IsFloat());
2372     ASSERT(dst0.GetSize() == dst1.GetSize());
2373 
2374     auto m = ArchMem(mem).Prepare(GetMasm());
2375 
2376     if (dst0.IsFloat()) {
2377         if (dst0.GetType() == FLOAT32_TYPE) {
2378             GetMasm()->movss(ArchVReg(dst0), m);
2379 
2380             m.addOffset(WORD_SIZE_BYTES);
2381             GetMasm()->movss(ArchVReg(dst1), m);
2382         } else {
2383             GetMasm()->movsd(ArchVReg(dst0), m);
2384 
2385             m.addOffset(DOUBLE_WORD_SIZE_BYTES);
2386             GetMasm()->movsd(ArchVReg(dst1), m);
2387         }
2388         return;
2389     }
2390 
2391     if (dstSigned && dst0.GetSize() == WORD_SIZE) {
2392         m.setSize(WORD_SIZE_BYTES);
2393         GetMasm()->movsxd(ArchReg(dst0, DOUBLE_WORD_SIZE), m);
2394 
2395         m.addOffset(WORD_SIZE_BYTES);
2396         GetMasm()->movsxd(ArchReg(dst1, DOUBLE_WORD_SIZE), m);
2397         return;
2398     }
2399 
2400     GetMasm()->mov(ArchReg(dst0), m);
2401 
2402     m.addOffset(dst0.GetSize() / BITS_PER_BYTE);
2403     GetMasm()->mov(ArchReg(dst1), m);
2404 }
2405 
EncodeStp(Reg src0,Reg src1,MemRef mem)2406 void Amd64Encoder::EncodeStp(Reg src0, Reg src1, MemRef mem)
2407 {
2408     ASSERT(src0.IsFloat() == src1.IsFloat());
2409     ASSERT(src0.GetSize() == src1.GetSize());
2410 
2411     auto m = ArchMem(mem).Prepare(GetMasm());
2412 
2413     if (src0.IsFloat()) {
2414         if (src0.GetType() == FLOAT32_TYPE) {
2415             GetMasm()->movss(m, ArchVReg(src0));
2416 
2417             m.addOffset(WORD_SIZE_BYTES);
2418             GetMasm()->movss(m, ArchVReg(src1));
2419         } else {
2420             GetMasm()->movsd(m, ArchVReg(src0));
2421 
2422             m.addOffset(DOUBLE_WORD_SIZE_BYTES);
2423             GetMasm()->movsd(m, ArchVReg(src1));
2424         }
2425         return;
2426     }
2427 
2428     GetMasm()->mov(m, ArchReg(src0));
2429 
2430     m.addOffset(src0.GetSize() / BITS_PER_BYTE);
2431     GetMasm()->mov(m, ArchReg(src1));
2432 }
2433 
EncodeReverseBytes(Reg dst,Reg src)2434 void Amd64Encoder::EncodeReverseBytes(Reg dst, Reg src)
2435 {
2436     ASSERT(src.GetSize() > BYTE_SIZE);
2437     ASSERT(src.GetSize() == dst.GetSize());
2438     ASSERT(src.IsValid());
2439     ASSERT(dst.IsValid());
2440 
2441     if (src != dst) {
2442         GetMasm()->mov(ArchReg(dst), ArchReg(src));
2443     }
2444 
2445     if (src.GetSize() == HALF_SIZE) {
2446         GetMasm()->rol(ArchReg(dst), BYTE_SIZE);
2447         GetMasm()->movsx(ArchReg(dst, WORD_SIZE), ArchReg(dst));
2448     } else {
2449         GetMasm()->bswap(ArchReg(dst));
2450     }
2451 }
2452 
EncodeUnsignedExtendBytesToShorts(Reg dst,Reg src)2453 void Amd64Encoder::EncodeUnsignedExtendBytesToShorts(Reg dst, Reg src)
2454 {
2455     GetMasm()->pmovzxbw(ArchVReg(dst), ArchVReg(src));
2456 }
2457 
2458 /* Attention: the encoder below operates on vector registers, not GPRs */
EncodeReverseHalfWords(Reg dst,Reg src)2459 void Amd64Encoder::EncodeReverseHalfWords(Reg dst, Reg src)
2460 {
2461     ASSERT(src.GetSize() == dst.GetSize());
2462     ASSERT(src.IsValid());
2463     ASSERT(dst.IsValid());
2464 
2465     constexpr unsigned MASK = 0x1b;  // reverse mask: 00 01 10 11
2466     GetMasm()->pshuflw(ArchVReg(dst), ArchVReg(src), MASK);
2467 }
2468 
CanEncodeImmAddSubCmp(int64_t imm,uint32_t size,bool signedCompare)2469 bool Amd64Encoder::CanEncodeImmAddSubCmp(int64_t imm, uint32_t size, [[maybe_unused]] bool signedCompare)
2470 {
2471     return ImmFitsSize(imm, size);
2472 }
2473 
EncodeBitCount(Reg dst0,Reg src0)2474 void Amd64Encoder::EncodeBitCount(Reg dst0, Reg src0)
2475 {
2476     ASSERT(src0.GetSize() == WORD_SIZE || src0.GetSize() == DOUBLE_WORD_SIZE);
2477     ASSERT(dst0.GetSize() == WORD_SIZE);
2478     ASSERT(src0.IsScalar() && dst0.IsScalar());
2479 
2480     GetMasm()->popcnt(ArchReg(dst0, src0.GetSize()), ArchReg(src0));
2481 }
2482 
EncodeCountLeadingZeroBits(Reg dst,Reg src)2483 void Amd64Encoder::EncodeCountLeadingZeroBits(Reg dst, Reg src)
2484 {
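    // bsr is undefined for a zero input, so that case is handled on a separate path; for non-zero
    // values clz(x) == (size - 1) - bsr(x), computed here as bsr(x) ^ (size - 1).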
2485     auto end = CreateLabel();
2486     auto zero = CreateLabel();
2487     EncodeJump(zero, src, Condition::EQ);
2488     GetMasm()->bsr(ArchReg(dst), ArchReg(src));
2489     GetMasm()->xor_(ArchReg(dst), asmjit::imm(dst.GetSize() - 1));
2490     EncodeJump(end);
2491 
2492     BindLabel(zero);
2493     GetMasm()->mov(ArchReg(dst), asmjit::imm(dst.GetSize()));
2494 
2495     BindLabel(end);
2496 }
2497 
EncodeCountTrailingZeroBits(Reg dst,Reg src)2498 void Amd64Encoder::EncodeCountTrailingZeroBits(Reg dst, Reg src)
2499 {
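    // bsf sets ZF when the source is zero and mov does not touch the flags, so cmovne overwrites the
    // preloaded "no bits set" answer (the register width) only when src actually has a set bit.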
2500     ScopedTmpReg tmp(this, src.GetType());
2501     GetMasm()->bsf(ArchReg(tmp), ArchReg(src));
2502     GetMasm()->mov(ArchReg(dst), asmjit::imm(dst.GetSize()));
2503     GetMasm()->cmovne(ArchReg(dst), ArchReg(tmp));
2504 }
2505 
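// The roundsd/roundss immediate selects the rounding mode: 0 = to nearest even (rint),
// 1 = toward -inf (floor), 2 = toward +inf (ceil), 3 = toward zero (trunc).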
EncodeCeil(Reg dst,Reg src)2506 void Amd64Encoder::EncodeCeil(Reg dst, Reg src)
2507 {
2508     // NOLINTNEXTLINE(readability-magic-numbers)
2509     GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(2_I));
2510 }
2511 
EncodeFloor(Reg dst,Reg src)2512 void Amd64Encoder::EncodeFloor(Reg dst, Reg src)
2513 {
2514     GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(1));
2515 }
2516 
EncodeRint(Reg dst,Reg src)2517 void Amd64Encoder::EncodeRint(Reg dst, Reg src)
2518 {
2519     GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(0));
2520 }
2521 
EncodeTrunc(Reg dst,Reg src)2522 void Amd64Encoder::EncodeTrunc(Reg dst, Reg src)
2523 {
2524     GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(3_I));
2525 }
2526 
EncodeRoundAway(Reg dst,Reg src)2527 void Amd64Encoder::EncodeRoundAway(Reg dst, Reg src)
2528 {
2529     ASSERT(src.GetType() == FLOAT64_TYPE);
2530     ASSERT(dst.GetType() == FLOAT64_TYPE);
2531 
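    // Round half away from zero: add 0.49999999999999994 (the largest double below 0.5) carrying the
    // sign of src, then truncate; adding exactly 0.5 would double-round inputs such as 0.49999999999999994.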
2532     ScopedTmpReg tv(this, src.GetType());
2533     ScopedTmpReg tv1(this, src.GetType());
2534     ScopedTmpRegU64 ti(this);
2535     auto dest = dst;
2536 
2537     auto shared = src == dst;
2538 
2539     if (shared) {
2540         dest = tv1.GetReg();
2541     }
2542     GetMasm()->movapd(ArchVReg(dest), ArchVReg(src));
2543 
2544     constexpr auto SIGN_BIT_MASK = 0x8000000000000000ULL;
2545     GetMasm()->mov(ArchReg(ti), asmjit::imm(SIGN_BIT_MASK));
2546     GetMasm()->movq(ArchVReg(tv), ArchReg(ti));
2547     GetMasm()->andpd(ArchVReg(dest), ArchVReg(tv));
2548 
2549     constexpr auto DOUBLE_POINT_FIVE = 0x3fdfffffffffffffULL;  // .49999999999999994
2550     GetMasm()->mov(ArchReg(ti), asmjit::imm(DOUBLE_POINT_FIVE));
2551     GetMasm()->movq(ArchVReg(tv), ArchReg(ti));
2552     GetMasm()->orpd(ArchVReg(dest), ArchVReg(tv));
2553 
2554     GetMasm()->addsd(ArchVReg(dest), ArchVReg(src));
2555     GetMasm()->roundsd(ArchVReg(dest), ArchVReg(dest), asmjit::imm(3_I));
2556     if (shared) {
2557         GetMasm()->movapd(ArchVReg(dst), ArchVReg(dest));
2558     }
2559 }
2560 
EncodeRoundToPInfFloat(Reg dst,Reg src)2561 void Amd64Encoder::EncodeRoundToPInfFloat(Reg dst, Reg src)
2562 {
2563     ScopedTmpReg t1(this, src.GetType());
2564     ScopedTmpReg t2(this, src.GetType());
2565     ScopedTmpReg t3(this, src.GetType());
2566     ScopedTmpReg t4(this, dst.GetType());
2567 
2568     auto skipIncrId = CreateLabel();
2569     auto doneId = CreateLabel();
2570 
2571     auto skipIncr = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(skipIncrId);
2572     auto done = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(doneId);
2573 
2574     GetMasm()->movss(ArchVReg(t2), ArchVReg(src));
2575     GetMasm()->roundss(ArchVReg(t1), ArchVReg(src), asmjit::imm(1));
2576     GetMasm()->subss(ArchVReg(t2), ArchVReg(t1));
2577     // NOLINTNEXTLINE(readability-magic-numbers)
2578     GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int32_t, float>(0.5F)));
2579     GetMasm()->movd(ArchVReg(t3), ArchReg(t4));
2580     GetMasm()->comiss(ArchVReg(t2), ArchVReg(t3));
2581     GetMasm()->j(asmjit::x86::Condition::Code::kB, *skipIncr);
2582     // NOLINTNEXTLINE(readability-magic-numbers)
2583     GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int32_t, float>(1.0F)));
2584     GetMasm()->movd(ArchVReg(t3), ArchReg(t4));
2585     GetMasm()->addss(ArchVReg(t1), ArchVReg(t3));
2586     BindLabel(skipIncrId);
2587 
2588     // NOLINTNEXTLINE(readability-magic-numbers)
2589     GetMasm()->mov(ArchReg(dst), asmjit::imm(0x7FFFFFFF));
2590     GetMasm()->cvtsi2ss(ArchVReg(t2), ArchReg(dst));
2591     GetMasm()->comiss(ArchVReg(t1), ArchVReg(t2));
2592     GetMasm()->j(asmjit::x86::Condition::Code::kAE,
2593                  *done);                           // clipped to max (already in dst), does not jump on unordered
2594     GetMasm()->mov(ArchReg(dst), asmjit::imm(0));  // does not change flags
2595     GetMasm()->j(asmjit::x86::Condition::Code::kParityEven, *done);  // NaN mapped to 0 (just moved in dst)
2596     GetMasm()->cvttss2si(ArchReg(dst), ArchVReg(t1));
2597     BindLabel(doneId);
2598 }
2599 
EncodeRoundToPInfDouble(Reg dst,Reg src)2600 void Amd64Encoder::EncodeRoundToPInfDouble(Reg dst, Reg src)
2601 {
2602     ScopedTmpReg t1(this, src.GetType());
2603     ScopedTmpReg t2(this, src.GetType());
2604     ScopedTmpReg t3(this, src.GetType());
2605     ScopedTmpReg t4(this, dst.GetType());
2606 
2607     auto skipIncrId = CreateLabel();
2608     auto doneId = CreateLabel();
2609 
2610     auto skipIncr = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(skipIncrId);
2611     auto done = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(doneId);
2612 
2613     GetMasm()->movsd(ArchVReg(t2), ArchVReg(src));
2614     GetMasm()->roundsd(ArchVReg(t1), ArchVReg(src), asmjit::imm(1));
2615     GetMasm()->subsd(ArchVReg(t2), ArchVReg(t1));
2616     // NOLINTNEXTLINE(readability-magic-numbers)
2617     GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int64_t, double>(0.5)));
2618     GetMasm()->movq(ArchVReg(t3), ArchReg(t4));
2619     GetMasm()->comisd(ArchVReg(t2), ArchVReg(t3));
2620     GetMasm()->j(asmjit::x86::Condition::Code::kB, *skipIncr);
2621     // NOLINTNEXTLINE(readability-magic-numbers)
2622     GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int64_t, double>(1.0)));
2623     GetMasm()->movq(ArchVReg(t3), ArchReg(t4));
2624     GetMasm()->addsd(ArchVReg(t1), ArchVReg(t3));
2625     BindLabel(skipIncrId);
2626 
2627     // NOLINTNEXTLINE(readability-magic-numbers)
2628     GetMasm()->mov(ArchReg(dst), asmjit::imm(0x7FFFFFFFFFFFFFFFL));
2629     GetMasm()->cvtsi2sd(ArchVReg(t2), ArchReg(dst));
2630     GetMasm()->comisd(ArchVReg(t1), ArchVReg(t2));
2631     GetMasm()->j(asmjit::x86::Condition::Code::kAE,
2632                  *done);                           // clipped to max (already in dst), does not jump on unordered
2633     GetMasm()->mov(ArchReg(dst), asmjit::imm(0));  // does not change flags
2634     GetMasm()->j(asmjit::x86::Condition::Code::kParityEven, *done);  // NaN mapped to 0 (just moved in dst)
2635     GetMasm()->cvttsd2si(ArchReg(dst), ArchVReg(t1));
2636     BindLabel(doneId);
2637 }
2638 
EncodeRoundToPInfReturnScalar(Reg dst,Reg src)2639 void Amd64Encoder::EncodeRoundToPInfReturnScalar(Reg dst, Reg src)
2640 {
2641     if (src.GetType() == FLOAT32_TYPE) {
2642         EncodeRoundToPInfFloat(dst, src);
2643     } else if (src.GetType() == FLOAT64_TYPE) {
2644         EncodeRoundToPInfDouble(dst, src);
2645     } else {
2646         UNREACHABLE();
2647     }
2648 }
2649 
EncodeRoundToPInfReturnFloat(Reg dst,Reg src)2650 void Amd64Encoder::EncodeRoundToPInfReturnFloat(Reg dst, Reg src)
2651 {
2652     ASSERT(src.GetType() == FLOAT64_TYPE);
2653     ASSERT(dst.GetType() == FLOAT64_TYPE);
2654 
2655     // CC-OFFNXT(G.NAM.03-CPP) project code style
2656     constexpr int64_t HALF = 0x3FE0000000000000;  // double precision representation of 0.5
2657     // CC-OFFNXT(G.NAM.03-CPP) project code style
2658     constexpr int64_t ONE = 0x3FF0000000000000;  // double precision representation of 1.0
2659 
2660     ScopedTmpRegF64 ceil(this);
2661     GetMasm()->roundsd(ArchVReg(ceil), ArchVReg(src), asmjit::imm(0b10));
2662 
2663     // calculate ceil(val) - val
2664     ScopedTmpRegF64 diff(this);
2665     GetMasm()->movapd(ArchVReg(diff), ArchVReg(ceil));
2666     GetMasm()->subsd(ArchVReg(diff), ArchVReg(src));
2667 
2668     // load 0.5 constant and compare
2669     ScopedTmpRegF64 constReg(this);
2670     ScopedTmpRegU64 tmpReg(this);
2671     GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(HALF));
2672     GetMasm()->movq(ArchVReg(constReg), ArchReg(tmpReg));
2673     GetMasm()->comisd(ArchVReg(diff), ArchVReg(constReg));
2674 
2675     // if difference > 0.5, subtract 1 from result
2676     auto done = GetMasm()->newLabel();
2677     GetMasm()->jbe(done);  // If difference <= 0.5, jump to end
2678 
2679     // Load 1.0 and subtract
2680     GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(ONE));
2681     GetMasm()->movq(ArchVReg(constReg), ArchReg(tmpReg));
2682     GetMasm()->subsd(ArchVReg(ceil), ArchVReg(constReg));
2683 
2684     GetMasm()->bind(done);
2685 
2686     // move result to destination register
2687     GetMasm()->movapd(ArchVReg(dst), ArchVReg(ceil));
2688 }
2689 
2690 template <typename T>
EncodeReverseBitsImpl(Reg dst0,Reg src0)2691 void Amd64Encoder::EncodeReverseBitsImpl(Reg dst0, Reg src0)
2692 {
2693     ASSERT(std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed);
2694     [[maybe_unused]] constexpr auto IMM_8 = 8;
2695     ASSERT(sizeof(T) * IMM_8 == dst0.GetSize());
2696     // NOLINTNEXTLINE(modernize-avoid-c-arrays)
2697     static constexpr T MASKS[] = {static_cast<T>(UINT64_C(0x5555555555555555)),
2698                                   static_cast<T>(UINT64_C(0x3333333333333333)),
2699                                   static_cast<T>(UINT64_C(0x0f0f0f0f0f0f0f0f))};
2700 
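    // Classic bit reversal: three mask-and-shift rounds swap adjacent groups of 1, 2 and 4 bits within
    // every byte, then bswap reverses the byte order to complete the reversal of the whole register.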
2701     ScopedTmpReg tmp(this, dst0.GetType());
2702     ScopedTmpReg immHolder(this, dst0.GetType());
2703     auto immHolderReg = ArchReg(immHolder);
2704 
2705     GetMasm()->mov(ArchReg(dst0), ArchReg(src0));
2706     GetMasm()->mov(ArchReg(tmp), ArchReg(src0));
2707     constexpr auto MAX_ROUNDS = 3;
2708     for (uint64_t round = 0; round < MAX_ROUNDS; round++) {
2709         auto shift = 1U << round;
2710         auto mask = asmjit::imm(MASKS[round]);
2711         GetMasm()->shr(ArchReg(dst0), shift);
2712         if (dst0.GetSize() == DOUBLE_WORD_SIZE) {
2713             GetMasm()->mov(immHolderReg, mask);
2714             GetMasm()->and_(ArchReg(tmp), immHolderReg);
2715             GetMasm()->and_(ArchReg(dst0), immHolderReg);
2716         } else {
2717             GetMasm()->and_(ArchReg(tmp), mask);
2718             GetMasm()->and_(ArchReg(dst0), mask);
2719         }
2720         GetMasm()->shl(ArchReg(tmp), shift);
2721         GetMasm()->or_(ArchReg(dst0), ArchReg(tmp));
2722         constexpr auto ROUND_2 = 2;
2723         if (round != ROUND_2) {
2724             GetMasm()->mov(ArchReg(tmp), ArchReg(dst0));
2725         }
2726     }
2727 
2728     GetMasm()->bswap(ArchReg(dst0));
2729 }
2730 
EncodeReverseBits(Reg dst0,Reg src0)2731 void Amd64Encoder::EncodeReverseBits(Reg dst0, Reg src0)
2732 {
2733     ASSERT(src0.GetSize() == WORD_SIZE || src0.GetSize() == DOUBLE_WORD_SIZE);
2734     ASSERT(src0.GetSize() == dst0.GetSize());
2735 
2736     if (src0.GetSize() == WORD_SIZE) {
2737         EncodeReverseBitsImpl<uint32_t>(dst0, src0);
2738         return;
2739     }
2740 
2741     EncodeReverseBitsImpl<uint64_t>(dst0, src0);
2742 }
2743 
CanEncodeScale(uint64_t imm,uint32_t size)2744 bool Amd64Encoder::CanEncodeScale(uint64_t imm, [[maybe_unused]] uint32_t size)
2745 {
2746     return imm <= 3U;
2747 }
2748 
CanEncodeImmLogical(uint64_t imm,uint32_t size)2749 bool Amd64Encoder::CanEncodeImmLogical(uint64_t imm, uint32_t size)
2750 {
2751 #ifndef NDEBUG
2752     if (size < DOUBLE_WORD_SIZE) {
2753         // Test if the highest part is consistent:
2754         ASSERT(((imm >> size) == 0) || (((~imm) >> size) == 0));
2755     }
2756 #endif  // NDEBUG
2757     return ImmFitsSize(imm, size);
2758 }
2759 
CanEncodeBitCount()2760 bool Amd64Encoder::CanEncodeBitCount()
2761 {
2762     return asmjit::CpuInfo::host().hasFeature(asmjit::x86::Features::kPOPCNT);
2763 }
2764 
CanOptimizeImmDivMod(uint64_t imm,bool isSigned) const2765 bool Amd64Encoder::CanOptimizeImmDivMod(uint64_t imm, bool isSigned) const
2766 {
2767     return CanOptimizeImmDivModCommon(imm, isSigned);
2768 }
2769 
EncodeIsInf(Reg dst,Reg src)2770 void Amd64Encoder::EncodeIsInf(Reg dst, Reg src)
2771 {
2772     ASSERT(dst.IsScalar() && src.IsFloat());
2773 
2774     GetMasm()->xor_(ArchReg(dst, DOUBLE_WORD_SIZE), ArchReg(dst, DOUBLE_WORD_SIZE));
2775 
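    // Shift the raw bits left by one to drop the sign and compare against the similarly shifted
    // infinity pattern, so both +inf and -inf compare equal.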
2776     if (src.GetSize() == WORD_SIZE) {
2777         constexpr auto INF_MASK = uint32_t(0x7f800000) << 1U;
2778 
2779         ScopedTmpRegU32 tmpReg(this);
2780         ScopedTmpRegU32 tmp1Reg(this);
2781         auto tmp = ArchReg(tmpReg);
2782         auto tmp1 = ArchReg(tmp1Reg);
2783 
2784         GetMasm()->movd(tmp1, ArchVReg(src));
2785         GetMasm()->shl(tmp1, 1);
2786         GetMasm()->mov(tmp, INF_MASK);
2787         GetMasm()->cmp(tmp, tmp1);
2788     } else {
2789         constexpr auto INF_MASK = uint64_t(0x7ff0000000000000) << 1U;
2790 
2791         ScopedTmpRegU64 tmpReg(this);
2792         ScopedTmpRegU64 tmp1Reg(this);
2793         auto tmp = ArchReg(tmpReg);
2794         auto tmp1 = ArchReg(tmp1Reg);
2795 
2796         GetMasm()->movq(tmp1, ArchVReg(src));
2797         GetMasm()->shl(tmp1, 1);
2798 
2799         GetMasm()->mov(tmp, INF_MASK);
2800         GetMasm()->cmp(tmp, tmp1);
2801     }
2802 
2803     GetMasm()->sete(ArchReg(dst, BYTE_SIZE));
2804 }
2805 
EncodeCmpFracWithDelta(Reg src)2806 void Amd64Encoder::EncodeCmpFracWithDelta(Reg src)
2807 {
2808     ASSERT(src.IsFloat());
2809     ASSERT(src.GetType() == FLOAT32_TYPE || src.GetType() == FLOAT64_TYPE);
2810 
2811     // Rounding control bits: Truncated (aka Round to Zero)
2812     constexpr uint8_t RND_CTL_TRUNCATED = 0b00000011;
2813 
2814     // Encode (fabs(src - trunc(src)) <= DELTA)
2815     if (src.GetType() == FLOAT32_TYPE) {
2816         ScopedTmpRegF32 tmp(this);
2817         ScopedTmpRegF32 delta(this);
2818         GetMasm()->roundss(ArchVReg(tmp), ArchVReg(src), asmjit::imm(RND_CTL_TRUNCATED));
2819         EncodeSub(tmp, src, tmp);
2820         EncodeAbs(tmp, tmp);
2821         EncodeMov(delta, Imm(std::numeric_limits<float>::epsilon()));
2822         GetMasm()->ucomiss(ArchVReg(tmp), ArchVReg(delta));
2823     } else {
2824         ScopedTmpRegF64 tmp(this);
2825         ScopedTmpRegF64 delta(this);
2826         GetMasm()->roundsd(ArchVReg(tmp), ArchVReg(src), asmjit::imm(RND_CTL_TRUNCATED));
2827         EncodeSub(tmp, src, tmp);
2828         EncodeAbs(tmp, tmp);
2829         EncodeMov(delta, Imm(std::numeric_limits<double>::epsilon()));
2830         GetMasm()->ucomisd(ArchVReg(tmp), ArchVReg(delta));
2831     }
2832 }
2833 
EncodeIsInteger(Reg dst,Reg src)2834 void Amd64Encoder::EncodeIsInteger(Reg dst, Reg src)
2835 {
2836     ASSERT(dst.IsScalar() && src.IsFloat());
2837     ASSERT(src.GetType() == FLOAT32_TYPE || src.GetType() == FLOAT64_TYPE);
2838 
2839     auto labelExit = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
2840 
2841     GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
2842     EncodeCmpFracWithDelta(src);
2843     GetMasm()->jp(*labelExit);  // Inf or NaN
2844     GetMasm()->set(ArchCc(Condition::LE, true), ArchReg(dst, BYTE_SIZE));
2845     GetMasm()->bind(*labelExit);
2846 }
2847 
EncodeIsSafeInteger(Reg dst,Reg src)2848 void Amd64Encoder::EncodeIsSafeInteger(Reg dst, Reg src)
2849 {
2850     ASSERT(dst.IsScalar() && src.IsFloat());
2851     ASSERT(src.GetType() == FLOAT32_TYPE || src.GetType() == FLOAT64_TYPE);
2852 
2853     auto labelExit = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
2854 
2855     GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
2856 
2857     // Check if IsInteger
2858     EncodeCmpFracWithDelta(src);
2859     GetMasm()->jp(*labelExit);  // Inf or NaN
2860     GetMasm()->j(ArchCc(Condition::GT, true), *labelExit);
2861 
2862     // Check if it is safe, i.e. src can be represented in float/double without losing precision
2863     if (src.GetType() == FLOAT32_TYPE) {
2864         ScopedTmpRegF32 tmp1(this);
2865         ScopedTmpRegF32 tmp2(this);
2866         EncodeAbs(tmp1, src);
2867         EncodeMov(tmp2, Imm(MaxIntAsExactFloat()));
2868         GetMasm()->ucomiss(ArchVReg(tmp1), ArchVReg(tmp2));
2869     } else {
2870         ScopedTmpRegF64 tmp1(this);
2871         ScopedTmpRegF64 tmp2(this);
2872         EncodeAbs(tmp1, src);
2873         EncodeMov(tmp2, Imm(MaxIntAsExactDouble()));
2874         GetMasm()->ucomisd(ArchVReg(tmp1), ArchVReg(tmp2));
2875     }
2876     GetMasm()->set(ArchCc(Condition::LE, true), ArchReg(dst, BYTE_SIZE));
2877     GetMasm()->bind(*labelExit);
2878 }
2879 
2880 /* Since NaNs have to be canonicalized, we compare the
2881  * input with itself; if it is NaN, the comparison will
2882  * set the parity flag (PF). */
EncodeFpToBits(Reg dst,Reg src)2883 void Amd64Encoder::EncodeFpToBits(Reg dst, Reg src)
2884 {
2885     ASSERT(dst.IsScalar() && src.IsFloat());
2886 
2887     if (dst.GetType() == INT32_TYPE) {
2888         ASSERT(src.GetSize() == WORD_SIZE);
2889 
2890         constexpr auto FLOAT_NAN = uint32_t(0x7fc00000);
2891 
2892         ScopedTmpRegU32 tmp(this);
2893 
2894         GetMasm()->ucomiss(ArchVReg(src), ArchVReg(src));
2895         GetMasm()->mov(ArchReg(tmp), FLOAT_NAN);
2896         GetMasm()->movd(ArchReg(dst), ArchVReg(src));
2897         GetMasm()->cmovpe(ArchReg(dst), ArchReg(tmp));
2898     } else {
2899         ASSERT(src.GetSize() == DOUBLE_WORD_SIZE);
2900 
2901         constexpr auto DOUBLE_NAN = uint64_t(0x7ff8000000000000);
2902         ScopedTmpRegU64 tmp(this);
2903 
2904         GetMasm()->ucomisd(ArchVReg(src), ArchVReg(src));
2905         GetMasm()->mov(ArchReg(tmp), DOUBLE_NAN);
2906         GetMasm()->movq(ArchReg(dst), ArchVReg(src));
2907         GetMasm()->cmovpe(ArchReg(dst), ArchReg(tmp));
2908     }
2909 }
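/* The cmovpe canonicalizes NaNs: ucomis* with a NaN operand sets PF, and the
 * conditional move then substitutes the single quiet-NaN bit pattern. Roughly
 * (illustration only, 64-bit case):
 *
 *   uint64_t FpToBits(double v) {
 *       return std::isnan(v) ? uint64_t {0x7ff8000000000000ULL} : bit_cast<uint64_t>(v);
 *   }
 */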
2910 
2911 void Amd64Encoder::EncodeMoveBitsRaw(Reg dst, Reg src)
2912 {
2913     ASSERT((dst.IsFloat() && src.IsScalar()) || (src.IsFloat() && dst.IsScalar()));
2914     if (src.IsScalar()) {
2915         ASSERT((dst.GetSize() == src.GetSize()));
2916         if (src.GetSize() == WORD_SIZE) {
2917             GetMasm()->movd(ArchVReg(dst), ArchReg(src));
2918         } else {
2919             GetMasm()->movq(ArchVReg(dst), ArchReg(src));
2920         }
2921     } else {
2922         ASSERT((src.GetSize() == dst.GetSize()));
2923         if (dst.GetSize() == WORD_SIZE) {
2924             GetMasm()->movd(ArchReg(dst), ArchVReg(src));
2925         } else {
2926             GetMasm()->movq(ArchReg(dst), ArchVReg(src));
2927         }
2928     }
2929 }
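/* Pure bit-pattern transfer between the general-purpose and XMM register
 * files, i.e. the encoder-level analogue of (illustration only):
 *
 *   float f = bit_cast<float>(u32);      // movd gpr -> xmm
 *   uint64_t u = bit_cast<uint64_t>(d);  // movq xmm -> gpr
 */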
2930 
2931 /* Unsafe intrinsics */
2932 void Amd64Encoder::EncodeCompareAndSwap(Reg dst, Reg obj, const Reg *offset, Reg val, Reg newval)
2933 {
2934     /*
2935      * movl    old, %eax
2936      * lock    cmpxchgl   new, addr
2937      * sete    %al
2938      */
2939     ScopedTmpRegU64 tmp1(this);
2940     ScopedTmpRegU64 tmp2(this);
2941     ScopedTmpRegU64 tmp3(this);
2942     Reg newvalue = newval;
2943     auto addr = ArchMem(MemRef(tmp2)).Prepare(GetMasm());
2944     auto addrReg = ArchReg(tmp2);
2945     Reg rax(ConvertRegNumber(asmjit::x86::rax.id()), INT64_TYPE);
2946 
2947     /* NOTE(ayodkev) this is a workaround for the failure of
2948      * jsr166.ScheduledExecutorTest; we still have to figure out
2949      * whether there is a less crude way to avoid this */
2950     if (newval.GetId() == rax.GetId()) {
2951         SetFalseResult();
2952         return;
2953     }
2954 
2955     if (offset != nullptr) {
2956         GetMasm()->lea(addrReg, asmjit::x86::ptr(ArchReg(obj), ArchReg(*offset)));
2957     } else {
2958         GetMasm()->mov(addrReg, ArchReg(obj));
2959     }
2960 
2961     /* the [er]ax register will be overwritten by the cmpxchg instruction,
2962      * so save it unless it is the destination register */
2963     if (dst.GetId() != rax.GetId()) {
2964         GetMasm()->mov(ArchReg(tmp1), asmjit::x86::rax);
2965     }
2966 
2967     /* if the new value comes in the [er]ax register we have to use a
2968      * different register, as [er]ax will contain the current value */
2969     if (newval.GetId() == rax.GetId()) {
2970         GetMasm()->mov(ArchReg(tmp3, newval.GetSize()), ArchReg(newval));
2971         newvalue = tmp3;
2972     }
2973 
2974     if (val.GetId() != rax.GetId()) {
2975         GetMasm()->mov(asmjit::x86::rax, ArchReg(val).r64());
2976     }
2977 
2978     GetMasm()->lock().cmpxchg(addr, ArchReg(newvalue));
2979     GetMasm()->sete(ArchReg(dst));
2980 
2981     if (dst.GetId() != rax.GetId()) {
2982         GetMasm()->mov(asmjit::x86::rax, ArchReg(tmp1));
2983     }
2984 }
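/* Illustration only, ignoring the [er]ax shuffling above: the emitted sequence
 * behaves like a sequentially consistent compare-and-swap that reports
 * success. lock cmpxchg compares [addr] with RAX, stores the new value on a
 * match and sets ZF, which sete copies into the destination register.
 *
 *   bool Cas(std::atomic<uint64_t> *addr, uint64_t expected, uint64_t desired) {
 *       return addr->compare_exchange_strong(expected, desired);
 *   }
 */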
2985 
2986 void Amd64Encoder::EncodeCompareAndSwap(Reg dst, Reg obj, Reg offset, Reg val, Reg newval)
2987 {
2988     EncodeCompareAndSwap(dst, obj, &offset, val, newval);
2989 }
2990 
2991 void Amd64Encoder::EncodeCompareAndSwap(Reg dst, Reg addr, Reg val, Reg newval)
2992 {
2993     EncodeCompareAndSwap(dst, addr, nullptr, val, newval);
2994 }
2995 
2996 void Amd64Encoder::EncodeUnsafeGetAndSet(Reg dst, Reg obj, Reg offset, Reg val)
2997 {
2998     ScopedTmpRegU64 tmp(this);
2999     auto addrReg = ArchReg(tmp);
3000     auto addr = ArchMem(MemRef(tmp)).Prepare(GetMasm());
3001     GetMasm()->lea(addrReg, asmjit::x86::ptr(ArchReg(obj), ArchReg(offset)));
3002     GetMasm()->mov(ArchReg(dst), ArchReg(val));
3003     GetMasm()->lock().xchg(addr, ArchReg(dst));
3004 }
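/* lock xchg atomically swaps the destination register with memory, so dst ends
 * up holding the previous value; the high-level equivalent (illustration only):
 *
 *   uint64_t GetAndSet(std::atomic<uint64_t> *addr, uint64_t val) {
 *       return addr->exchange(val);
 *   }
 */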
3005 
3006 void Amd64Encoder::EncodeUnsafeGetAndAdd(Reg dst, Reg obj, Reg offset, Reg val, [[maybe_unused]] Reg tmp)
3007 {
3008     ScopedTmpRegU64 tmp1(this);
3009     auto addrReg = ArchReg(tmp1);
3010     auto addr = ArchMem(MemRef(tmp1)).Prepare(GetMasm());
3011     GetMasm()->lea(addrReg, asmjit::x86::ptr(ArchReg(obj), ArchReg(offset)));
3012     GetMasm()->mov(ArchReg(dst), ArchReg(val));
3013     GetMasm()->lock().xadd(addr, ArchReg(dst));
3014 }
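/* lock xadd stores old + val back to memory and leaves the old value in dst,
 * i.e. a fetch-and-add (illustration only):
 *
 *   uint64_t GetAndAdd(std::atomic<uint64_t> *addr, uint64_t val) {
 *       return addr->fetch_add(val);
 *   }
 */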
3015 
3016 void Amd64Encoder::EncodeMemoryBarrier(memory_order::Order order)
3017 {
3018     if (order == memory_order::FULL) {
3019         /* does the same as mfence but is faster; not applicable to non-temporal (NT) writes, though */
3020         GetMasm()->lock().add(asmjit::x86::dword_ptr(asmjit::x86::rsp), asmjit::imm(0));
3021     }
3022 }
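/* The locked read-modify-write of the stack slot acts as a full StoreLoad
 * barrier, comparable in effect to (illustration only):
 *
 *   std::atomic_thread_fence(std::memory_order_seq_cst);
 */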
3023 
3024 void Amd64Encoder::EncodeStackOverflowCheck(ssize_t offset)
3025 {
3026     MemRef mem(GetTarget().GetStackReg(), offset);
3027     auto m = ArchMem(mem).Prepare(GetMasm());
3028     GetMasm()->test(m, ArchReg(GetTarget().GetParamReg(0)));
3029 }
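/* The test instruction is used purely for its memory read: touching
 * [sp + offset] faults if the stack has already grown into the guard page, so
 * an overflow surfaces as a trap instead of silent corruption; the first
 * parameter register merely serves as a dummy operand.
 */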
3030 
3031 size_t Amd64Encoder::GetCursorOffset() const
3032 {
3033     // NOLINTNEXTLINE(readability-identifier-naming)
3034     return GetMasm()->offset();
3035 }
3036 
3037 void Amd64Encoder::SetCursorOffset(size_t offset)
3038 {
3039     // NOLINTNEXTLINE(readability-identifier-naming)
3040     GetMasm()->setOffset(offset);
3041 }
3042 
3043 void Amd64Encoder::EncodeGetCurrentPc(Reg dst)
3044 {
3045     ASSERT(dst.GetType() == INT64_TYPE);
3046     EncodeRelativePcMov(dst, 0L, [this](Reg reg, intptr_t offset) {
3047         GetMasm()->long_().lea(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
3048     });
3049 }
3050 
3051 Reg Amd64Encoder::AcquireScratchRegister(TypeInfo type)
3052 {
3053     return (static_cast<Amd64RegisterDescription *>(GetRegfile()))->AcquireScratchRegister(type);
3054 }
3055 
3056 void Amd64Encoder::AcquireScratchRegister(Reg reg)
3057 {
3058     (static_cast<Amd64RegisterDescription *>(GetRegfile()))->AcquireScratchRegister(reg);
3059 }
3060 
3061 void Amd64Encoder::ReleaseScratchRegister(Reg reg)
3062 {
3063     (static_cast<Amd64RegisterDescription *>(GetRegfile()))->ReleaseScratchRegister(reg);
3064 }
3065 
3066 bool Amd64Encoder::IsScratchRegisterReleased(Reg reg) const
3067 {
3068     return (static_cast<Amd64RegisterDescription *>(GetRegfile()))->IsScratchRegisterReleased(reg);
3069 }
3070 
3071 RegMask Amd64Encoder::GetScratchRegistersMask() const
3072 {
3073     return (static_cast<const Amd64RegisterDescription *>(GetRegfile()))->GetScratchRegistersMask();
3074 }
3075 
3076 RegMask Amd64Encoder::GetScratchFpRegistersMask() const
3077 {
3078     return (static_cast<const Amd64RegisterDescription *>(GetRegfile()))->GetScratchFpRegistersMask();
3079 }
3080 
3081 RegMask Amd64Encoder::GetAvailableScratchRegisters() const
3082 {
3083     auto regfile = static_cast<const Amd64RegisterDescription *>(GetRegfile());
3084     return RegMask(regfile->GetScratchRegisters().GetMask());
3085 }
3086 
3087 VRegMask Amd64Encoder::GetAvailableScratchFpRegisters() const
3088 {
3089     auto regfile = static_cast<const Amd64RegisterDescription *>(GetRegfile());
3090     return VRegMask(regfile->GetScratchFPRegisters().GetMask());
3091 }
3092 
3093 TypeInfo Amd64Encoder::GetRefType()
3094 {
3095     return INT64_TYPE;
3096 }
3097 
3098 void *Amd64Encoder::BufferData() const
3099 {
3100     // NOLINTNEXTLINE(readability-identifier-naming)
3101     return GetMasm()->bufferData();
3102 }
3103 
3104 size_t Amd64Encoder::BufferSize() const
3105 {
3106     // NOLINTNEXTLINE(readability-identifier-naming)
3107     return GetMasm()->offset();
3108 }
3109 
3110 void Amd64Encoder::MakeLibCall(Reg dst, Reg src0, Reg src1, void *entryPoint)
3111 {
3112     if (!dst.IsFloat()) {
3113         SetFalseResult();
3114         return;
3115     }
3116 
3117     if (dst.GetType() == FLOAT32_TYPE) {
3118         if (!src0.IsFloat() || !src1.IsFloat()) {
3119             SetFalseResult();
3120             return;
3121         }
3122 
3123         if (src0.GetId() != asmjit::x86::xmm0.id() || src1.GetId() != asmjit::x86::xmm1.id()) {
3124             ScopedTmpRegF32 tmp(this);
3125             GetMasm()->movss(ArchVReg(tmp), ArchVReg(src1));
3126             GetMasm()->movss(asmjit::x86::xmm0, ArchVReg(src0));
3127             GetMasm()->movss(asmjit::x86::xmm1, ArchVReg(tmp));
3128         }
3129 
3130         MakeCall(entryPoint);
3131 
3132         if (dst.GetId() != asmjit::x86::xmm0.id()) {
3133             GetMasm()->movss(ArchVReg(dst), asmjit::x86::xmm0);
3134         }
3135     } else if (dst.GetType() == FLOAT64_TYPE) {
3136         if (!src0.IsFloat() || !src1.IsFloat()) {
3137             SetFalseResult();
3138             return;
3139         }
3140 
3141         if (src0.GetId() != asmjit::x86::xmm0.id() || src1.GetId() != asmjit::x86::xmm1.id()) {
3142             ScopedTmpRegF64 tmp(this);
3143             GetMasm()->movsd(ArchVReg(tmp), ArchVReg(src1));
3144             GetMasm()->movsd(asmjit::x86::xmm0, ArchVReg(src0));
3145             GetMasm()->movsd(asmjit::x86::xmm1, ArchVReg(tmp));
3146         }
3147 
3148         MakeCall(entryPoint);
3149 
3150         if (dst.GetId() != asmjit::x86::xmm0.id()) {
3151             GetMasm()->movsd(ArchVReg(dst), asmjit::x86::xmm0);
3152         }
3153     } else {
3154         UNREACHABLE();
3155     }
3156 }
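/* The register shuffling above follows the System V AMD64 calling convention:
 * the first two FP arguments travel in xmm0/xmm1 and the result comes back in
 * xmm0. The temporary copy of src1 guards against src1 already residing in
 * xmm0, which the move of src0 into xmm0 would otherwise clobber.
 */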
3157 
3158 template <bool IS_STORE>
3159 void Amd64Encoder::LoadStoreRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
3160 {
3161     for (size_t i {0}; i < registers.size(); ++i) {
3162         if (!registers.test(i)) {
3163             continue;
3164         }
3165 
3166         asmjit::x86::Mem mem = asmjit::x86::ptr(asmjit::x86::rsp, (slot + i - startReg) * DOUBLE_WORD_SIZE_BYTES);
3167 
3168         if constexpr (IS_STORE) {  // NOLINT
3169             if (isFp) {
3170                 GetMasm()->movsd(mem, asmjit::x86::xmm(i));
3171             } else {
3172                 GetMasm()->mov(mem, asmjit::x86::gpq(ConvertRegNumber(i)));
3173             }
3174         } else {  // NOLINT
3175             if (isFp) {
3176                 GetMasm()->movsd(asmjit::x86::xmm(i), mem);
3177             } else {
3178                 GetMasm()->mov(asmjit::x86::gpq(ConvertRegNumber(i)), mem);
3179             }
3180         }
3181     }
3182 }
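/* Slot layout: register i is accessed at rsp + (slot + i - startReg) * 8, so
 * the first register of the range lands at `slot` and the rest follow in
 * register-number order. Worked example (illustration only): registers =
 * {#12, #13}, startReg = 12, slot = 2 -> #12 at [rsp + 16], #13 at [rsp + 24].
 */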
3183 
3184 template <bool IS_STORE>
3185 void Amd64Encoder::LoadStoreRegisters(RegMask registers, bool isFp, int32_t slot, Reg base, RegMask mask)
3186 {
3187     auto baseReg = ArchReg(base);
3188     bool hasMask = mask.any();
3189     int32_t index = hasMask ? static_cast<int32_t>(mask.GetMinRegister()) : 0;
3190     slot -= index;
3191     for (size_t i = index; i < registers.size(); ++i) {
3192         if (hasMask) {
3193             if (!mask.test(i)) {
3194                 continue;
3195             }
3196             index++;
3197         }
3198         if (!registers.test(i)) {
3199             continue;
3200         }
3201 
3202         if (!hasMask) {
3203             index++;
3204         }
3205 
3206         // `-1` because we've incremented `index` in advance
3207         asmjit::x86::Mem mem = asmjit::x86::ptr(baseReg, (slot + index - 1) * DOUBLE_WORD_SIZE_BYTES);
3208 
3209         if constexpr (IS_STORE) {  // NOLINT
3210             if (isFp) {
3211                 GetMasm()->movsd(mem, asmjit::x86::xmm(i));
3212             } else {
3213                 GetMasm()->mov(mem, asmjit::x86::gpq(ConvertRegNumber(i)));
3214             }
3215         } else {  // NOLINT
3216             if (isFp) {
3217                 GetMasm()->movsd(asmjit::x86::xmm(i), mem);
3218             } else {
3219                 GetMasm()->mov(asmjit::x86::gpq(ConvertRegNumber(i)), mem);
3220             }
3221         }
3222     }
3223 }
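/* Here slots are assigned in mask order rather than by register number: the
 * k-th register of `mask` maps to `slot + k`, and `registers` selects which of
 * those slots are actually touched. Worked example (illustration only):
 * mask = {#1, #3}, registers = {#3}, slot = 4 -> only register #3 is accessed,
 * at [base + 5 * 8], because it is the second register of the mask.
 */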
3224 
3225 void Amd64Encoder::SaveRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
3226 {
3227     LoadStoreRegisters<true>(registers, slot, startReg, isFp);
3228 }
3229 
3230 void Amd64Encoder::LoadRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
3231 {
3232     LoadStoreRegisters<false>(registers, slot, startReg, isFp);
3233 }
3234 
3235 void Amd64Encoder::SaveRegisters(RegMask registers, bool isFp, ssize_t slot, Reg base, RegMask mask)
3236 {
3237     LoadStoreRegisters<true>(registers, isFp, slot, base, mask);
3238 }
3239 
3240 void Amd64Encoder::LoadRegisters(RegMask registers, bool isFp, ssize_t slot, Reg base, RegMask mask)
3241 {
3242     LoadStoreRegisters<false>(registers, isFp, slot, base, mask);
3243 }
3244 
3245 void Amd64Encoder::PushRegisters(RegMask registers, bool isFp)
3246 {
3247     for (size_t i = 0; i < registers.size(); i++) {
3248         if (registers[i]) {
3249             if (isFp) {
3250                 GetMasm()->sub(asmjit::x86::rsp, DOUBLE_WORD_SIZE_BYTES);
3251                 GetMasm()->movsd(asmjit::x86::ptr(asmjit::x86::rsp), ArchVReg(Reg(i, FLOAT64_TYPE)));
3252             } else {
3253                 GetMasm()->push(asmjit::x86::gpq(ConvertRegNumber(i)));
3254             }
3255         }
3256     }
3257 }
3258 
3259 void Amd64Encoder::PopRegisters(RegMask registers, bool isFp)
3260 {
3261     for (ssize_t i = registers.size() - 1; i >= 0; i--) {
3262         if (registers[i]) {
3263             if (isFp) {
3264                 GetMasm()->movsd(ArchVReg(Reg(i, FLOAT64_TYPE)), asmjit::x86::ptr(asmjit::x86::rsp));
3265                 GetMasm()->add(asmjit::x86::rsp, DOUBLE_WORD_SIZE_BYTES);
3266             } else {
3267                 GetMasm()->pop(asmjit::x86::gpq(ConvertRegNumber(i)));
3268             }
3269         }
3270     }
3271 }
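/* PopRegisters walks the mask from the highest register down so the pops
 * mirror the pushes above exactly; FP registers are restored with an explicit
 * movsd plus rsp adjustment because push/pop have no xmm forms.
 */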
3272 
3273 asmjit::x86::Assembler *Amd64Encoder::GetMasm() const
3274 {
3275     ASSERT(masm_ != nullptr);
3276     return masm_;
3277 }
3278 
3279 size_t Amd64Encoder::GetLabelAddress(LabelHolder::LabelId label)
3280 {
3281     auto code = GetMasm()->code();
3282     ASSERT(code->isLabelBound(label));
3283     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
3284     return code->baseAddress() + code->labelOffset(label);
3285 }
3286 
3287 bool Amd64Encoder::LabelHasLinks(LabelHolder::LabelId label)
3288 {
3289     auto code = GetMasm()->code();
3290     auto entry = code->labelEntry(label);
3291     return entry->links() != nullptr;
3292 }
3293 
3294 template <typename T, size_t N>
3295 void Amd64Encoder::CopyArrayToXmm(Reg xmm, const std::array<T, N> &arr)
3296 {
3297     static constexpr auto SIZE {N * sizeof(T)};
3298     static_assert((SIZE == DOUBLE_WORD_SIZE_BYTES) || (SIZE == 2U * DOUBLE_WORD_SIZE_BYTES));
3299     ASSERT(xmm.GetType() == FLOAT64_TYPE);
3300 
3301     auto data {reinterpret_cast<const uint64_t *>(arr.data())};
3302 
3303     ScopedTmpRegU64 tmpGpr(this);
3304     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
3305     GetMasm()->mov(ArchReg(tmpGpr), asmjit::imm(data[0]));
3306     GetMasm()->movq(ArchVReg(xmm), ArchReg(tmpGpr));
3307 
3308     if constexpr (SIZE == 2U * DOUBLE_WORD_SIZE_BYTES) {
3309         ScopedTmpRegF64 tmpXmm(this);
3310         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
3311         GetMasm()->mov(ArchReg(tmpGpr), asmjit::imm(data[1]));
3312         GetMasm()->movq(ArchVReg(tmpXmm), ArchReg(tmpGpr));
3313         GetMasm()->unpcklpd(ArchVReg(xmm), ArchVReg(tmpXmm));
3314     }
3315 }
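/* The value is assembled lane by lane: data[0] becomes the low quadword via
 * movq, data[1] is materialized in a scratch xmm and unpcklpd merges it in as
 * the high quadword. Worked example (illustration only):
 * arr = {0x1111111111111111, 0x2222222222222222} yields
 * xmm = 0x2222222222222222'1111111111111111 (high:low).
 */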
3316 
3317 template <typename T>
3318 void Amd64Encoder::CopyImmToXmm(Reg xmm, T imm)
3319 {
3320     static_assert((sizeof(imm) == WORD_SIZE_BYTES) || (sizeof(imm) == DOUBLE_WORD_SIZE_BYTES));
3321     ASSERT(xmm.GetSize() == BYTE_SIZE * sizeof(imm));
3322 
3323     if constexpr (sizeof(imm) == WORD_SIZE_BYTES) {  // NOLINT
3324         ScopedTmpRegU32 tmpGpr(this);
3325         GetMasm()->mov(ArchReg(tmpGpr), asmjit::imm(bit_cast<uint32_t>(imm)));
3326         GetMasm()->movd(ArchVReg(xmm), ArchReg(tmpGpr));
3327     } else {  // NOLINT
3328         ScopedTmpRegU64 tmpGpr(this);
3329         GetMasm()->mov(ArchReg(tmpGpr), asmjit::imm(bit_cast<uint64_t>(imm)));
3330         GetMasm()->movq(ArchVReg(xmm), ArchReg(tmpGpr));
3331     }
3332 }
3333 
3334 size_t Amd64Encoder::DisasmInstr(std::ostream &stream, size_t pc, ssize_t codeOffset) const
3335 {
3336     if (codeOffset < 0) {
3337         (const_cast<Amd64Encoder *>(this))->Finalize();
3338     }
3339     // NOLINTNEXTLINE(readability-identifier-naming)
3340     Span code(GetMasm()->bufferData(), GetMasm()->offset());
3341 
3342     [[maybe_unused]] size_t dataLeft = code.Size() - pc;
3343     [[maybe_unused]] constexpr size_t LENGTH = ZYDIS_MAX_INSTRUCTION_LENGTH;  // 15 bytes is max inst length in amd64
3344 
3345     // Initialize decoder context
3346     ZydisDecoder decoder;
3347     [[maybe_unused]] bool res =
3348         ZYAN_SUCCESS(ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_ADDRESS_WIDTH_64));
3349 
3350     // Initialize formatter
3351     ZydisFormatter formatter;
3352     res &= ZYAN_SUCCESS(ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_ATT));
3353     ZydisFormatterSetProperty(&formatter, ZYDIS_FORMATTER_PROP_FORCE_RELATIVE_BRANCHES, 1);
3354     ASSERT(res);
3355 
3356     ZydisDecodedInstruction instruction;
3357 
3358     res &= ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&decoder, &code[pc], std::min(LENGTH, dataLeft), &instruction));
3359 
3360     // Format & print the binary instruction structure to human readable format
3361     char buffer[256];  // NOLINT (modernize-avoid-c-arrays, readability-identifier-naming, readability-magic-numbers)
3362     res &= ZYAN_SUCCESS(
3363         ZydisFormatterFormatInstruction(&formatter, &instruction, buffer, sizeof(buffer), uintptr_t(&code[pc])));
3364 
3365     ASSERT(res);
3366 
3367     // Print disassembly
3368     if (codeOffset < 0) {
3369         stream << buffer;
3370     } else {
3371         stream << std::setw(0x8) << std::right << std::setfill('0') << std::hex << pc + codeOffset << std::dec
3372                << std::setfill(' ') << ": " << buffer;
3373     }
3374 
3375     return pc + instruction.length;
3376 }
3377 }  // namespace ark::compiler::amd64
3378