1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16 Encoder (implementation of math and memory low-level emitters)
17 */
18 
19 #include <iomanip>
20 
21 #include "compiler/optimizer/code_generator/relocations.h"
22 #include "target/amd64/target.h"
23 
24 #include "lib_helpers.inl"
25 
26 #include "Zydis/Zydis.h"
27 
28 #ifndef PANDA_TARGET_MACOS
29 #include "elf.h"
30 #endif  // PANDA_TARGET_MACOS
31 
32 namespace panda::compiler::amd64 {
33 LabelHolder::LabelId Amd64LabelHolder::CreateLabel()
34 {
35     ++id_;
36 
37     auto masm = (static_cast<Amd64Encoder *>(GetEncoder()))->GetMasm();
38     auto label = masm->newLabel();
39 
40     auto allocator = GetEncoder()->GetAllocator();
41     labels_.push_back(allocator->New<LabelType>(std::move(label)));
42     ASSERT(labels_.size() == id_);
43     return id_ - 1;
44 }
45 
46 void Amd64LabelHolder::BindLabel(LabelId id)
47 {
48     static_cast<Amd64Encoder *>(GetEncoder())->GetMasm()->bind(*labels_[id]);
49 }
50 
51 Amd64Encoder::Amd64Encoder(ArenaAllocator *allocator) : Encoder(allocator, Arch::X86_64, false) {}
52 
53 Amd64Encoder::~Amd64Encoder()
54 {
55     if (masm_ != nullptr) {
56         masm_->~Assembler();
57         masm_ = nullptr;
58     }
59 
60     if (code_holder_ != nullptr) {
61         code_holder_->~CodeHolder();
62         code_holder_ = nullptr;
63     }
64 
65     if (error_handler_ != nullptr) {
66         error_handler_->~ErrorHandler();
67         error_handler_ = nullptr;
68     }
69 
70     if (labels_ != nullptr) {
71         labels_->~Amd64LabelHolder();
72         labels_ = nullptr;
73     }
74 }
75 
76 bool Amd64Encoder::InitMasm()
77 {
78     if (masm_ == nullptr) {
79         labels_ = GetAllocator()->New<Amd64LabelHolder>(this);
80         if (labels_ == nullptr) {
81             SetFalseResult();
82             return false;
83         }
84 
85         asmjit::Environment env;
86         env.setArch(asmjit::Environment::kArchX64);
87 
88         code_holder_ = GetAllocator()->New<asmjit::CodeHolder>(GetAllocator());
89         if (code_holder_ == nullptr) {
90             SetFalseResult();
91             return false;
92         }
93         code_holder_->init(env, 0U);
94 
95         masm_ = GetAllocator()->New<asmjit::x86::Assembler>(code_holder_);
96         if (masm_ == nullptr) {
97             SetFalseResult();
98             return false;
99         }
100 
101         // Enable strict validation.
102         masm_->addValidationOptions(asmjit::BaseEmitter::kValidationOptionAssembler);
103         error_handler_ = GetAllocator()->New<AsmJitErrorHandler>(this);
104         if (error_handler_ == nullptr) {
105             SetFalseResult();
106             return false;
107         }
108         masm_->setErrorHandler(error_handler_);
109 
110         // Make sure that the compiler uses the same scratch registers as the assembler
111         CHECK_EQ(compiler::arch_info::x86_64::TEMP_REGS, GetTarget().GetTempRegsMask());
112         CHECK_EQ(compiler::arch_info::x86_64::TEMP_FP_REGS, GetTarget().GetTempVRegsMask());
113     }
114     return true;
115 }
116 
117 void Amd64Encoder::Finalize()
118 {
119     auto code = GetMasm()->code();
120     auto code_size = code->codeSize();
121 
122     code->flatten();
123     code->resolveUnresolvedLinks();
124 
125     auto code_buffer = GetAllocator()->Alloc(code_size);
126 
127     code->relocateToBase(reinterpret_cast<uintptr_t>(code_buffer));
128     code->copyFlattenedData(code_buffer, code_size, asmjit::CodeHolder::kCopyPadSectionBuffer);
129 }
130 
131 void Amd64Encoder::EncodeJump(LabelHolder::LabelId id)
132 {
133     auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
134     GetMasm()->jmp(*label);
135 }
136 
137 void Amd64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
138 {
139     if (src0.IsScalar()) {
140         if (src0.GetSize() == src1.GetSize()) {
141             GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
142         } else if (src0.GetSize() > src1.GetSize()) {
143             ScopedTmpReg tmp_reg(this, src0.GetType());
144             EncodeCast(tmp_reg, false, src1, false);
145             GetMasm()->cmp(ArchReg(src0), ArchReg(tmp_reg));
146         } else {
147             ScopedTmpReg tmp_reg(this, src1.GetType());
148             EncodeCast(tmp_reg, false, src0, false);
149             GetMasm()->cmp(ArchReg(tmp_reg), ArchReg(src1));
150         }
151     } else if (src0.GetType() == FLOAT32_TYPE) {
152         GetMasm()->comiss(ArchVReg(src0), ArchVReg(src1));
153     } else {
154         GetMasm()->comisd(ArchVReg(src0), ArchVReg(src1));
155     }
156 
157     auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
158     if (src0.IsScalar()) {
159         GetMasm()->j(ArchCc(cc, src0.IsFloat()), *label);
160         return;
161     }
162 
163     if (CcMatchesNan(cc)) {
164         GetMasm()->jp(*label);
165         GetMasm()->j(ArchCc(cc, src0.IsFloat()), *label);
166     } else {
167         auto end = GetMasm()->newLabel();
168 
169         GetMasm()->jp(end);
170         GetMasm()->j(ArchCc(cc, src0.IsFloat()), *label);
171         GetMasm()->bind(end);
172     }
173 }
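// Hedged note, not part of the original source: comiss/comisd report unordered (NaN)
// operands through the parity flag, so the extra jp above either takes the branch (when
// the condition is meant to match NaN) or skips the conditional jump entirely. A minimal
// host-side sketch of the intended semantics, assuming <cmath>; the helper name is
// illustrative only:
static inline bool FloatJumpTakenSketch(float lhs, float rhs, bool cc_matches_nan, bool cc_holds)
{
    if (std::isnan(lhs) || std::isnan(rhs)) {  // the "parity flag set" case
        return cc_matches_nan;
    }
    return cc_holds;  // ordinary ordered comparison
}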
174 
175 void Amd64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
176 {
177     ASSERT(src.IsScalar());
178 
179     auto imm_val = ImmToSignedInt(imm);
180     if (imm_val == 0) {
181         EncodeJump(id, src, cc);
182         return;
183     }
184 
185     if (ImmFitsSize(imm_val, src.GetSize())) {
186         auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
187 
188         GetMasm()->cmp(ArchReg(src), asmjit::imm(imm_val));
189         GetMasm()->j(ArchCc(cc), *label);
190     } else {
191         ScopedTmpReg tmp_reg(this, src.GetType());
192         GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
193         EncodeJump(id, src, tmp_reg, cc);
194     }
195 }
196 
197 void Amd64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
198 {
199     ASSERT(src0.IsScalar());
200     if (src0.GetSize() == src1.GetSize()) {
201         GetMasm()->test(ArchReg(src0), ArchReg(src1));
202     } else if (src0.GetSize() > src1.GetSize()) {
203         ScopedTmpReg tmp_reg(this, src0.GetType());
204         EncodeCast(tmp_reg, false, src1, false);
205         GetMasm()->test(ArchReg(src0), ArchReg(tmp_reg));
206     } else {
207         ScopedTmpReg tmp_reg(this, src1.GetType());
208         EncodeCast(tmp_reg, false, src0, false);
209         GetMasm()->test(ArchReg(tmp_reg), ArchReg(src1));
210     }
211 
212     auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
213     GetMasm()->j(ArchCcTest(cc), *label);
214 }
215 
216 void Amd64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
217 {
218     ASSERT(src.IsScalar());
219 
220     auto imm_val = ImmToSignedInt(imm);
221     if (ImmFitsSize(imm_val, src.GetSize())) {
222         auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
223 
224         GetMasm()->test(ArchReg(src), asmjit::imm(imm_val));
225         GetMasm()->j(ArchCcTest(cc), *label);
226     } else {
227         ScopedTmpReg tmp_reg(this, src.GetType());
228         GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
229         EncodeJumpTest(id, src, tmp_reg, cc);
230     }
231 }
232 
233 void Amd64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Condition cc)
234 {
235     if (src.IsScalar()) {
236         auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
237 
238         GetMasm()->cmp(ArchReg(src), asmjit::imm(0));
239         GetMasm()->j(ArchCc(cc), *label);
240         return;
241     }
242 
243     ScopedTmpReg tmp_reg(this, src.GetType());
244     if (src.GetType() == FLOAT32_TYPE) {
245         GetMasm()->xorps(ArchVReg(tmp_reg), ArchVReg(tmp_reg));
246     } else {
247         GetMasm()->xorpd(ArchVReg(tmp_reg), ArchVReg(tmp_reg));
248     }
249     EncodeJump(id, src, tmp_reg, cc);
250 }
251 
252 void Amd64Encoder::EncodeJump(Reg dst)
253 {
254     GetMasm()->jmp(ArchReg(dst));
255 }
256 
257 void Amd64Encoder::EncodeJump(RelocationInfo *relocation)
258 {
259 #ifdef PANDA_TARGET_MACOS
260     LOG(FATAL, COMPILER) << "Not supported in Macos build";
261 #else
262     // NOLINTNEXTLINE(readability-magic-numbers)
263     std::array<uint8_t, 5U> data = {0xe9, 0, 0, 0, 0};
264     GetMasm()->embed(data.data(), data.size());
265 
266     constexpr int ADDEND = 4;
267     relocation->offset = GetCursorOffset() - ADDEND;
268     relocation->addend = -ADDEND;
269     relocation->type = R_X86_64_PLT32;
270 #endif
271 }
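// Hedged note, not part of the original source: 0xe9 is "jmp rel32", so the five embedded
// bytes form a jump whose 4-byte displacement is left as zero for the linker to patch.
// Assuming standard SysV R_X86_64_PLT32 semantics, relocation->offset points at the
// displacement field (cursor - 4) and addend = -4 because rel32 is measured from the end
// of the instruction.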
272 
273 void Amd64Encoder::EncodeBitTestAndBranch(LabelHolder::LabelId id, compiler::Reg reg, uint32_t bit_pos, bool bit_value)
274 {
275     ASSERT(reg.IsScalar() && reg.GetSize() > bit_pos);
276     auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
277     if (reg.GetSize() == DOUBLE_WORD_SIZE) {
278         ScopedTmpRegU64 tmp_reg(this);
279         GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(static_cast<uint64_t>(1) << bit_pos));
280         GetMasm()->test(ArchReg(reg), ArchReg(tmp_reg));
281     } else {
282         GetMasm()->test(ArchReg(reg), asmjit::imm(1U << bit_pos));
283     }
284     if (bit_value) {
285         GetMasm()->j(ArchCc(Condition::NE), *label);
286     } else {
287         GetMasm()->j(ArchCc(Condition::EQ), *label);
288     }
289 }
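// Hedged note, not part of the original source: "test r/m64, imm32" only accepts a
// sign-extended 32-bit immediate, so for 64-bit registers the single-bit mask is first
// materialized in a temporary register instead of being encoded directly.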
290 
291 void Amd64Encoder::MakeCall([[maybe_unused]] compiler::RelocationInfo *relocation)
292 {
293 #ifdef PANDA_TARGET_MACOS
294     LOG(FATAL, COMPILER) << "Not supported in Macos build";
295 #else
296     // NOLINTNEXTLINE(readability-magic-numbers)
297     const size_t LEN = 5;
298     std::array<uint8_t, LEN> data = {0xe8, 0, 0, 0, 0};
299     GetMasm()->embed(data.data(), data.size());
300 
301     constexpr int APPEND = 4;
302     relocation->offset = GetCursorOffset() - APPEND;
303     relocation->addend = -APPEND;
304     relocation->type = R_X86_64_PLT32;
305 #endif
306 }
307 
308 void Amd64Encoder::MakeCall(LabelHolder::LabelId id)
309 {
310     auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
311     GetMasm()->call(*label);
312 }
313 
314 void Amd64Encoder::MakeCall(const void *entry_point)
315 {
316     ScopedTmpRegU64 tmp_reg(this);
317     GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(entry_point));
318     GetMasm()->call(ArchReg(tmp_reg));
319 }
320 
321 void Amd64Encoder::MakeCall(Reg reg)
322 {
323     GetMasm()->call(ArchReg(reg));
324 }
325 
326 void Amd64Encoder::MakeCall(MemRef entry_point)
327 {
328     ScopedTmpRegU64 tmp_reg(this);
329     EncodeLdr(tmp_reg, false, entry_point);
330     GetMasm()->call(ArchReg(tmp_reg));
331 }
332 
333 template <typename Func>
334 void Amd64Encoder::EncodeRelativePcMov(Reg reg, intptr_t offset, Func encode_instruction)
335 {
336     auto pos = GetMasm()->offset();
337     encode_instruction(reg, offset);
338     offset -= (GetMasm()->offset() - pos);
339     GetMasm()->setOffset(pos);
340     encode_instruction(reg, offset);
341 }
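// Hedged note, not part of the original source: the instruction is deliberately emitted
// twice. The first pass reveals how many bytes the encoding takes; the cursor is then
// rewound and the instruction re-emitted with the displacement reduced by that length,
// because a RIP-relative displacement is counted from the end of the instruction, not
// from its start.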
342 
343 void Amd64Encoder::MakeCallAot(intptr_t offset)
344 {
345     ScopedTmpRegU64 tmp_reg(this);
346     EncodeRelativePcMov(tmp_reg, offset, [this](Reg reg, intptr_t offset) {
347         GetMasm()->long_().mov(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
348     });
349     GetMasm()->call(ArchReg(tmp_reg));
350 }
351 
352 bool Amd64Encoder::CanMakeCallByOffset(intptr_t offset)
353 {
354     return offset == static_cast<intptr_t>(static_cast<int32_t>(offset));
355 }
356 
357 void Amd64Encoder::MakeCallByOffset(intptr_t offset)
358 {
359     GetMasm()->call(GetCursorOffset() + int32_t(offset));
360 }
361 
362 void Amd64Encoder::MakeLoadAotTable(intptr_t offset, Reg reg)
363 {
364     EncodeRelativePcMov(reg, offset, [this](Reg reg, intptr_t offset) {
365         GetMasm()->long_().mov(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
366     });
367 }
368 
369 void Amd64Encoder::MakeLoadAotTableAddr([[maybe_unused]] intptr_t offset, [[maybe_unused]] Reg addr,
370                                         [[maybe_unused]] Reg val)
371 {
372     EncodeRelativePcMov(addr, offset, [this](Reg reg, intptr_t offset) {
373         GetMasm()->long_().lea(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
374     });
375     GetMasm()->mov(ArchReg(val), asmjit::x86::ptr(ArchReg(addr)));
376 }
377 
378 void Amd64Encoder::EncodeAbort()
379 {
380     GetMasm()->int3();
381 }
382 
383 void Amd64Encoder::EncodeReturn()
384 {
385     GetMasm()->ret();
386 }
387 
388 void Amd64Encoder::EncodeMul([[maybe_unused]] Reg dst, [[maybe_unused]] Reg src, [[maybe_unused]] Imm imm)
389 {
390     SetFalseResult();
391 }
392 
393 void Amd64Encoder::EncodeNop()
394 {
395     GetMasm()->nop();
396 }
397 
398 void Amd64Encoder::EncodeMov(Reg dst, Reg src)
399 {
400     if (dst == src) {
401         return;
402     }
403 
404     if (dst.IsFloat() != src.IsFloat()) {
405         ASSERT(src.GetSize() == dst.GetSize());
406         if (dst.GetSize() == WORD_SIZE) {
407             if (dst.IsFloat()) {
408                 GetMasm()->movd(ArchVReg(dst), ArchReg(src));
409             } else {
410                 GetMasm()->movd(ArchReg(dst), ArchVReg(src));
411             }
412         } else {
413             ASSERT(dst.GetSize() == DOUBLE_WORD_SIZE);
414             if (dst.IsFloat()) {
415                 GetMasm()->movq(ArchVReg(dst), ArchReg(src));
416             } else {
417                 GetMasm()->movq(ArchReg(dst), ArchVReg(src));
418             }
419         }
420         return;
421     }
422 
423     if (dst.IsFloat()) {
424         ASSERT(src.IsFloat());
425         if (dst.GetType() == FLOAT32_TYPE) {
426             GetMasm()->movss(ArchVReg(dst), ArchVReg(src));
427         } else {
428             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
429         }
430         return;
431     }
432 
433     if (dst.GetSize() < WORD_SIZE && dst.GetSize() == src.GetSize()) {
434         GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
435     }
436 
437     if (dst.GetSize() == src.GetSize()) {
438         GetMasm()->mov(ArchReg(dst), ArchReg(src));
439     } else {
440         EncodeCast(dst, false, src, false);
441     }
442 }
443 
444 void Amd64Encoder::EncodeNeg(Reg dst, Reg src)
445 {
446     if (dst.IsScalar()) {
447         EncodeMov(dst, src);
448         GetMasm()->neg(ArchReg(dst));
449         return;
450     }
451 
452     if (dst.GetType() == FLOAT32_TYPE) {
453         ScopedTmpRegF32 tmp(this);
454         CopyImmToXmm(tmp, -0.0F);
455 
456         if (dst.GetId() != src.GetId()) {
457             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
458         }
459         GetMasm()->xorps(ArchVReg(dst), ArchVReg(tmp));
460     } else {
461         ScopedTmpRegF64 tmp(this);
462         CopyImmToXmm(tmp, -0.0);
463 
464         if (dst.GetId() != src.GetId()) {
465             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
466         }
467         GetMasm()->xorps(ArchVReg(dst), ArchVReg(tmp));
468     }
469 }
470 
471 void Amd64Encoder::EncodeAbs(Reg dst, Reg src)
472 {
473     if (dst.IsScalar()) {
474         auto size = std::max<uint8_t>(src.GetSize(), WORD_SIZE);
475 
476         if (dst.GetId() != src.GetId()) {
477             GetMasm()->mov(ArchReg(dst), ArchReg(src));
478             GetMasm()->neg(ArchReg(dst));
479             GetMasm()->cmovl(ArchReg(dst, size), ArchReg(src, size));
480         } else if (GetScratchRegistersCount() > 0) {
481             ScopedTmpReg tmp_reg(this, dst.GetType());
482 
483             GetMasm()->mov(ArchReg(tmp_reg), ArchReg(src));
484             GetMasm()->neg(ArchReg(tmp_reg));
485 
486             GetMasm()->cmovl(ArchReg(tmp_reg, size), ArchReg(src, size));
487             GetMasm()->mov(ArchReg(dst), ArchReg(tmp_reg));
488         } else {
489             auto end = GetMasm()->newLabel();
490 
491             GetMasm()->test(ArchReg(dst), ArchReg(dst));
492             GetMasm()->jns(end);
493 
494             GetMasm()->neg(ArchReg(dst));
495             GetMasm()->bind(end);
496         }
497         return;
498     }
499 
500     if (dst.GetType() == FLOAT32_TYPE) {
501         ScopedTmpRegF32 tmp(this);
502         // NOLINTNEXTLINE(readability-magic-numbers)
503         CopyImmToXmm(tmp, uint32_t(0x7fffffff));
504 
505         if (dst.GetId() != src.GetId()) {
506             GetMasm()->movss(ArchVReg(dst), ArchVReg(src));
507         }
508         GetMasm()->andps(ArchVReg(dst), ArchVReg(tmp));
509     } else {
510         ScopedTmpRegF64 tmp(this);
511         // NOLINTNEXTLINE(readability-magic-numbers)
512         CopyImmToXmm(tmp, uint64_t(0x7fffffffffffffff));
513 
514         if (dst.GetId() != src.GetId()) {
515             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
516         }
517         GetMasm()->andps(ArchVReg(dst), ArchVReg(tmp));
518     }
519 }
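// Hedged, host-side sketch of the float path above (illustrative only, assumes <cstdint>
// and <cstring>): fabs amounts to clearing the sign bit, which is exactly what andps with
// the 0x7fffffff / 0x7fffffffffffffff mask does.
static inline float AbsViaSignMaskSketch(float value)
{
    uint32_t bits = 0;
    std::memcpy(&bits, &value, sizeof(bits));
    bits &= 0x7fffffffU;  // drop the sign bit
    std::memcpy(&value, &bits, sizeof(value));
    return value;
}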
520 
521 void Amd64Encoder::EncodeNot(Reg dst, Reg src)
522 {
523     ASSERT(dst.IsScalar());
524 
525     EncodeMov(dst, src);
526     GetMasm()->not_(ArchReg(dst));
527 }
528 
529 void Amd64Encoder::EncodeSqrt(Reg dst, Reg src)
530 {
531     ASSERT(dst.IsFloat());
532     if (src.GetType() == FLOAT32_TYPE) {
533         GetMasm()->sqrtps(ArchVReg(dst), ArchVReg(src));
534     } else {
535         GetMasm()->sqrtpd(ArchVReg(dst), ArchVReg(src));
536     }
537 }
538 
539 void Amd64Encoder::EncodeCastFloatToScalar(Reg dst, bool dst_signed, Reg src)
540 {
541     // We DON'T support casts from float32/64 to int8/16 or bool: such a cast is not declared anywhere
542     // in other languages or architectures, so we do not know what its behavior should be.
543     ASSERT(dst.GetSize() >= WORD_SIZE);
544     auto end = GetMasm()->newLabel();
545 
546     // if src is NaN, then dst = 0
547     EncodeCastFloatCheckNan(dst, src, end);
548 
549     // For JS number cast we treat Infinity as a zero integer value
550     if (IsJsNumberCast() && src.GetType() == FLOAT64_TYPE) {
551         static constexpr uint64_t EXP_BIT_MASK = 0x7FF0000000000000L;
552         ScopedTmpReg cmp_reg(this, src.GetType());
553         ScopedTmpReg tmp_reg(this, src.GetType() == FLOAT64_TYPE ? INT64_TYPE : INT32_TYPE);
554         GetMasm()->mov(ArchReg(tmp_reg, DOUBLE_WORD_SIZE), asmjit::imm(EXP_BIT_MASK));
555         GetMasm()->movq(ArchVReg(cmp_reg), ArchReg(tmp_reg));
556         GetMasm()->ucomisd(ArchVReg(src), ArchVReg(cmp_reg));
557         GetMasm()->je(end);
558     }
559 
560     if (dst_signed) {
561         EncodeCastFloatSignCheckRange(dst, src, end);
562     } else {
563         EncodeCastFloatUnsignCheckRange(dst, src, end);
564     }
565 
566     if (src.GetType() == FLOAT32_TYPE) {
567         if (dst.GetSize() == DOUBLE_WORD_SIZE) {
568             EncodeCastFloat32ToUint64(dst, src);
569         } else {
570             GetMasm()->cvttss2si(ArchReg(dst, DOUBLE_WORD_SIZE), ArchVReg(src));
571         }
572     } else {
573         if (dst.GetSize() == DOUBLE_WORD_SIZE) {
574             EncodeCastFloat64ToUint64(dst, src);
575         } else {
576             GetMasm()->cvttsd2si(ArchReg(dst, DOUBLE_WORD_SIZE), ArchVReg(src));
577         }
578     }
579 
580     GetMasm()->bind(end);
581 }
582 
583 void Amd64Encoder::EncodeCastFloat32ToUint64(Reg dst, Reg src)
584 {
585     auto big_number_label = GetMasm()->newLabel();
586     auto end_label = GetMasm()->newLabel();
587     ScopedTmpReg tmp_reg(this, src.GetType());
588     ScopedTmpReg tmp_num(this, dst.GetType());
589 
590     // 2^63 as a float32: the smallest value that no longer fits a signed int64 conversion
591     // NOLINTNEXTLINE (readability-magic-numbers)
592     GetMasm()->mov(ArchReg(dst, WORD_SIZE), asmjit::imm(0x5F000000));
593     GetMasm()->movd(ArchVReg(tmp_reg), ArchReg(dst, WORD_SIZE));
594     GetMasm()->comiss(ArchVReg(src), ArchVReg(tmp_reg));
595     GetMasm()->jnb(big_number_label);
596 
597     GetMasm()->cvttss2si(ArchReg(dst), ArchVReg(src));
598     GetMasm()->jmp(end_label);
599 
600     GetMasm()->bind(big_number_label);
601     GetMasm()->subss(ArchVReg(src), ArchVReg(tmp_reg));
602     GetMasm()->cvttss2si(ArchReg(dst), ArchVReg(src));
603     // NOLINTNEXTLINE (readability-magic-numbers)
604     GetMasm()->mov(ArchReg(tmp_num), asmjit::imm(0x8000000000000000));
605     GetMasm()->xor_(ArchReg(dst), ArchReg(tmp_num));
606     GetMasm()->bind(end_label);
607 }
608 
609 void Amd64Encoder::EncodeCastFloat64ToUint64(Reg dst, Reg src)
610 {
611     auto big_number_label = GetMasm()->newLabel();
612     auto end_label = GetMasm()->newLabel();
613     ScopedTmpReg tmp_reg(this, src.GetType());
614     ScopedTmpReg tmp_num(this, dst.GetType());
615 
616     // 2^63 as a float64: the smallest value that no longer fits a signed int64 conversion
617     // NOLINTNEXTLINE (readability-magic-numbers)
618     GetMasm()->mov(ArchReg(dst), asmjit::imm(0x43E0000000000000));
619     GetMasm()->movq(ArchVReg(tmp_reg), ArchReg(dst));
620     GetMasm()->comisd(ArchVReg(src), ArchVReg(tmp_reg));
621     GetMasm()->jnb(big_number_label);
622 
623     GetMasm()->cvttsd2si(ArchReg(dst), ArchVReg(src));
624     GetMasm()->jmp(end_label);
625 
626     GetMasm()->bind(big_number_label);
627     GetMasm()->subsd(ArchVReg(src), ArchVReg(tmp_reg));
628     GetMasm()->cvttsd2si(ArchReg(dst), ArchVReg(src));
629     // NOLINTNEXTLINE (readability-magic-numbers)
630     GetMasm()->mov(ArchReg(tmp_num), asmjit::imm(0x8000000000000000));
631     GetMasm()->xor_(ArchReg(dst), ArchReg(tmp_num));
632     GetMasm()->bind(end_label);
633 }
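// Hedged note, not part of the original source: 0x5F000000 and 0x43E0000000000000 are
// 2^63 encoded as float32 and float64. Values below 2^63 fit a signed conversion, so
// cvttss2si/cvttsd2si is used directly; larger values are biased down by 2^63, converted,
// and the top bit is restored with an xor. A host-side sketch of the double path, assuming
// <cstdint> (the helper name is illustrative only):
static inline uint64_t DoubleToUint64Sketch(double value)
{
    constexpr double TWO_POW_63 = 9223372036854775808.0;  // 2^63
    if (value < TWO_POW_63) {
        return static_cast<uint64_t>(static_cast<int64_t>(value));
    }
    // big-number path: bias down, convert, then put the top bit back
    return static_cast<uint64_t>(static_cast<int64_t>(value - TWO_POW_63)) ^ (uint64_t(1) << 63U);
}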
634 
635 void Amd64Encoder::EncodeCastFloatCheckNan(Reg dst, Reg src, const asmjit::Label &end)
636 {
637     GetMasm()->xor_(ArchReg(dst, DOUBLE_WORD_SIZE), ArchReg(dst, DOUBLE_WORD_SIZE));
638     if (src.GetType() == FLOAT32_TYPE) {
639         GetMasm()->ucomiss(ArchVReg(src), ArchVReg(src));
640     } else {
641         GetMasm()->ucomisd(ArchVReg(src), ArchVReg(src));
642     }
643     GetMasm()->jp(end);
644 }
645 
646 void Amd64Encoder::EncodeCastFloatSignCheckRange(Reg dst, Reg src, const asmjit::Label &end)
647 {
648     // if src < INT_MIN, then dst = INT_MIN
649     // if src >= (INT_MAX + 1), then dst = INT_MAX
650     if (dst.GetSize() == DOUBLE_WORD_SIZE) {
651         EncodeCastFloatCheckRange(dst, src, end, INT64_MIN, INT64_MAX);
652     } else {
653         EncodeCastFloatCheckRange(dst, src, end, INT32_MIN, INT32_MAX);
654     }
655 }
656 
657 void Amd64Encoder::EncodeCastFloatCheckRange(Reg dst, Reg src, const asmjit::Label &end, const int64_t min_value,
658                                              const uint64_t max_value)
659 {
660     ScopedTmpReg cmp_reg(this, src.GetType());
661     ScopedTmpReg tmp_reg(this, src.GetType() == FLOAT64_TYPE ? INT64_TYPE : INT32_TYPE);
662 
663     GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(min_value));
664     if (src.GetType() == FLOAT32_TYPE) {
665         GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(bit_cast<uint32_t>(float(min_value))));
666         GetMasm()->movd(ArchVReg(cmp_reg), ArchReg(tmp_reg));
667         GetMasm()->ucomiss(ArchVReg(src), ArchVReg(cmp_reg));
668     } else {
669         GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(bit_cast<uint64_t>(double(min_value))));
670         GetMasm()->movq(ArchVReg(cmp_reg), ArchReg(tmp_reg));
671         GetMasm()->ucomisd(ArchVReg(src), ArchVReg(cmp_reg));
672     }
673     GetMasm()->jb(end);
674 
675     GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(max_value));
676     if (src.GetType() == FLOAT32_TYPE) {
677         GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(bit_cast<uint32_t>(float(max_value) + 1U)));
678         GetMasm()->movd(ArchVReg(cmp_reg), ArchReg(tmp_reg));
679         GetMasm()->ucomiss(ArchVReg(src), ArchVReg(cmp_reg));
680     } else {
681         GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(bit_cast<uint64_t>(double(max_value) + 1U)));
682         GetMasm()->movq(ArchVReg(cmp_reg), ArchReg(tmp_reg));
683         GetMasm()->ucomisd(ArchVReg(src), ArchVReg(cmp_reg));
684     }
685     GetMasm()->jae(end);
686 }
687 
688 void Amd64Encoder::EncodeCastFloatUnsignCheckRange(Reg dst, Reg src, const asmjit::Label &end)
689 {
690     // if src < 0, then dst = 0
691     // if src >= (UINT_MAX + 1), then dst = UINT_MAX
692     if (dst.GetSize() == DOUBLE_WORD_SIZE) {
693         EncodeCastFloatCheckRange(dst, src, end, 0, UINT64_MAX);
694     } else {
695         EncodeCastFloatCheckRange(dst, src, end, 0, UINT32_MAX);
696     }
697 }
698 
699 void Amd64Encoder::EncodeCastScalarToFloatUnsignDouble(Reg dst, Reg src)
700 {
701     if (dst.GetType() == FLOAT32_TYPE) {
702         ScopedTmpRegU64 int1_reg(this);
703         ScopedTmpRegU64 int2_reg(this);
704 
705         auto sgn = GetMasm()->newLabel();
706         auto end = GetMasm()->newLabel();
707 
708         GetMasm()->test(ArchReg(src), ArchReg(src));
709         GetMasm()->js(sgn);
710         GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(src));
711         GetMasm()->jmp(end);
712 
713         GetMasm()->bind(sgn);
714         GetMasm()->mov(ArchReg(int1_reg), ArchReg(src));
715         GetMasm()->mov(ArchReg(int2_reg), ArchReg(src));
716         GetMasm()->shr(ArchReg(int2_reg), asmjit::imm(1));
717         GetMasm()->and_(ArchReg(int1_reg, WORD_SIZE), asmjit::imm(1));
718         GetMasm()->or_(ArchReg(int1_reg), ArchReg(int2_reg));
719         GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(int1_reg));
720         GetMasm()->addss(ArchVReg(dst), ArchVReg(dst));
721 
722         GetMasm()->bind(end);
723     } else {
724         static constexpr std::array<uint32_t, 4> ARR1 = {uint32_t(0x43300000), uint32_t(0x45300000), 0x0, 0x0};
725         static constexpr std::array<uint64_t, 2> ARR2 = {uint64_t(0x4330000000000000), uint64_t(0x4530000000000000)};
726 
727         ScopedTmpReg float1_reg(this, dst.GetType());
728         ScopedTmpRegF64 tmp(this);
729 
730         GetMasm()->movq(ArchVReg(float1_reg), ArchReg(src));
731         CopyArrayToXmm(tmp, ARR1);
732         GetMasm()->punpckldq(ArchVReg(float1_reg), ArchVReg(tmp));
733         CopyArrayToXmm(tmp, ARR2);
734         GetMasm()->subpd(ArchVReg(float1_reg), ArchVReg(tmp));
735         GetMasm()->movapd(ArchVReg(dst), ArchVReg(float1_reg));
736         GetMasm()->unpckhpd(ArchVReg(dst), ArchVReg(float1_reg));
737         GetMasm()->addsd(ArchVReg(dst), ArchVReg(float1_reg));
738     }
739 }
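// Hedged, host-side sketch of the uint64 -> float path above (illustrative only, assumes
// <cstdint>): when the top bit is set, the value is halved while its lowest bit is kept as
// a rounding bit, converted as a signed integer, and the result is doubled back with addss.
static inline float Uint64ToFloatSketch(uint64_t value)
{
    if (static_cast<int64_t>(value) >= 0) {
        return static_cast<float>(static_cast<int64_t>(value));  // fits a signed convert
    }
    uint64_t halved = (value >> 1U) | (value & 1U);  // keep the low bit for rounding
    return static_cast<float>(static_cast<int64_t>(halved)) * 2.0F;
}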
740 
741 void Amd64Encoder::EncodeCastScalarToFloat(Reg dst, Reg src, bool src_signed)
742 {
743     if (!src_signed && src.GetSize() == DOUBLE_WORD_SIZE) {
744         EncodeCastScalarToFloatUnsignDouble(dst, src);
745         return;
746     }
747 
748     if (src.GetSize() < WORD_SIZE || (src_signed && src.GetSize() == WORD_SIZE)) {
749         if (dst.GetType() == FLOAT32_TYPE) {
750             GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(src, WORD_SIZE));
751         } else {
752             GetMasm()->cvtsi2sd(ArchVReg(dst), ArchReg(src, WORD_SIZE));
753         }
754         return;
755     }
756 
757     if (!src_signed && src.GetSize() == WORD_SIZE) {
758         ScopedTmpRegU64 int1_reg(this);
759 
760         GetMasm()->mov(ArchReg(int1_reg, WORD_SIZE), ArchReg(src, WORD_SIZE));
761         if (dst.GetType() == FLOAT32_TYPE) {
762             GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(int1_reg));
763         } else {
764             GetMasm()->cvtsi2sd(ArchVReg(dst), ArchReg(int1_reg));
765         }
766         return;
767     }
768 
769     ASSERT(src_signed && src.GetSize() == DOUBLE_WORD_SIZE);
770     if (dst.GetType() == FLOAT32_TYPE) {
771         GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(src));
772     } else {
773         GetMasm()->cvtsi2sd(ArchVReg(dst), ArchReg(src));
774     }
775 }
776 
777 void Amd64Encoder::EncodeCastToBool(Reg dst, Reg src)
778 {
779     // The ISA says that we only support these casts:
780     // i32tou1, i64tou1, u32tou1, u64tou1
781     ASSERT(src.IsScalar());
782     ASSERT(dst.IsScalar());
783 
784     // The minimal type in our ISA is 32-bit, so bool is stored in 32 bits
785     GetMasm()->test(ArchReg(src), ArchReg(src));
786     // A single flag-preserving "mov" is better than two jumps; any other zeroing instruction would overwrite the flags set by "test".
787     GetMasm()->mov(ArchReg(dst, WORD_SIZE), asmjit::imm(0));
788     GetMasm()->setne(ArchReg(dst));
789 }
790 
791 void Amd64Encoder::EncodeCast(Reg dst, bool dst_signed, Reg src, bool src_signed)
792 {
793     if (src.IsFloat() && dst.IsScalar()) {
794         EncodeCastFloatToScalar(dst, dst_signed, src);
795         return;
796     }
797 
798     if (src.IsScalar() && dst.IsFloat()) {
799         EncodeCastScalarToFloat(dst, src, src_signed);
800         return;
801     }
802 
803     if (src.IsFloat() && dst.IsFloat()) {
804         if (src.GetSize() != dst.GetSize()) {
805             if (src.GetType() == FLOAT32_TYPE) {
806                 GetMasm()->cvtss2sd(ArchVReg(dst), ArchVReg(src));
807             } else {
808                 GetMasm()->cvtsd2ss(ArchVReg(dst), ArchVReg(src));
809             }
810             return;
811         }
812 
813         if (src.GetType() == FLOAT32_TYPE) {
814             GetMasm()->movss(ArchVReg(dst), ArchVReg(src));
815         } else {
816             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
817         }
818         return;
819     }
820 
821     ASSERT(src.IsScalar() && dst.IsScalar());
822     EncodeCastScalar(dst, dst_signed, src, src_signed);
823 }
824 
825 void Amd64Encoder::EncodeCastScalar(Reg dst, bool dst_signed, Reg src, bool src_signed)
826 {
827     auto extend_to_32bit = [this](Reg reg, bool is_signed) {
828         if (reg.GetSize() < WORD_SIZE) {
829             if (is_signed) {
830                 GetMasm()->movsx(ArchReg(reg, WORD_SIZE), ArchReg(reg));
831             } else {
832                 GetMasm()->movzx(ArchReg(reg, WORD_SIZE), ArchReg(reg));
833             }
834         }
835     };
836 
837     if (src.GetSize() >= dst.GetSize()) {
838         if (dst.GetId() != src.GetId()) {
839             GetMasm()->mov(ArchReg(dst), ArchReg(src, dst.GetSize()));
840         }
841         extend_to_32bit(dst, dst_signed);
842         return;
843     }
844 
845     if (src_signed) {
846         if (dst.GetSize() < DOUBLE_WORD_SIZE) {
847             GetMasm()->movsx(ArchReg(dst), ArchReg(src));
848             extend_to_32bit(dst, dst_signed);
849         } else if (src.GetSize() == WORD_SIZE) {
850             GetMasm()->movsxd(ArchReg(dst), ArchReg(src));
851         } else {
852             GetMasm()->movsx(ArchReg(dst, WORD_SIZE), ArchReg(src));
853             GetMasm()->movsxd(ArchReg(dst), ArchReg(dst, WORD_SIZE));
854         }
855         return;
856     }
857 
858     if (src.GetSize() == WORD_SIZE) {
859         GetMasm()->mov(ArchReg(dst, WORD_SIZE), ArchReg(src));
860     } else if (dst.GetSize() == DOUBLE_WORD_SIZE) {
861         GetMasm()->movzx(ArchReg(dst, WORD_SIZE), ArchReg(src));
862     } else {
863         GetMasm()->movzx(ArchReg(dst), ArchReg(src));
864         extend_to_32bit(dst, dst_signed);
865     }
866 }
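// Hedged note, not part of the original source: on x86-64 a write to a 32-bit register
// implicitly zeroes the upper 32 bits, which is why the unsigned 32 -> 64 bit case above
// needs only a plain 32-bit mov and no movzx.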
867 
868 Reg Amd64Encoder::MakeShift(Shift shift)
869 {
870     Reg reg = shift.GetBase();
871     ASSERT(reg.IsValid());
872     if (reg.IsScalar()) {
873         ASSERT(shift.GetType() != ShiftType::INVALID_SHIFT);
874         switch (shift.GetType()) {
875             case ShiftType::LSL:
876                 GetMasm()->shl(ArchReg(reg), asmjit::imm(shift.GetScale()));
877                 break;
878             case ShiftType::LSR:
879                 GetMasm()->shr(ArchReg(reg), asmjit::imm(shift.GetScale()));
880                 break;
881             case ShiftType::ASR:
882                 GetMasm()->sar(ArchReg(reg), asmjit::imm(shift.GetScale()));
883                 break;
884             case ShiftType::ROR:
885                 GetMasm()->ror(ArchReg(reg), asmjit::imm(shift.GetScale()));
886                 break;
887             default:
888                 UNREACHABLE();
889         }
890 
891         return reg;
892     }
893 
894     // Invalid register type
895     UNREACHABLE();
896 }
897 
898 void Amd64Encoder::EncodeAdd(Reg dst, Reg src0, Shift src1)
899 {
900     if (dst.IsFloat()) {
901         SetFalseResult();
902         return;
903     }
904 
905     ASSERT(dst.GetSize() >= src0.GetSize());
906 
907     auto shift_reg = MakeShift(src1);
908 
909     if (src0.GetSize() < WORD_SIZE) {
910         EncodeAdd(dst, src0, shift_reg);
911         return;
912     }
913 
914     if (src0.GetSize() == DOUBLE_WORD_SIZE && shift_reg.GetSize() < DOUBLE_WORD_SIZE) {
915         GetMasm()->movsxd(ArchReg(shift_reg, DOUBLE_WORD_SIZE), ArchReg(shift_reg));
916     }
917 
918     GetMasm()->lea(ArchReg(dst), asmjit::x86::ptr(ArchReg(src0), ArchReg(shift_reg, src0.GetSize())));
919 }
920 
921 void Amd64Encoder::EncodeAdd(Reg dst, Reg src0, Reg src1)
922 {
923     if (dst.IsScalar()) {
924         auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
925         GetMasm()->lea(ArchReg(dst, size), asmjit::x86::ptr(ArchReg(src0, size), ArchReg(src1, size)));
926         return;
927     }
928 
929     if (dst.GetType() == FLOAT32_TYPE) {
930         if (dst.GetId() == src0.GetId()) {
931             GetMasm()->addss(ArchVReg(dst), ArchVReg(src1));
932         } else if (dst.GetId() == src1.GetId()) {
933             GetMasm()->addss(ArchVReg(dst), ArchVReg(src0));
934         } else {
935             GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
936             GetMasm()->addss(ArchVReg(dst), ArchVReg(src1));
937         }
938     } else {
939         if (dst.GetId() == src0.GetId()) {
940             GetMasm()->addsd(ArchVReg(dst), ArchVReg(src1));
941         } else if (dst.GetId() == src1.GetId()) {
942             GetMasm()->addsd(ArchVReg(dst), ArchVReg(src0));
943         } else {
944             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
945             GetMasm()->addsd(ArchVReg(dst), ArchVReg(src1));
946         }
947     }
948 }
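// Hedged note, not part of the original source: lea dst, [src0 + src1] acts as a
// non-destructive three-operand add here; it leaves the flags untouched and lets dst
// differ from both sources without an extra mov.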
949 
950 void Amd64Encoder::EncodeSub(Reg dst, Reg src0, Reg src1)
951 {
952     if (dst.IsScalar()) {
953         if (dst.GetId() == src0.GetId()) {
954             GetMasm()->sub(ArchReg(dst), ArchReg(src1));
955         } else if (dst.GetId() == src1.GetId()) {
956             GetMasm()->sub(ArchReg(dst), ArchReg(src0));
957             GetMasm()->neg(ArchReg(dst));
958         } else {
959             GetMasm()->mov(ArchReg(dst), ArchReg(src0));
960             GetMasm()->sub(ArchReg(dst), ArchReg(src1));
961         }
962         return;
963     }
964 
965     if (dst.GetType() == FLOAT32_TYPE) {
966         if (dst.GetId() == src0.GetId()) {
967             GetMasm()->subss(ArchVReg(dst), ArchVReg(src1));
968         } else if (dst.GetId() != src1.GetId()) {
969             GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
970             GetMasm()->subss(ArchVReg(dst), ArchVReg(src1));
971         } else {
972             ScopedTmpReg tmp_reg(this, dst.GetType());
973             GetMasm()->movss(ArchVReg(tmp_reg), ArchVReg(src0));
974             GetMasm()->subss(ArchVReg(tmp_reg), ArchVReg(src1));
975             GetMasm()->movss(ArchVReg(dst), ArchVReg(tmp_reg));
976         }
977     } else {
978         if (dst.GetId() == src0.GetId()) {
979             GetMasm()->subsd(ArchVReg(dst), ArchVReg(src1));
980         } else if (dst.GetId() != src1.GetId()) {
981             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
982             GetMasm()->subsd(ArchVReg(dst), ArchVReg(src1));
983         } else {
984             ScopedTmpReg tmp_reg(this, dst.GetType());
985             GetMasm()->movsd(ArchVReg(tmp_reg), ArchVReg(src0));
986             GetMasm()->subsd(ArchVReg(tmp_reg), ArchVReg(src1));
987             GetMasm()->movsd(ArchVReg(dst), ArchVReg(tmp_reg));
988         }
989     }
990 }
991 
992 void Amd64Encoder::EncodeMul(Reg dst, Reg src0, Reg src1)
993 {
994     if (dst.IsScalar()) {
995         auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
996 
997         if (dst.GetId() == src0.GetId()) {
998             GetMasm()->imul(ArchReg(dst, size), ArchReg(src1, size));
999         } else if (dst.GetId() == src1.GetId()) {
1000             GetMasm()->imul(ArchReg(dst, size), ArchReg(src0, size));
1001         } else {
1002             GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
1003             GetMasm()->imul(ArchReg(dst, size), ArchReg(src1, size));
1004         }
1005         return;
1006     }
1007 
1008     if (dst.GetType() == FLOAT32_TYPE) {
1009         if (dst.GetId() == src0.GetId()) {
1010             GetMasm()->mulss(ArchVReg(dst), ArchVReg(src1));
1011         } else if (dst.GetId() == src1.GetId()) {
1012             GetMasm()->mulss(ArchVReg(dst), ArchVReg(src0));
1013         } else {
1014             GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
1015             GetMasm()->mulss(ArchVReg(dst), ArchVReg(src1));
1016         }
1017     } else {
1018         if (dst.GetId() == src0.GetId()) {
1019             GetMasm()->mulsd(ArchVReg(dst), ArchVReg(src1));
1020         } else if (dst.GetId() == src1.GetId()) {
1021             GetMasm()->mulsd(ArchVReg(dst), ArchVReg(src0));
1022         } else {
1023             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
1024             GetMasm()->mulsd(ArchVReg(dst), ArchVReg(src1));
1025         }
1026     }
1027 }
1028 
1029 void Amd64Encoder::EncodeAddOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1030 {
1031     ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1032     ASSERT(cc == Condition::VS || cc == Condition::VC);
1033     auto size = dst.GetSize();
1034     if (dst.GetId() == src0.GetId()) {
1035         GetMasm()->add(ArchReg(dst, size), ArchReg(src1, size));
1036     } else if (dst.GetId() == src1.GetId()) {
1037         GetMasm()->add(ArchReg(dst, size), ArchReg(src0, size));
1038     } else {
1039         GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
1040         GetMasm()->add(ArchReg(dst, size), ArchReg(src1, size));
1041     }
1042     auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
1043     GetMasm()->j(ArchCc(cc, false), *label);
1044 }
1045 
1046 void Amd64Encoder::EncodeSubOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1047 {
1048     ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1049     ASSERT(cc == Condition::VS || cc == Condition::VC);
1050     auto size = dst.GetSize();
1051     if (dst.GetId() == src0.GetId()) {
1052         GetMasm()->sub(ArchReg(dst, size), ArchReg(src1, size));
1053     } else if (dst.GetId() == src1.GetId()) {
1054         ScopedTmpReg tmp_reg(this, dst.GetType());
1055         GetMasm()->mov(ArchReg(tmp_reg, size), ArchReg(src1, size));
1056         GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
1057         GetMasm()->sub(ArchReg(dst, size), ArchReg(tmp_reg, size));
1058     } else {
1059         GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
1060         GetMasm()->sub(ArchReg(dst, size), ArchReg(src1, size));
1061     }
1062     auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
1063     GetMasm()->j(ArchCc(cc, false), *label);
1064 }
1065 
1066 void Amd64Encoder::EncodeDivFloat(Reg dst, Reg src0, Reg src1)
1067 {
1068     ASSERT(dst.IsFloat());
1069     if (dst.GetType() == FLOAT32_TYPE) {
1070         if (dst.GetId() == src0.GetId()) {
1071             GetMasm()->divss(ArchVReg(dst), ArchVReg(src1));
1072         } else if (dst.GetId() != src1.GetId()) {
1073             GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
1074             GetMasm()->divss(ArchVReg(dst), ArchVReg(src1));
1075         } else {
1076             ScopedTmpRegF32 tmp(this);
1077             GetMasm()->movss(ArchVReg(tmp), ArchVReg(src0));
1078             GetMasm()->divss(ArchVReg(tmp), ArchVReg(src1));
1079             GetMasm()->movss(ArchVReg(dst), ArchVReg(tmp));
1080         }
1081     } else {
1082         if (dst.GetId() == src0.GetId()) {
1083             GetMasm()->divsd(ArchVReg(dst), ArchVReg(src1));
1084         } else if (dst.GetId() != src1.GetId()) {
1085             GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
1086             GetMasm()->divsd(ArchVReg(dst), ArchVReg(src1));
1087         } else {
1088             ScopedTmpRegF64 tmp(this);
1089             GetMasm()->movsd(ArchVReg(tmp), ArchVReg(src0));
1090             GetMasm()->divsd(ArchVReg(tmp), ArchVReg(src1));
1091             GetMasm()->movsd(ArchVReg(dst), ArchVReg(tmp));
1092         }
1093     }
1094 }
1095 
1096 void Amd64Encoder::EncodeDiv(Reg dst, bool dst_signed, Reg src0, Reg src1)
1097 {
1098     if (dst.IsFloat()) {
1099         EncodeDivFloat(dst, src0, src1);
1100         return;
1101     }
1102 
1103     auto neg_path = GetMasm()->newLabel();
1104     auto crossroad = GetMasm()->newLabel();
1105 
1106     GetMasm()->cmp(ArchReg(src1), asmjit::imm(-1));
1107     GetMasm()->je(neg_path);
1108 
1109     if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
1110         GetMasm()->push(asmjit::x86::rdx);
1111     }
1112     if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1113         GetMasm()->push(asmjit::x86::rax);
1114     }
1115 
1116     ScopedTmpReg tmp_reg(this, dst.GetType());
1117     Reg op1 {src1};
1118     if (src1.GetId() == ConvertRegNumber(asmjit::x86::rax.id()) ||
1119         src1.GetId() == ConvertRegNumber(asmjit::x86::rdx.id())) {
1120         GetMasm()->mov(ArchReg(tmp_reg), ArchReg(src1));
1121         op1 = Reg(tmp_reg);
1122     }
1123 
1124     if (src0.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1125         GetMasm()->mov(asmjit::x86::rax, ArchReg(src0, DOUBLE_WORD_SIZE));
1126     }
1127 
1128     if (dst_signed) {
1129         if (dst.GetSize() <= WORD_SIZE) {
1130             GetMasm()->cdq();
1131         } else {
1132             GetMasm()->cqo();
1133         }
1134         GetMasm()->idiv(ArchReg(op1));
1135     } else {
1136         GetMasm()->xor_(asmjit::x86::rdx, asmjit::x86::rdx);
1137         GetMasm()->div(ArchReg(op1));
1138     }
1139 
1140     if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1141         GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::x86::rax);
1142         GetMasm()->pop(asmjit::x86::rax);
1143     }
1144 
1145     if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
1146         GetMasm()->pop(asmjit::x86::rdx);
1147     }
1148     GetMasm()->jmp(crossroad);
1149 
1150     GetMasm()->bind(neg_path);
1151     if (dst.GetId() != src0.GetId()) {
1152         GetMasm()->mov(ArchReg(dst), ArchReg(src0));
1153     }
1154     GetMasm()->neg(ArchReg(dst));
1155 
1156     GetMasm()->bind(crossroad);
1157 }
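// Hedged note, not part of the original source: a -1 divisor is routed around idiv
// because INT_MIN / -1 raises a #DE exception on x86; since x / -1 equals -x in signed
// arithmetic, that quotient is produced with a neg instead.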
1158 
1159 void Amd64Encoder::EncodeModFloat(Reg dst, Reg src0, Reg src1)
1160 {
1161     ASSERT(dst.IsFloat());
1162     if (dst.GetType() == FLOAT32_TYPE) {
1163         using fp = float (*)(float, float);
1164         MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<fp>(fmodf)));
1165     } else {
1166         using fp = double (*)(double, double);
1167         MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<fp>(fmod)));
1168     }
1169 }
1170 
1171 void Amd64Encoder::EncodeMod(Reg dst, bool dst_signed, Reg src0, Reg src1)
1172 {
1173     if (dst.IsFloat()) {
1174         EncodeModFloat(dst, src0, src1);
1175         return;
1176     }
1177 
1178     auto zero_path = GetMasm()->newLabel();
1179     auto crossroad = GetMasm()->newLabel();
1180 
1181     GetMasm()->cmp(ArchReg(src1), asmjit::imm(-1));
1182     GetMasm()->je(zero_path);
1183 
1184     if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1185         GetMasm()->push(asmjit::x86::rax);
1186     }
1187     if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
1188         GetMasm()->push(asmjit::x86::rdx);
1189     }
1190 
1191     ScopedTmpReg tmp_reg(this, dst.GetType());
1192     Reg op1 {src1};
1193     if (src1.GetId() == ConvertRegNumber(asmjit::x86::rax.id()) ||
1194         src1.GetId() == ConvertRegNumber(asmjit::x86::rdx.id())) {
1195         GetMasm()->mov(ArchReg(tmp_reg), ArchReg(src1));
1196         op1 = Reg(tmp_reg);
1197     }
1198 
1199     if (src0.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1200         GetMasm()->mov(asmjit::x86::rax, ArchReg(src0, DOUBLE_WORD_SIZE));
1201     }
1202 
1203     if (dst_signed) {
1204         if (dst.GetSize() <= WORD_SIZE) {
1205             GetMasm()->cdq();
1206         } else {
1207             GetMasm()->cqo();
1208         }
1209         GetMasm()->idiv(ArchReg(op1));
1210     } else {
1211         GetMasm()->xor_(asmjit::x86::rdx, asmjit::x86::rdx);
1212         GetMasm()->div(ArchReg(op1));
1213     }
1214 
1215     if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
1216         GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::x86::rdx);
1217         GetMasm()->pop(asmjit::x86::rdx);
1218     }
1219 
1220     if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1221         GetMasm()->pop(asmjit::x86::rax);
1222     }
1223     GetMasm()->jmp(crossroad);
1224 
1225     GetMasm()->bind(zero_path);
1226     GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
1227 
1228     GetMasm()->bind(crossroad);
1229 }
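// Hedged note, not part of the original source: as with division, a -1 divisor is
// special-cased because idiv would fault on INT_MIN % -1; the remainder of x % -1 is
// always 0, hence the xor on the zero_path.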
1230 
1231 void Amd64Encoder::EncodeMin(Reg dst, bool dst_signed, Reg src0, Reg src1)
1232 {
1233     if (dst.IsScalar()) {
1234         ScopedTmpReg tmp_reg(this, dst.GetType());
1235         GetMasm()->mov(ArchReg(tmp_reg), ArchReg(src1));
1236         GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
1237 
1238         auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
1239         if (dst_signed) {
1240             GetMasm()->cmovle(ArchReg(tmp_reg, size), ArchReg(src0, size));
1241         } else {
1242             GetMasm()->cmovb(ArchReg(tmp_reg, size), ArchReg(src0, size));
1243         }
1244         EncodeMov(dst, tmp_reg);
1245         return;
1246     }
1247 
1248     EncodeMinMaxFp<false>(dst, src0, src1);
1249 }
1250 
1251 void Amd64Encoder::EncodeMax(Reg dst, bool dst_signed, Reg src0, Reg src1)
1252 {
1253     if (dst.IsScalar()) {
1254         ScopedTmpReg tmp_reg(this, dst.GetType());
1255         GetMasm()->mov(ArchReg(tmp_reg), ArchReg(src1));
1256         GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
1257 
1258         auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
1259         if (dst_signed) {
1260             GetMasm()->cmovge(ArchReg(tmp_reg, size), ArchReg(src0, size));
1261         } else {
1262             GetMasm()->cmova(ArchReg(tmp_reg, size), ArchReg(src0, size));
1263         }
1264         EncodeMov(dst, tmp_reg);
1265         return;
1266     }
1267 
1268     EncodeMinMaxFp<true>(dst, src0, src1);
1269 }
1270 
1271 template <bool is_max>
1272 void Amd64Encoder::EncodeMinMaxFp(Reg dst, Reg src0, Reg src1)
1273 {
1274     auto end = GetMasm()->newLabel();
1275     auto not_equal = GetMasm()->newLabel();
1276     auto got_nan = GetMasm()->newLabel();
1277     auto &src_a = dst.GetId() != src1.GetId() ? src0 : src1;
1278     auto &src_b = src_a.GetId() == src0.GetId() ? src1 : src0;
1279     if (dst.GetType() == FLOAT32_TYPE) {
1280         GetMasm()->movaps(ArchVReg(dst), ArchVReg(src_a));
1281         GetMasm()->ucomiss(ArchVReg(src_b), ArchVReg(src_a));
1282         GetMasm()->jne(not_equal);
1283         GetMasm()->jp(got_nan);
1284         // calculate result for positive/negative zero operands
1285         if (is_max) {
1286             GetMasm()->andps(ArchVReg(dst), ArchVReg(src_b));
1287         } else {
1288             GetMasm()->orps(ArchVReg(dst), ArchVReg(src_b));
1289         }
1290         GetMasm()->jmp(end);
1291         GetMasm()->bind(got_nan);
1292         // if any operand is NaN result is NaN
1293         GetMasm()->por(ArchVReg(dst), ArchVReg(src_b));
1294         GetMasm()->jmp(end);
1295         GetMasm()->bind(not_equal);
1296         if (is_max) {
1297             GetMasm()->maxss(ArchVReg(dst), ArchVReg(src_b));
1298         } else {
1299             GetMasm()->minss(ArchVReg(dst), ArchVReg(src_b));
1300         }
1301         GetMasm()->bind(end);
1302     } else {
1303         GetMasm()->movapd(ArchVReg(dst), ArchVReg(src_a));
1304         GetMasm()->ucomisd(ArchVReg(src_b), ArchVReg(src_a));
1305         GetMasm()->jne(not_equal);
1306         GetMasm()->jp(got_nan);
1307         // calculate result for positive/negative zero operands
1308         if (is_max) {
1309             GetMasm()->andpd(ArchVReg(dst), ArchVReg(src_b));
1310         } else {
1311             GetMasm()->orpd(ArchVReg(dst), ArchVReg(src_b));
1312         }
1313         GetMasm()->jmp(end);
1314         GetMasm()->bind(got_nan);
1315         // if any operand is NaN result is NaN
1316         GetMasm()->por(ArchVReg(dst), ArchVReg(src_b));
1317         GetMasm()->jmp(end);
1318         GetMasm()->bind(not_equal);
1319         if (is_max) {
1320             GetMasm()->maxsd(ArchVReg(dst), ArchVReg(src_b));
1321         } else {
1322             GetMasm()->minsd(ArchVReg(dst), ArchVReg(src_b));
1323         }
1324         GetMasm()->bind(end);
1325     }
1326 }
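// Hedged, host-side sketch of the FP max semantics implemented above (illustrative only,
// assumes <cmath> and <limits>): maxss/minss alone do not handle NaN or the +0.0 / -0.0
// pair the way the IR requires, so equal operands are merged with andps/orps and NaN
// operands are propagated.
static inline double FpMaxSketch(double lhs, double rhs)
{
    if (std::isnan(lhs) || std::isnan(rhs)) {
        return std::numeric_limits<double>::quiet_NaN();  // NaN propagates
    }
    if (lhs == rhs) {  // covers the +0.0 vs -0.0 pair
        return std::signbit(lhs) ? rhs : lhs;  // prefer the non-negative zero
    }
    return lhs > rhs ? lhs : rhs;
}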
1327 
1328 void Amd64Encoder::EncodeShl(Reg dst, Reg src0, Reg src1)
1329 {
1330     ASSERT(dst.IsScalar());
1331     ScopedTmpReg tmp_reg(this, dst.GetType());
1332     Reg rcx(ConvertRegNumber(asmjit::x86::rcx.id()), dst.GetType());
1333     GetMasm()->mov(ArchReg(tmp_reg), ArchReg(src0));
1334     if (dst.GetId() != rcx.GetId()) {
1335         GetMasm()->push(ArchReg(rcx, DOUBLE_WORD_SIZE));
1336     }
1337     GetMasm()->mov(ArchReg(rcx), ArchReg(src1));
1338     GetMasm()->shl(ArchReg(tmp_reg), asmjit::x86::cl);
1339     if (dst.GetId() != rcx.GetId()) {
1340         GetMasm()->pop(ArchReg(rcx, DOUBLE_WORD_SIZE));
1341     }
1342     GetMasm()->mov(ArchReg(dst), ArchReg(tmp_reg));
1343 }
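// Hedged note, not part of the original source: variable shifts on x86-64 take the count
// only in cl, so rcx is spilled and restored around the shift whenever the destination is
// not rcx itself. The same pattern is used by EncodeShr and EncodeAShr below.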
1344 
1345 void Amd64Encoder::EncodeShr(Reg dst, Reg src0, Reg src1)
1346 {
1347     ASSERT(dst.IsScalar());
1348     ScopedTmpReg tmp_reg(this, dst.GetType());
1349     Reg rcx(ConvertRegNumber(asmjit::x86::rcx.id()), dst.GetType());
1350     GetMasm()->mov(ArchReg(tmp_reg), ArchReg(src0));
1351     if (dst.GetId() != rcx.GetId()) {
1352         GetMasm()->push(ArchReg(rcx, DOUBLE_WORD_SIZE));
1353     }
1354     GetMasm()->mov(ArchReg(rcx), ArchReg(src1));
1355     GetMasm()->shr(ArchReg(tmp_reg), asmjit::x86::cl);
1356     if (dst.GetId() != rcx.GetId()) {
1357         GetMasm()->pop(ArchReg(rcx, DOUBLE_WORD_SIZE));
1358     }
1359     GetMasm()->mov(ArchReg(dst), ArchReg(tmp_reg));
1360 }
1361 
1362 void Amd64Encoder::EncodeAShr(Reg dst, Reg src0, Reg src1)
1363 {
1364     ASSERT(dst.IsScalar());
1365     ScopedTmpReg tmp_reg(this, dst.GetType());
1366     Reg rcx(ConvertRegNumber(asmjit::x86::rcx.id()), dst.GetType());
1367     GetMasm()->mov(ArchReg(tmp_reg), ArchReg(src0));
1368     if (dst.GetId() != rcx.GetId()) {
1369         GetMasm()->push(ArchReg(rcx, DOUBLE_WORD_SIZE));
1370     }
1371     GetMasm()->mov(ArchReg(rcx), ArchReg(src1));
1372     GetMasm()->sar(ArchReg(tmp_reg), asmjit::x86::cl);
1373     if (dst.GetId() != rcx.GetId()) {
1374         GetMasm()->pop(ArchReg(rcx, DOUBLE_WORD_SIZE));
1375     }
1376     GetMasm()->mov(ArchReg(dst), ArchReg(tmp_reg));
1377 }
1378 
1379 void Amd64Encoder::EncodeAnd(Reg dst, Reg src0, Reg src1)
1380 {
1381     ASSERT(dst.IsScalar());
1382     if (dst.GetId() == src0.GetId()) {
1383         GetMasm()->and_(ArchReg(dst), ArchReg(src1));
1384     } else if (dst.GetId() == src1.GetId()) {
1385         GetMasm()->and_(ArchReg(dst), ArchReg(src0));
1386     } else {
1387         GetMasm()->mov(ArchReg(dst), ArchReg(src0));
1388         GetMasm()->and_(ArchReg(dst), ArchReg(src1));
1389     }
1390 }
1391 
1392 void Amd64Encoder::EncodeOr(Reg dst, Reg src0, Reg src1)
1393 {
1394     ASSERT(dst.IsScalar());
1395     if (dst.GetId() == src0.GetId()) {
1396         GetMasm()->or_(ArchReg(dst), ArchReg(src1));
1397     } else if (dst.GetId() == src1.GetId()) {
1398         GetMasm()->or_(ArchReg(dst), ArchReg(src0));
1399     } else {
1400         GetMasm()->mov(ArchReg(dst), ArchReg(src0));
1401         GetMasm()->or_(ArchReg(dst), ArchReg(src1));
1402     }
1403 }
1404 
1405 void Amd64Encoder::EncodeXor(Reg dst, Reg src0, Reg src1)
1406 {
1407     ASSERT(dst.IsScalar());
1408     if (dst.GetId() == src0.GetId()) {
1409         GetMasm()->xor_(ArchReg(dst), ArchReg(src1));
1410     } else if (dst.GetId() == src1.GetId()) {
1411         GetMasm()->xor_(ArchReg(dst), ArchReg(src0));
1412     } else {
1413         GetMasm()->mov(ArchReg(dst), ArchReg(src0));
1414         GetMasm()->xor_(ArchReg(dst), ArchReg(src1));
1415     }
1416 }
1417 
1418 void Amd64Encoder::EncodeAdd(Reg dst, Reg src, Imm imm)
1419 {
1420     if (dst.IsFloat()) {
1421         SetFalseResult();
1422         return;
1423     }
1424 
1425     auto imm_val = ImmToSignedInt(imm);
1426     auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
1427     if (ImmFitsSize(imm_val, size)) {
1428         GetMasm()->lea(ArchReg(dst, size), asmjit::x86::ptr(ArchReg(src, size), imm_val));
1429     } else {
1430         if (dst.GetId() != src.GetId()) {
1431             GetMasm()->mov(ArchReg(dst), asmjit::imm(imm_val));
1432             GetMasm()->add(ArchReg(dst), ArchReg(src));
1433         } else {
1434             ScopedTmpReg tmp_reg(this, dst.GetType());
1435             GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
1436             GetMasm()->add(ArchReg(dst), ArchReg(tmp_reg));
1437         }
1438     }
1439 }
1440 
1441 void Amd64Encoder::EncodeSub(Reg dst, Reg src, Imm imm)
1442 {
1443     if (dst.IsFloat()) {
1444         SetFalseResult();
1445         return;
1446     }
1447 
1448     auto imm_val = -ImmToSignedInt(imm);
1449     auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
1450     if (ImmFitsSize(imm_val, size)) {
1451         GetMasm()->lea(ArchReg(dst, size), asmjit::x86::ptr(ArchReg(src, size), imm_val));
1452     } else {
1453         if (dst.GetId() != src.GetId()) {
1454             GetMasm()->mov(ArchReg(dst), asmjit::imm(imm_val));
1455             GetMasm()->add(ArchReg(dst), ArchReg(src));
1456         } else {
1457             ScopedTmpReg tmp_reg(this, dst.GetType());
1458             GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
1459             GetMasm()->add(ArchReg(dst), ArchReg(tmp_reg));
1460         }
1461     }
1462 }
1463 
1464 void Amd64Encoder::EncodeShl(Reg dst, Reg src, Imm imm)
1465 {
1466     ASSERT(dst.IsScalar());
1467     EncodeMov(dst, src);
1468     GetMasm()->shl(ArchReg(dst), ArchImm(imm));
1469 }
1470 
1471 void Amd64Encoder::EncodeShr(Reg dst, Reg src, Imm imm)
1472 {
1473     ASSERT(dst.IsScalar());
1474 
1475     EncodeMov(dst, src);
1476     GetMasm()->shr(ArchReg(dst), ArchImm(imm));
1477 }
1478 
1479 void Amd64Encoder::EncodeAShr(Reg dst, Reg src, Imm imm)
1480 {
1481     ASSERT(dst.IsScalar());
1482     EncodeMov(dst, src);
1483     GetMasm()->sar(ArchReg(dst), ArchImm(imm));
1484 }
1485 
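// A narrow immediate is widened with leading ones so that the AND only clears bits
// within the immediate's own width; the resulting mask is then truncated to the
// destination size before checking whether it can be encoded directly.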
1486 void Amd64Encoder::EncodeAnd(Reg dst, Reg src, Imm imm)
1487 {
1488     ASSERT(dst.IsScalar());
1489     auto imm_val = ImmToUnsignedInt(imm);
1490 
1491     switch (imm.GetSize()) {
1492         case BYTE_SIZE:
1493             imm_val |= ~uint64_t(0xFF);  // NOLINT
1494             break;
1495         case HALF_SIZE:
1496             imm_val |= ~uint64_t(0xFFFF);  // NOLINT
1497             break;
1498         case WORD_SIZE:
1499             imm_val |= ~uint64_t(0xFFFFFFFF);  // NOLINT
1500             break;
1501         default:
1502             break;
1503     }
1504 
1505     if (dst.GetSize() != DOUBLE_WORD_SIZE) {
1506         // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult)
1507         imm_val &= (uint64_t(1) << dst.GetSize()) - 1;
1508     }
1509 
1510     if (ImmFitsSize(imm_val, dst.GetSize())) {
1511         EncodeMov(dst, src);
1512         GetMasm()->and_(ArchReg(dst), imm_val);
1513     } else {
1514         if (dst.GetId() != src.GetId()) {
1515             GetMasm()->mov(ArchReg(dst), asmjit::imm(imm_val));
1516             GetMasm()->and_(ArchReg(dst), ArchReg(src));
1517         } else {
1518             ScopedTmpReg tmp_reg(this, dst.GetType());
1519             GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
1520             GetMasm()->and_(ArchReg(dst), ArchReg(tmp_reg));
1521         }
1522     }
1523 }
1524 
1525 void Amd64Encoder::EncodeOr(Reg dst, Reg src, Imm imm)
1526 {
1527     ASSERT(dst.IsScalar());
1528     auto imm_val = ImmToUnsignedInt(imm);
1529 
1530     if (ImmFitsSize(imm_val, dst.GetSize())) {
1531         EncodeMov(dst, src);
1532         GetMasm()->or_(ArchReg(dst), imm_val);
1533     } else {
1534         if (dst.GetId() != src.GetId()) {
1535             GetMasm()->mov(ArchReg(dst), asmjit::imm(imm_val));
1536             GetMasm()->or_(ArchReg(dst), ArchReg(src));
1537         } else {
1538             ScopedTmpReg tmp_reg(this, dst.GetType());
1539             GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
1540             GetMasm()->or_(ArchReg(dst), ArchReg(tmp_reg));
1541         }
1542     }
1543 }
1544 
1545 void Amd64Encoder::EncodeXor(Reg dst, Reg src, Imm imm)
1546 {
1547     ASSERT(dst.IsScalar());
1548     auto imm_val = ImmToUnsignedInt(imm);
1549 
1550     if (ImmFitsSize(imm_val, dst.GetSize())) {
1551         EncodeMov(dst, src);
1552         GetMasm()->xor_(ArchReg(dst), imm_val);
1553     } else {
1554         if (dst.GetId() != src.GetId()) {
1555             GetMasm()->mov(ArchReg(dst), asmjit::imm(imm_val));
1556             GetMasm()->xor_(ArchReg(dst), ArchReg(src));
1557         } else {
1558             ScopedTmpReg tmp_reg(this, dst.GetType());
1559             GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
1560             GetMasm()->xor_(ArchReg(dst), ArchReg(tmp_reg));
1561         }
1562     }
1563 }
1564 
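// Move-immediate: sub-word scalar destinations are zeroed through their 32-bit alias
// first so the upper bits are well defined; float immediates are materialized as raw
// bit patterns in a GPR and transferred with movd/movq.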
1565 void Amd64Encoder::EncodeMov(Reg dst, Imm src)
1566 {
1567     if (dst.IsScalar()) {
1568         if (dst.GetSize() < WORD_SIZE) {
1569             GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
1570         }
1571         GetMasm()->mov(ArchReg(dst), ArchImm(src));
1572         return;
1573     }
1574 
1575     if (dst.GetType() == FLOAT32_TYPE) {
1576         ScopedTmpRegU32 tmp_reg(this);
1577         auto val = bit_cast<uint32_t>(src.GetValue<float>());
1578         GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(val));
1579         GetMasm()->movd(ArchVReg(dst), ArchReg(tmp_reg));
1580     } else {
1581         ScopedTmpRegU64 tmp_reg(this);
1582         auto val = bit_cast<uint64_t>(src.GetValue<double>());
1583         GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(val));
1584         GetMasm()->movq(ArchVReg(dst), ArchReg(tmp_reg));
1585     }
1586 }
1587 
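// Loads: floats use movss/movsd; signed integer loads narrower than 64 bits use
// movsx/movsxd, unsigned loads narrower than 32 bits use movzx (a plain 32-bit mov
// already zero-extends the upper half on x86-64).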
1588 void Amd64Encoder::EncodeLdr(Reg dst, bool dst_signed, MemRef mem)
1589 {
1590     auto m = ArchMem(mem).Prepare(GetMasm());
1591 
1592     if (dst.GetType() == FLOAT32_TYPE) {
1593         GetMasm()->movss(ArchVReg(dst), m);
1594         return;
1595     }
1596     if (dst.GetType() == FLOAT64_TYPE) {
1597         GetMasm()->movsd(ArchVReg(dst), m);
1598         return;
1599     }
1600 
1601     m.setSize(dst.GetSize() / BITS_PER_BYTE);
1602 
1603     if (dst_signed && dst.GetSize() < DOUBLE_WORD_SIZE) {
1604         if (dst.GetSize() == WORD_SIZE) {
1605             GetMasm()->movsxd(ArchReg(dst, DOUBLE_WORD_SIZE), m);
1606         } else {
1607             GetMasm()->movsx(ArchReg(dst, DOUBLE_WORD_SIZE), m);
1608         }
1609         return;
1610     }
1611     if (!dst_signed && dst.GetSize() < WORD_SIZE) {
1612         GetMasm()->movzx(ArchReg(dst, WORD_SIZE), m);
1613         return;
1614     }
1615 
1616     GetMasm()->mov(ArchReg(dst), m);
1617 }
1618 
1619 void Amd64Encoder::EncodeLdrAcquire(Reg dst, bool dst_signed, MemRef mem)
1620 {
1621     EncodeLdr(dst, dst_signed, mem);
1622     // LoadLoad and LoadStore barriers should be here, but they are no-ops in the amd64 memory model
1623 }
1624 
1625 void Amd64Encoder::EncodeStr(Reg src, MemRef mem)
1626 {
1627     auto m = ArchMem(mem).Prepare(GetMasm());
1628 
1629     if (src.GetType() == FLOAT32_TYPE) {
1630         GetMasm()->movss(m, ArchVReg(src));
1631         return;
1632     }
1633     if (src.GetType() == FLOAT64_TYPE) {
1634         GetMasm()->movsd(m, ArchVReg(src));
1635         return;
1636     }
1637 
1638     m.setSize(src.GetSize() / BITS_PER_BYTE);
1639     GetMasm()->mov(m, ArchReg(src));
1640 }
1641 
1642 void Amd64Encoder::EncodeStrRelease(Reg src, MemRef mem)
1643 {
1644     // A StoreStore barrier should be here, but it is a no-op in the amd64 memory model
1645     EncodeStr(src, mem);
1646     // this is a StoreLoad barrier (which is also a full memory barrier in the amd64 memory model)
1647     GetMasm()->lock().add(asmjit::x86::dword_ptr(asmjit::x86::rsp), asmjit::imm(0));
1648 }
1649 
1650 void Amd64Encoder::EncodeStrz(Reg src, MemRef mem)
1651 {
1652     if (src.IsScalar()) {
1653         if (src.GetSize() == DOUBLE_WORD_SIZE) {
1654             GetMasm()->mov(ArchMem(mem).Prepare(GetMasm()), ArchReg(src));
1655         } else {
1656             ScopedTmpRegU64 tmp_reg(this);
1657             GetMasm()->xor_(ArchReg(tmp_reg), ArchReg(tmp_reg));
1658             GetMasm()->mov(ArchReg(tmp_reg, src.GetSize()), ArchReg(src));
1659             GetMasm()->mov(ArchMem(mem).Prepare(GetMasm()), ArchReg(tmp_reg));
1660         }
1661     } else {
1662         if (src.GetType() == FLOAT64_TYPE) {
1663             GetMasm()->movsd(ArchMem(mem).Prepare(GetMasm()), ArchVReg(src));
1664         } else {
1665             ScopedTmpRegF64 tmp_reg(this);
1666 
1667             GetMasm()->xorpd(ArchVReg(tmp_reg), ArchVReg(tmp_reg));
1668             GetMasm()->movss(ArchVReg(tmp_reg), ArchVReg(src));
1669             GetMasm()->movsd(ArchMem(mem).Prepare(GetMasm()), ArchVReg(tmp_reg));
1670         }
1671     }
1672 }
1673 
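// Store-immediate: float immediates are stored via their integer bit pattern; wide
// immediates that cannot be encoded directly in the store are staged in a temporary
// register first.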
1674 void Amd64Encoder::EncodeSti(Imm src, MemRef mem)
1675 {
1676     if (src.IsFloat()) {
1677         if (src.GetType() == FLOAT32_TYPE) {
1678             EncodeSti(Imm(bit_cast<int32_t>(src.GetValue<float>())), mem);
1679         } else {
1680             EncodeSti(Imm(bit_cast<int64_t>(src.GetValue<double>())), mem);
1681         }
1682         return;
1683     }
1684 
1685     auto m = ArchMem(mem).Prepare(GetMasm());
1686     if (src.GetSize() <= HALF_SIZE) {
1687         m.setSize(src.GetSize() / BITS_PER_BYTE);
1688         GetMasm()->mov(m, ArchImm(src));
1689     } else {
1690         m.setSize(DOUBLE_WORD_SIZE_BYTE);
1691 
1692         auto imm_val = ImmToSignedInt(src);
1693         if (ImmFitsSize(imm_val, DOUBLE_WORD_SIZE)) {
1694             GetMasm()->mov(m, asmjit::imm(imm_val));
1695         } else {
1696             ScopedTmpRegU64 tmp_reg(this);
1697             GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
1698             GetMasm()->mov(m, ArchReg(tmp_reg));
1699         }
1700     }
1701 }
1702 
1703 void Amd64Encoder::EncodeMemCopy(MemRef mem_from, MemRef mem_to, size_t size)
1704 {
1705     ScopedTmpRegU64 tmp_reg(this);
1706     GetMasm()->mov(ArchReg(tmp_reg, size), ArchMem(mem_from).Prepare(GetMasm()));
1707     GetMasm()->mov(ArchMem(mem_to).Prepare(GetMasm()), ArchReg(tmp_reg, size));
1708 }
1709 
1710 void Amd64Encoder::EncodeMemCopyz(MemRef mem_from, MemRef mem_to, size_t size)
1711 {
1712     ScopedTmpRegU64 tmp_reg(this);
1713     if (size < DOUBLE_WORD_SIZE) {
1714         GetMasm()->xor_(ArchReg(tmp_reg), ArchReg(tmp_reg));
1715     }
1716     GetMasm()->mov(ArchReg(tmp_reg, size), ArchMem(mem_from).Prepare(GetMasm()));
1717     GetMasm()->mov(ArchMem(mem_to).Prepare(GetMasm()), ArchReg(tmp_reg));
1718 }
1719 
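// Compare producing a 0/1 result in dst. For floating-point operands ucomiss/ucomisd
// set the parity flag on unordered (NaN) inputs, so that case is resolved separately
// before the setcc.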
1720 void Amd64Encoder::EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc)
1721 {
1722     if (src0.IsScalar()) {
1723         GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
1724     } else {
1725         if (src0.GetType() == FLOAT32_TYPE) {
1726             GetMasm()->ucomiss(ArchVReg(src0), ArchVReg(src1));
1727         } else {
1728             GetMasm()->ucomisd(ArchVReg(src0), ArchVReg(src1));
1729         }
1730     }
1731     GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(0));
1732 
1733     if (src0.IsScalar()) {
1734         GetMasm()->set(ArchCc(cc), ArchReg(dst, BYTE_SIZE));
1735         return;
1736     }
1737 
1738     auto end = GetMasm()->newLabel();
1739 
1740     if (CcMatchesNan(cc)) {
1741         GetMasm()->setp(ArchReg(dst, BYTE_SIZE));
1742     }
1743     GetMasm()->jp(end);
1744     GetMasm()->set(ArchCc(cc, true), ArchReg(dst, BYTE_SIZE));
1745 
1746     GetMasm()->bind(end);
1747 }
1748 
1749 void Amd64Encoder::EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc)
1750 {
1751     ASSERT(src0.IsScalar());
1752 
1753     GetMasm()->test(ArchReg(src0), ArchReg(src1));
1754 
1755     GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(0));
1756     GetMasm()->set(ArchCcTest(cc), ArchReg(dst, BYTE_SIZE));
1757 }
1758 
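// Three-way compare producing -1/0/1 in dst. For floats the NaN result is chosen by
// the condition: Condition::LT leaves -1 and Condition::MI leaves +1 when the
// operands are unordered.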
1759 void Amd64Encoder::EncodeCmp(Reg dst, Reg src0, Reg src1, Condition cc)
1760 {
1761     auto end = GetMasm()->newLabel();
1762 
1763     if (src0.IsFloat()) {
1764         ASSERT(src1.IsFloat());
1765         ASSERT(cc == Condition::MI || cc == Condition::LT);
1766 
1767         if (src0.GetType() == FLOAT32_TYPE) {
1768             GetMasm()->ucomiss(ArchVReg(src0), ArchVReg(src1));
1769         } else {
1770             GetMasm()->ucomisd(ArchVReg(src0), ArchVReg(src1));
1771         }
1772 
1773         GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), cc == Condition::LT ? asmjit::imm(-1) : asmjit::imm(1));
1774         cc = Condition::LO;
1775 
1776         GetMasm()->jp(end);
1777     } else {
1778         ASSERT(src0.IsScalar() && src1.IsScalar());
1779         ASSERT(cc == Condition::LO || cc == Condition::LT);
1780         GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
1781     }
1782     GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(0));
1783     GetMasm()->setne(ArchReg(dst, BYTE_SIZE));
1784 
1785     GetMasm()->j(asmjit::x86::Condition::negate(ArchCc(cc)), end);
1786     GetMasm()->neg(ArchReg(dst));
1787 
1788     GetMasm()->bind(end);
1789 }
1790 
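// Select via cmov: dst is preloaded with src1 and conditionally overwritten with src0.
// A temporary stands in for dst when it aliases src0, and float comparisons handle the
// parity (NaN) case with an extra cmovp or a jp around the cmov.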
1791 void Amd64Encoder::EncodeSelect(Reg dst, Reg src0, Reg src1, Reg src2, Reg src3, Condition cc)
1792 {
1793     ASSERT(!src0.IsFloat() && !src1.IsFloat());
1794     if (src2.IsScalar()) {
1795         GetMasm()->cmp(ArchReg(src2), ArchReg(src3));
1796     } else if (src2.GetType() == FLOAT32_TYPE) {
1797         GetMasm()->comiss(ArchVReg(src2), ArchVReg(src3));
1798     } else {
1799         GetMasm()->comisd(ArchVReg(src2), ArchVReg(src3));
1800     }
1801 
1802     auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
1803     bool dst_aliased = dst.GetId() == src0.GetId();
1804     ScopedTmpReg tmp_reg(this, dst.GetType());
1805     auto dst_reg = dst_aliased ? ArchReg(tmp_reg, size) : ArchReg(dst, size);
1806 
1807     GetMasm()->mov(dst_reg, ArchReg(src1, size));
1808 
1809     if (src2.IsScalar()) {
1810         GetMasm()->cmov(ArchCc(cc), dst_reg, ArchReg(src0, size));
1811     } else if (CcMatchesNan(cc)) {
1812         GetMasm()->cmovp(dst_reg, ArchReg(src0, size));
1813         GetMasm()->cmov(ArchCc(cc, src2.IsFloat()), dst_reg, ArchReg(src0, size));
1814     } else {
1815         auto end = GetMasm()->newLabel();
1816 
1817         GetMasm()->jp(end);
1818         GetMasm()->cmov(ArchCc(cc, src2.IsFloat()), dst_reg, ArchReg(src0, size));
1819 
1820         GetMasm()->bind(end);
1821     }
1822     if (dst_aliased) {
1823         EncodeMov(dst, tmp_reg);
1824     }
1825 }
1826 
1827 void Amd64Encoder::EncodeSelect(Reg dst, Reg src0, Reg src1, Reg src2, Imm imm, Condition cc)
1828 {
1829     ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());
1830 
1831     auto imm_val = ImmToSignedInt(imm);
1832     if (ImmFitsSize(imm_val, src2.GetSize())) {
1833         GetMasm()->cmp(ArchReg(src2), asmjit::imm(imm_val));
1834     } else {
1835         ScopedTmpReg tmp_reg(this, src2.GetType());
1836         GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
1837         GetMasm()->cmp(ArchReg(src2), ArchReg(tmp_reg));
1838     }
1839 
1840     ScopedTmpReg tmp_reg(this, dst.GetType());
1841     auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
1842     bool dst_aliased = dst.GetId() == src0.GetId();
1843     auto dst_reg = dst_aliased ? ArchReg(tmp_reg, size) : ArchReg(dst, size);
1844 
1845     GetMasm()->mov(dst_reg, ArchReg(src1, size));
1846     GetMasm()->cmov(ArchCc(cc), dst_reg, ArchReg(src0, size));
1847     if (dst_aliased) {
1848         EncodeMov(dst, tmp_reg);
1849     }
1850 }
1851 
1852 void Amd64Encoder::EncodeSelectTest(Reg dst, Reg src0, Reg src1, Reg src2, Reg src3, Condition cc)
1853 {
1854     ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());
1855 
1856     GetMasm()->test(ArchReg(src2), ArchReg(src3));
1857 
1858     ScopedTmpReg tmp_reg(this, dst.GetType());
1859     auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
1860     bool dst_aliased = dst.GetId() == src0.GetId();
1861     auto dst_reg = dst_aliased ? ArchReg(tmp_reg, size) : ArchReg(dst, size);
1862 
1863     GetMasm()->mov(dst_reg, ArchReg(src1, size));
1864     GetMasm()->cmov(ArchCcTest(cc), dst_reg, ArchReg(src0, size));
1865     if (dst_aliased) {
1866         EncodeMov(dst, tmp_reg);
1867     }
1868 }
1869 
1870 void Amd64Encoder::EncodeSelectTest(Reg dst, Reg src0, Reg src1, Reg src2, Imm imm, Condition cc)
1871 {
1872     ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());
1873 
1874     auto imm_val = ImmToSignedInt(imm);
1875     if (ImmFitsSize(imm_val, src2.GetSize())) {
1876         GetMasm()->test(ArchReg(src2), asmjit::imm(imm_val));
1877     } else {
1878         ScopedTmpReg tmp_reg(this, src2.GetType());
1879         GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
1880         GetMasm()->test(ArchReg(src2), ArchReg(tmp_reg));
1881     }
1882 
1883     ScopedTmpReg tmp_reg(this, dst.GetType());
1884     auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
1885     bool dst_aliased = dst.GetId() == src0.GetId();
1886     auto dst_reg = dst_aliased ? ArchReg(tmp_reg, size) : ArchReg(dst, size);
1887 
1888     GetMasm()->mov(dst_reg, ArchReg(src1, size));
1889     GetMasm()->cmov(ArchCcTest(cc), dst_reg, ArchReg(src0, size));
1890     if (dst_aliased) {
1891         EncodeMov(dst, tmp_reg);
1892     }
1893 }
1894 
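// Load-pair: two consecutive loads from mem and mem + element size; 32-bit signed
// elements are loaded with movsxd so both destinations end up sign-extended.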
1895 void Amd64Encoder::EncodeLdp(Reg dst0, Reg dst1, bool dst_signed, MemRef mem)
1896 {
1897     ASSERT(dst0.IsFloat() == dst1.IsFloat());
1898     ASSERT(dst0.GetSize() == dst1.GetSize());
1899 
1900     auto m = ArchMem(mem).Prepare(GetMasm());
1901 
1902     if (dst0.IsFloat()) {
1903         if (dst0.GetType() == FLOAT32_TYPE) {
1904             GetMasm()->movss(ArchVReg(dst0), m);
1905 
1906             m.addOffset(WORD_SIZE_BYTE);
1907             GetMasm()->movss(ArchVReg(dst1), m);
1908         } else {
1909             GetMasm()->movsd(ArchVReg(dst0), m);
1910 
1911             m.addOffset(DOUBLE_WORD_SIZE_BYTE);
1912             GetMasm()->movsd(ArchVReg(dst1), m);
1913         }
1914         return;
1915     }
1916 
1917     if (dst_signed && dst0.GetSize() == WORD_SIZE) {
1918         m.setSize(WORD_SIZE_BYTE);
1919         GetMasm()->movsxd(ArchReg(dst0, DOUBLE_WORD_SIZE), m);
1920 
1921         m.addOffset(WORD_SIZE_BYTE);
1922         GetMasm()->movsxd(ArchReg(dst1, DOUBLE_WORD_SIZE), m);
1923         return;
1924     }
1925 
1926     GetMasm()->mov(ArchReg(dst0), m);
1927 
1928     m.addOffset(dst0.GetSize() / BITS_PER_BYTE);
1929     GetMasm()->mov(ArchReg(dst1), m);
1930 }
1931 
1932 void Amd64Encoder::EncodeStp(Reg src0, Reg src1, MemRef mem)
1933 {
1934     ASSERT(src0.IsFloat() == src1.IsFloat());
1935     ASSERT(src0.GetSize() == src1.GetSize());
1936 
1937     auto m = ArchMem(mem).Prepare(GetMasm());
1938 
1939     if (src0.IsFloat()) {
1940         if (src0.GetType() == FLOAT32_TYPE) {
1941             GetMasm()->movss(m, ArchVReg(src0));
1942 
1943             m.addOffset(WORD_SIZE_BYTE);
1944             GetMasm()->movss(m, ArchVReg(src1));
1945         } else {
1946             GetMasm()->movsd(m, ArchVReg(src0));
1947 
1948             m.addOffset(DOUBLE_WORD_SIZE_BYTE);
1949             GetMasm()->movsd(m, ArchVReg(src1));
1950         }
1951         return;
1952     }
1953 
1954     GetMasm()->mov(m, ArchReg(src0));
1955 
1956     m.addOffset(src0.GetSize() / BITS_PER_BYTE);
1957     GetMasm()->mov(m, ArchReg(src1));
1958 }
1959 
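// 16-bit values are byte-swapped with a rotate by 8 and then sign-extended, since
// bswap is undefined for 16-bit operands; wider values use bswap directly.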
1960 void Amd64Encoder::EncodeReverseBytes(Reg dst, Reg src)
1961 {
1962     ASSERT(src.GetSize() > BYTE_SIZE);
1963     ASSERT(src.GetSize() == dst.GetSize());
1964 
1965     if (src != dst) {
1966         GetMasm()->mov(ArchReg(dst), ArchReg(src));
1967     }
1968 
1969     if (src.GetSize() == HALF_SIZE) {
1970         GetMasm()->rol(ArchReg(dst), BYTE_SIZE);
1971         GetMasm()->movsx(ArchReg(dst, WORD_SIZE), ArchReg(dst));
1972     } else {
1973         GetMasm()->bswap(ArchReg(dst));
1974     }
1975 }
1976 
1977 bool Amd64Encoder::CanEncodeImmAddSubCmp(int64_t imm, uint32_t size, [[maybe_unused]] bool signed_compare)
1978 {
1979     return ImmFitsSize(imm, size);
1980 }
1981 
1982 void Amd64Encoder::EncodeBitCount(Reg dst0, Reg src0)
1983 {
1984     ASSERT(src0.GetSize() == WORD_SIZE || src0.GetSize() == DOUBLE_WORD_SIZE);
1985     ASSERT(dst0.GetSize() == WORD_SIZE);
1986     ASSERT(src0.IsScalar() && dst0.IsScalar());
1987 
1988     GetMasm()->popcnt(ArchReg(dst0, src0.GetSize()), ArchReg(src0));
1989 }
1990 
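// bsr is undefined for a zero input, so zero is special-cased to the operand width;
// otherwise clz = (width - 1) - bsr, computed here as an xor with (width - 1).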
1991 void Amd64Encoder::EncodeCountLeadingZeroBits(Reg dst, Reg src)
1992 {
1993     auto end = CreateLabel();
1994     auto zero = CreateLabel();
1995     EncodeJump(zero, src, Condition::EQ);
1996     GetMasm()->bsr(ArchReg(dst), ArchReg(src));
1997     GetMasm()->xor_(ArchReg(dst), asmjit::imm(dst.GetSize() - 1));
1998     EncodeJump(end);
1999 
2000     BindLabel(zero);
2001     GetMasm()->mov(ArchReg(dst), asmjit::imm(dst.GetSize()));
2002 
2003     BindLabel(end);
2004 }
2005 
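// bsf leaves ZF set when the source is zero, so dst is preloaded with the operand
// width and only replaced by the bsf result when the source was non-zero.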
2006 void Amd64Encoder::EncodeCountTrailingZeroBits(Reg dst, Reg src)
2007 {
2008     ScopedTmpReg tmp(this, src.GetType());
2009     GetMasm()->bsf(ArchReg(tmp), ArchReg(src));
2010     GetMasm()->mov(ArchReg(dst), asmjit::imm(dst.GetSize()));
2011     GetMasm()->cmovne(ArchReg(dst), ArchReg(tmp));
2012 }
2013 
2014 void Amd64Encoder::EncodeCeil(Reg dst, Reg src)
2015 {
2016     // NOLINTNEXTLINE(readability-magic-numbers)
2017     GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(2));
2018 }
2019 
2020 void Amd64Encoder::EncodeFloor(Reg dst, Reg src)
2021 {
2022     GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(1));
2023 }
2024 
2025 void Amd64Encoder::EncodeRint(Reg dst, Reg src)
2026 {
2027     GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(0));
2028 }
2029 
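// Round half up to integer: take the floor, add 1 when the fractional part is >= 0.5,
// then convert with explicit clamping (values above the integer range yield the
// maximum, NaN yields 0).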
2030 void Amd64Encoder::EncodeRound(Reg dst, Reg src)
2031 {
2032     ScopedTmpReg t1(this, src.GetType());
2033     ScopedTmpReg t2(this, src.GetType());
2034     ScopedTmpReg t3(this, src.GetType());
2035     ScopedTmpReg t4(this, dst.GetType());
2036 
2037     auto skip_incr_id = CreateLabel();
2038     auto done_id = CreateLabel();
2039 
2040     auto skip_incr = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(skip_incr_id);
2041     auto done = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(done_id);
2042 
2043     if (src.GetType() == FLOAT32_TYPE) {
2044         GetMasm()->movss(ArchVReg(t2), ArchVReg(src));
2045         GetMasm()->roundss(ArchVReg(t1), ArchVReg(src), asmjit::imm(1));
2046         GetMasm()->subss(ArchVReg(t2), ArchVReg(t1));
2047         // NOLINTNEXTLINE(readability-magic-numbers)
2048         const auto HALF_F = 0.5F;
2049         GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int32_t, float>(HALF_F)));
2050         GetMasm()->movd(ArchVReg(t3), ArchReg(t4));
2051         GetMasm()->comiss(ArchVReg(t2), ArchVReg(t3));
2052         GetMasm()->j(asmjit::x86::Condition::Code::kB, *skip_incr);
2053         // NOLINTNEXTLINE(readability-magic-numbers)
2054         const auto ONE_F = 1.0F;
2055         GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int32_t, float>(ONE_F)));
2056         GetMasm()->movd(ArchVReg(t3), ArchReg(t4));
2057         GetMasm()->addss(ArchVReg(t1), ArchVReg(t3));
2058         BindLabel(skip_incr_id);
2059 
2060         // NOLINTNEXTLINE(readability-magic-numbers)
2061         GetMasm()->mov(ArchReg(dst), asmjit::imm(0x7FFFFFFF));
2062         GetMasm()->cvtsi2ss(ArchVReg(t2), ArchReg(dst));
2063         GetMasm()->comiss(ArchVReg(t1), ArchVReg(t2));
2064         GetMasm()->j(asmjit::x86::Condition::Code::kAE,
2065                      *done);                           // clipped to max (already in dst), does not jump on unordered
2066         GetMasm()->mov(ArchReg(dst), asmjit::imm(0));  // does not change flags
2067         GetMasm()->j(asmjit::x86::Condition::Code::kParityEven, *done);  // NaN mapped to 0 (just moved in dst)
2068         GetMasm()->cvttss2si(ArchReg(dst), ArchVReg(t1));
2069         BindLabel(done_id);
2070     } else if (src.GetType() == FLOAT64_TYPE) {
2071         GetMasm()->movsd(ArchVReg(t2), ArchVReg(src));
2072         GetMasm()->roundsd(ArchVReg(t1), ArchVReg(src), asmjit::imm(1));
2073         GetMasm()->subsd(ArchVReg(t2), ArchVReg(t1));
2074         // NOLINTNEXTLINE(readability-magic-numbers)
2075         const auto HALF = 0.5;
2076         GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int64_t, double>(HALF)));
2077         GetMasm()->movq(ArchVReg(t3), ArchReg(t4));
2078         GetMasm()->comisd(ArchVReg(t2), ArchVReg(t3));
2079         GetMasm()->j(asmjit::x86::Condition::Code::kB, *skip_incr);
2080         // NOLINTNEXTLINE(readability-magic-numbers)
2081         const auto ONE = 1.0;
2082         GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int64_t, double>(ONE)));
2083         GetMasm()->movq(ArchVReg(t3), ArchReg(t4));
2084         GetMasm()->addsd(ArchVReg(t1), ArchVReg(t3));
2085         BindLabel(skip_incr_id);
2086 
2087         // NOLINTNEXTLINE(readability-magic-numbers)
2088         GetMasm()->mov(ArchReg(dst), asmjit::imm(0x7FFFFFFFFFFFFFFFL));
2089         GetMasm()->cvtsi2sd(ArchVReg(t2), ArchReg(dst));
2090         GetMasm()->comisd(ArchVReg(t1), ArchVReg(t2));
2091         GetMasm()->j(asmjit::x86::Condition::Code::kAE,
2092                      *done);                           // clipped to max (already in dst), does not jump on unordered
2093         GetMasm()->mov(ArchReg(dst), asmjit::imm(0));  // does not change flags
2094         GetMasm()->j(asmjit::x86::Condition::Code::kParityEven, *done);  // NaN mapped to 0 (just moved in dst)
2095         GetMasm()->cvttsd2si(ArchReg(dst), ArchVReg(t1));
2096         BindLabel(done_id);
2097     } else {
2098         UNREACHABLE();
2099     }
2100 }
2101 
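// Bit reversal by divide and conquer: three rounds swap adjacent 1-, 2- and 4-bit
// groups using the 0x55.../0x33.../0x0f... masks, and a final bswap reverses the byte
// order. 64-bit masks go through a register because AND has no 64-bit immediate form.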
2102 template <typename T>
2103 void Amd64Encoder::EncodeReverseBitsImpl(Reg dst0, Reg src0)
2104 {
2105     ASSERT(std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed);
2106     [[maybe_unused]] constexpr auto IMM_8 = 8;
2107     ASSERT(sizeof(T) * IMM_8 == dst0.GetSize());
2108     // NOLINTNEXTLINE(modernize-avoid-c-arrays)
2109     static constexpr T MASKS[] = {static_cast<T>(UINT64_C(0x5555555555555555)),
2110                                   static_cast<T>(UINT64_C(0x3333333333333333)),
2111                                   static_cast<T>(UINT64_C(0x0f0f0f0f0f0f0f0f))};
2112 
2113     ScopedTmpReg tmp(this, dst0.GetType());
2114     ScopedTmpReg imm_holder(this, dst0.GetType());
2115     auto imm_holder_reg = ArchReg(imm_holder);
2116 
2117     GetMasm()->mov(ArchReg(dst0), ArchReg(src0));
2118     GetMasm()->mov(ArchReg(tmp), ArchReg(src0));
2119     constexpr auto MAX_ROUNDS = 3;
2120     for (uint64_t round = 0; round < MAX_ROUNDS; round++) {
2121         auto shift = 1U << round;
2122         auto mask = asmjit::imm(MASKS[round]);
2123         GetMasm()->shr(ArchReg(dst0), shift);
2124         if (dst0.GetSize() == DOUBLE_WORD_SIZE) {
2125             GetMasm()->mov(imm_holder_reg, mask);
2126             GetMasm()->and_(ArchReg(tmp), imm_holder_reg);
2127             GetMasm()->and_(ArchReg(dst0), imm_holder_reg);
2128         } else {
2129             GetMasm()->and_(ArchReg(tmp), mask);
2130             GetMasm()->and_(ArchReg(dst0), mask);
2131         }
2132         GetMasm()->shl(ArchReg(tmp), shift);
2133         GetMasm()->or_(ArchReg(dst0), ArchReg(tmp));
2134         constexpr auto ROUND_2 = 2;
2135         if (round != ROUND_2) {
2136             GetMasm()->mov(ArchReg(tmp), ArchReg(dst0));
2137         }
2138     }
2139 
2140     GetMasm()->bswap(ArchReg(dst0));
2141 }
2142 
2143 void Amd64Encoder::EncodeReverseBits(Reg dst0, Reg src0)
2144 {
2145     ASSERT(src0.GetSize() == WORD_SIZE || src0.GetSize() == DOUBLE_WORD_SIZE);
2146     ASSERT(src0.GetSize() == dst0.GetSize());
2147 
2148     if (src0.GetSize() == WORD_SIZE) {
2149         EncodeReverseBitsImpl<uint32_t>(dst0, src0);
2150         return;
2151     }
2152 
2153     EncodeReverseBitsImpl<uint64_t>(dst0, src0);
2154 }
2155 
2156 bool Amd64Encoder::CanEncodeScale(uint64_t imm, [[maybe_unused]] uint32_t size)
2157 {
2158     return imm <= 3U;
2159 }
2160 
2161 bool Amd64Encoder::CanEncodeImmLogical(uint64_t imm, uint32_t size)
2162 {
2163     return ImmFitsSize(imm, size);
2164 }
2165 
2166 bool Amd64Encoder::CanEncodeBitCount()
2167 {
2168     return asmjit::CpuInfo::host().hasFeature(asmjit::x86::Features::kPOPCNT);
2169 }
2170 
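// IsInf: shift the raw bits left by one to discard the sign and compare against the
// all-ones-exponent / zero-mantissa pattern shifted the same way.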
2171 void Amd64Encoder::EncodeIsInf(Reg dst, Reg src)
2172 {
2173     ASSERT(dst.IsScalar() && src.IsFloat());
2174 
2175     GetMasm()->xor_(ArchReg(dst, DOUBLE_WORD_SIZE), ArchReg(dst, DOUBLE_WORD_SIZE));
2176 
2177     if (src.GetSize() == WORD_SIZE) {
2178         constexpr auto INF_MASK = uint32_t(0x7f800000) << 1U;
2179 
2180         ScopedTmpRegU32 tmp_reg(this);
2181         ScopedTmpRegU32 tmp1_reg(this);
2182         auto tmp = ArchReg(tmp_reg);
2183         auto tmp1 = ArchReg(tmp1_reg);
2184 
2185         GetMasm()->movd(tmp1, ArchVReg(src));
2186         GetMasm()->shl(tmp1, 1);
2187         GetMasm()->mov(tmp, INF_MASK);
2188         GetMasm()->cmp(tmp, tmp1);
2189     } else {
2190         constexpr auto INF_MASK = uint64_t(0x7ff0000000000000) << 1U;
2191 
2192         ScopedTmpRegU64 tmp_reg(this);
2193         ScopedTmpRegU64 tmp1_reg(this);
2194         auto tmp = ArchReg(tmp_reg);
2195         auto tmp1 = ArchReg(tmp1_reg);
2196 
2197         GetMasm()->movq(tmp1, ArchVReg(src));
2198         GetMasm()->shl(tmp1, 1);
2199 
2200         GetMasm()->mov(tmp, INF_MASK);
2201         GetMasm()->cmp(tmp, tmp1);
2202     }
2203 
2204     GetMasm()->sete(ArchReg(dst, BYTE_SIZE));
2205 }
2206 
2207 /* Since NaNs have to be canonicalized, we compare the
2208  * input with itself; if it is NaN, the comparison will
2209  * set the parity flag (PF). */
2210 void Amd64Encoder::EncodeFpToBits(Reg dst, Reg src)
2211 {
2212     ASSERT(dst.IsScalar() && src.IsFloat());
2213 
2214     if (dst.GetType() == INT32_TYPE) {
2215         ASSERT(src.GetSize() == WORD_SIZE);
2216 
2217         constexpr auto FLOAT_NAN = uint32_t(0x7fc00000);
2218 
2219         ScopedTmpRegU32 tmp(this);
2220 
2221         GetMasm()->ucomiss(ArchVReg(src), ArchVReg(src));
2222         GetMasm()->mov(ArchReg(tmp), FLOAT_NAN);
2223         GetMasm()->movd(ArchReg(dst), ArchVReg(src));
2224         GetMasm()->cmovpe(ArchReg(dst), ArchReg(tmp));
2225     } else {
2226         ASSERT(src.GetSize() == DOUBLE_WORD_SIZE);
2227 
2228         constexpr auto DOUBLE_NAN = uint64_t(0x7ff8000000000000);
2229         ScopedTmpRegU64 tmp(this);
2230 
2231         GetMasm()->ucomisd(ArchVReg(src), ArchVReg(src));
2232         GetMasm()->mov(ArchReg(tmp), DOUBLE_NAN);
2233         GetMasm()->movq(ArchReg(dst), ArchVReg(src));
2234         GetMasm()->cmovpe(ArchReg(dst), ArchReg(tmp));
2235     }
2236 }
2237 
2238 void Amd64Encoder::EncodeMoveBitsRaw(Reg dst, Reg src)
2239 {
2240     ASSERT((dst.IsFloat() && src.IsScalar()) || (src.IsFloat() && dst.IsScalar()));
2241     if (src.IsScalar()) {
2242         ASSERT((dst.GetSize() == src.GetSize()));
2243         if (src.GetSize() == WORD_SIZE) {
2244             GetMasm()->movd(ArchVReg(dst), ArchReg(src));
2245         } else {
2246             GetMasm()->movq(ArchVReg(dst), ArchReg(src));
2247         }
2248     } else {
2249         ASSERT((src.GetSize() == dst.GetSize()));
2250         if (dst.GetSize() == WORD_SIZE) {
2251             GetMasm()->movd(ArchReg(dst), ArchVReg(src));
2252         } else {
2253             GetMasm()->movq(ArchReg(dst), ArchVReg(src));
2254         }
2255     }
2256 }
2257 
2258 /* Unsafe intrinsics */
2259 void Amd64Encoder::EncodeCompareAndSwap(Reg dst, Reg obj, const Reg *offset, Reg val, Reg newval)
2260 {
2261     /*
2262      * movl    old, %eax
2263      * lock    cmpxchgl   new, addr
2264      * sete    %al
2265      */
2266     ScopedTmpRegU64 tmp1(this);
2267     ScopedTmpRegU64 tmp2(this);
2268     ScopedTmpRegU64 tmp3(this);
2269     Reg newvalue = newval;
2270     auto addr = ArchMem(MemRef(tmp2)).Prepare(GetMasm());
2271     auto addr_reg = ArchReg(tmp2);
2272     Reg rax(ConvertRegNumber(asmjit::x86::rax.id()), INT64_TYPE);
2273 
2274     /* TODO(ayodkev) this is a workaround for the failure of
2275      * jsr166.ScheduledExecutorTest; we have to figure out whether there
2276      * is a less crude way to avoid this */
2277     if (newval.GetId() == rax.GetId()) {
2278         SetFalseResult();
2279         return;
2280     }
2281 
2282     if (offset != nullptr) {
2283         GetMasm()->lea(addr_reg, asmjit::x86::ptr(ArchReg(obj), ArchReg(*offset)));
2284     } else {
2285         GetMasm()->mov(addr_reg, ArchReg(obj));
2286     }
2287 
2288     /* the [er]ax register will be overwritten by the cmpxchg instruction,
2289      * so save it unless it is the destination register */
2290     if (dst.GetId() != rax.GetId()) {
2291         GetMasm()->mov(ArchReg(tmp1), asmjit::x86::rax);
2292     }
2293 
2294     /* if the new value comes in the [er]ax register, we have to use a
2295      * different register, since [er]ax will hold the current value */
2296     if (newval.GetId() == rax.GetId()) {
2297         GetMasm()->mov(ArchReg(tmp3, newval.GetSize()), ArchReg(newval));
2298         newvalue = tmp3;
2299     }
2300 
2301     if (val.GetId() != rax.GetId()) {
2302         GetMasm()->mov(asmjit::x86::rax, ArchReg(val).r64());
2303     }
2304 
2305     GetMasm()->lock().cmpxchg(addr, ArchReg(newvalue));
2306     GetMasm()->sete(ArchReg(dst));
2307 
2308     if (dst.GetId() != rax.GetId()) {
2309         GetMasm()->mov(asmjit::x86::rax, ArchReg(tmp1));
2310     }
2311 }
2312 
2313 void Amd64Encoder::EncodeUnsafeGetAndSet(Reg dst, Reg obj, Reg offset, Reg val)
2314 {
2315     ScopedTmpRegU64 tmp(this);
2316     auto addr_reg = ArchReg(tmp);
2317     auto addr = ArchMem(MemRef(tmp)).Prepare(GetMasm());
2318     GetMasm()->lea(addr_reg, asmjit::x86::ptr(ArchReg(obj), ArchReg(offset)));
2319     GetMasm()->mov(ArchReg(dst), ArchReg(val));
2320     GetMasm()->lock().xchg(addr, ArchReg(dst));
2321 }
2322 
2323 void Amd64Encoder::EncodeUnsafeGetAndAdd(Reg dst, Reg obj, Reg offset, Reg val, [[maybe_unused]] Reg tmp)
2324 {
2325     ScopedTmpRegU64 tmp1(this);
2326     auto addr_reg = ArchReg(tmp1);
2327     auto addr = ArchMem(MemRef(tmp1)).Prepare(GetMasm());
2328     GetMasm()->lea(addr_reg, asmjit::x86::ptr(ArchReg(obj), ArchReg(offset)));
2329     GetMasm()->mov(ArchReg(dst), ArchReg(val));
2330     GetMasm()->lock().xadd(addr, ArchReg(dst));
2331 }
2332 
2333 void Amd64Encoder::EncodeMemoryBarrier(MemoryOrder::Order order)
2334 {
2335     if (order == MemoryOrder::Full) {
2336         /* does the same as mfence but is faster; not applicable to NT-writes, though */
2337         GetMasm()->lock().add(asmjit::x86::dword_ptr(asmjit::x86::rsp), asmjit::imm(0));
2338     }
2339 }
2340 
2341 void Amd64Encoder::EncodeStackOverflowCheck(ssize_t offset)
2342 {
2343     MemRef mem(GetTarget().GetStackReg(), offset);
2344     auto m = ArchMem(mem).Prepare(GetMasm());
2345     GetMasm()->test(m, ArchReg(GetTarget().GetParamReg(0)));
2346 }
2347 
2348 void Amd64Encoder::MakeLibCall(Reg dst, Reg src0, Reg src1, void *entry_point)
2349 {
2350     if (!dst.IsFloat()) {
2351         SetFalseResult();
2352         return;
2353     }
2354 
2355     if (dst.GetType() == FLOAT32_TYPE) {
2356         if (!src0.IsFloat() || !src1.IsFloat()) {
2357             SetFalseResult();
2358             return;
2359         }
2360 
2361         if (src0.GetId() != asmjit::x86::xmm0.id() || src1.GetId() != asmjit::x86::xmm1.id()) {
2362             ScopedTmpRegF32 tmp(this);
2363             GetMasm()->movss(ArchVReg(tmp), ArchVReg(src1));
2364             GetMasm()->movss(asmjit::x86::xmm0, ArchVReg(src0));
2365             GetMasm()->movss(asmjit::x86::xmm1, ArchVReg(tmp));
2366         }
2367 
2368         MakeCall(entry_point);
2369 
2370         if (dst.GetId() != asmjit::x86::xmm0.id()) {
2371             GetMasm()->movss(ArchVReg(dst), asmjit::x86::xmm0);
2372         }
2373     } else if (dst.GetType() == FLOAT64_TYPE) {
2374         if (!src0.IsFloat() || !src1.IsFloat()) {
2375             SetFalseResult();
2376             return;
2377         }
2378 
2379         if (src0.GetId() != asmjit::x86::xmm0.id() || src1.GetId() != asmjit::x86::xmm1.id()) {
2380             ScopedTmpRegF64 tmp(this);
2381             GetMasm()->movsd(ArchVReg(tmp), ArchVReg(src1));
2382             GetMasm()->movsd(asmjit::x86::xmm0, ArchVReg(src0));
2383             GetMasm()->movsd(asmjit::x86::xmm1, ArchVReg(tmp));
2384         }
2385 
2386         MakeCall(entry_point);
2387 
2388         if (dst.GetId() != asmjit::x86::xmm0.id()) {
2389             GetMasm()->movsd(ArchVReg(dst), asmjit::x86::xmm0);
2390         }
2391     } else {
2392         UNREACHABLE();
2393     }
2394 }
2395 
2396 template <bool is_store>
2397 void Amd64Encoder::LoadStoreRegisters(RegMask registers, ssize_t slot, size_t start_reg, bool is_fp)
2398 {
2399     for (size_t i {0}; i < registers.size(); ++i) {
2400         if (!registers.test(i)) {
2401             continue;
2402         }
2403 
2404         asmjit::x86::Mem mem = asmjit::x86::ptr(asmjit::x86::rsp, (slot + i - start_reg) * DOUBLE_WORD_SIZE_BYTE);
2405 
2406         if constexpr (is_store) {  // NOLINT
2407             if (is_fp) {
2408                 GetMasm()->movsd(mem, asmjit::x86::xmm(i));
2409             } else {
2410                 GetMasm()->mov(mem, asmjit::x86::gpq(ConvertRegNumber(i)));
2411             }
2412         } else {  // NOLINT
2413             if (is_fp) {
2414                 GetMasm()->movsd(asmjit::x86::xmm(i), mem);
2415             } else {
2416                 GetMasm()->mov(asmjit::x86::gpq(ConvertRegNumber(i)), mem);
2417             }
2418         }
2419     }
2420 }
2421 
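// Spill/fill relative to `base`: when a mask is provided, `index` advances only for
// masked registers so the saved registers stay densely packed starting at `slot`.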
2422 template <bool is_store>
2423 void Amd64Encoder::LoadStoreRegisters(RegMask registers, bool is_fp, int32_t slot, Reg base, RegMask mask)
2424 {
2425     auto base_reg = ArchReg(base);
2426     bool has_mask = mask.any();
2427     int32_t index = has_mask ? static_cast<int32_t>(mask.GetMinRegister()) : 0;
2428     slot -= index;
2429     for (size_t i = index; i < registers.size(); ++i) {
2430         if (has_mask) {
2431             if (!mask.test(i)) {
2432                 continue;
2433             }
2434             index++;
2435         }
2436         if (!registers.test(i)) {
2437             continue;
2438         }
2439 
2440         if (!has_mask) {
2441             index++;
2442         }
2443 
2444         // `-1` because we've incremented `index` in advance
2445         asmjit::x86::Mem mem = asmjit::x86::ptr(base_reg, (slot + index - 1) * DOUBLE_WORD_SIZE_BYTE);
2446 
2447         if constexpr (is_store) {  // NOLINT
2448             if (is_fp) {
2449                 GetMasm()->movsd(mem, asmjit::x86::xmm(i));
2450             } else {
2451                 GetMasm()->mov(mem, asmjit::x86::gpq(ConvertRegNumber(i)));
2452             }
2453         } else {  // NOLINT
2454             if (is_fp) {
2455                 GetMasm()->movsd(asmjit::x86::xmm(i), mem);
2456             } else {
2457                 GetMasm()->mov(asmjit::x86::gpq(ConvertRegNumber(i)), mem);
2458             }
2459         }
2460     }
2461 }
2462 
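// FP registers are pushed via sub + movsd since there is no push for XMM registers;
// an extra 8-byte slot keeps the stack 16-byte aligned when an odd number of
// registers is pushed and alignment is requested.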
2463 void Amd64Encoder::PushRegisters(RegMask registers, bool is_fp, bool align)
2464 {
2465     for (size_t i = 0; i < registers.size(); i++) {
2466         if (registers[i]) {
2467             if (is_fp) {
2468                 GetMasm()->sub(asmjit::x86::rsp, DOUBLE_WORD_SIZE_BYTE);
2469                 GetMasm()->movsd(asmjit::x86::ptr(asmjit::x86::rsp), ArchVReg(Reg(i, FLOAT64_TYPE)));
2470             } else {
2471                 GetMasm()->push(asmjit::x86::gpq(ConvertRegNumber(i)));
2472             }
2473         }
2474     }
2475     if (align && (registers.count() & 1U) != 0) {
2476         GetMasm()->sub(asmjit::x86::rsp, DOUBLE_WORD_SIZE_BYTE);
2477     }
2478 }
2479 
2480 void Amd64Encoder::PopRegisters(RegMask registers, bool is_fp, bool align)
2481 {
2482     if (align && (registers.count() & 1U) != 0) {
2483         GetMasm()->add(asmjit::x86::rsp, DOUBLE_WORD_SIZE_BYTE);
2484     }
2485     for (ssize_t i = registers.size() - 1; i >= 0; i--) {
2486         if (registers[i]) {
2487             if (is_fp) {
2488                 GetMasm()->movsd(ArchVReg(Reg(i, FLOAT64_TYPE)), asmjit::x86::ptr(asmjit::x86::rsp));
2489                 GetMasm()->add(asmjit::x86::rsp, DOUBLE_WORD_SIZE_BYTE);
2490             } else {
2491                 GetMasm()->pop(asmjit::x86::gpq(ConvertRegNumber(i)));
2492             }
2493         }
2494     }
2495 }
2496 
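// Materialize an 8- or 16-byte constant in an XMM register through a GPR: movq fills
// the low half, and for 16-byte payloads unpcklpd merges in the high half from a
// temporary XMM register.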
2497 template <typename T, size_t n>
2498 void Amd64Encoder::CopyArrayToXmm(Reg xmm, const std::array<T, n> &arr)
2499 {
2500     static constexpr auto SIZE {n * sizeof(T)};
2501     static_assert((SIZE == DOUBLE_WORD_SIZE_BYTE) || (SIZE == 2U * DOUBLE_WORD_SIZE_BYTE));
2502     ASSERT(xmm.GetType() == FLOAT64_TYPE);
2503 
2504     auto data {reinterpret_cast<const uint64_t *>(arr.data())};
2505 
2506     ScopedTmpRegU64 tmp_gpr(this);
2507     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
2508     GetMasm()->mov(ArchReg(tmp_gpr), asmjit::imm(data[0]));
2509     GetMasm()->movq(ArchVReg(xmm), ArchReg(tmp_gpr));
2510 
2511     if constexpr (SIZE == 2U * DOUBLE_WORD_SIZE_BYTE) {
2512         ScopedTmpRegF64 tmp_xmm(this);
2513         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
2514         GetMasm()->mov(ArchReg(tmp_gpr), asmjit::imm(data[1]));
2515         GetMasm()->movq(ArchVReg(tmp_xmm), ArchReg(tmp_gpr));
2516         GetMasm()->unpcklpd(ArchVReg(xmm), ArchVReg(tmp_xmm));
2517     }
2518 }
2519 
2520 template <typename T>
2521 void Amd64Encoder::CopyImmToXmm(Reg xmm, T imm)
2522 {
2523     static_assert((sizeof(imm) == WORD_SIZE_BYTE) || (sizeof(imm) == DOUBLE_WORD_SIZE_BYTE));
2524     ASSERT(xmm.GetSize() == BYTE_SIZE * sizeof(imm));
2525 
2526     if constexpr (sizeof(imm) == WORD_SIZE_BYTE) {  // NOLINT
2527         ScopedTmpRegU32 tmp_gpr(this);
2528         GetMasm()->mov(ArchReg(tmp_gpr), asmjit::imm(bit_cast<uint32_t>(imm)));
2529         GetMasm()->movd(ArchVReg(xmm), ArchReg(tmp_gpr));
2530     } else {  // NOLINT
2531         ScopedTmpRegU64 tmp_gpr(this);
2532         GetMasm()->mov(ArchReg(tmp_gpr), asmjit::imm(bit_cast<uint64_t>(imm)));
2533         GetMasm()->movq(ArchVReg(xmm), ArchReg(tmp_gpr));
2534     }
2535 }
2536 
2537 size_t Amd64Encoder::DisasmInstr(std::ostream &stream, size_t pc, ssize_t code_offset) const
2538 {
2539     if (code_offset < 0) {
2540         (const_cast<Amd64Encoder *>(this))->Finalize();
2541     }
2542     Span code(GetMasm()->bufferData(), GetMasm()->offset());
2543 
2544     [[maybe_unused]] size_t data_left = code.Size() - pc;
2545     [[maybe_unused]] constexpr size_t LENGTH = ZYDIS_MAX_INSTRUCTION_LENGTH;  // 15 bytes is max inst length in amd64
2546 
2547     // Initialize decoder context
2548     ZydisDecoder decoder;
2549     [[maybe_unused]] bool res =
2550         ZYAN_SUCCESS(ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_ADDRESS_WIDTH_64));
2551 
2552     // Initialize formatter
2553     ZydisFormatter formatter;
2554     res &= ZYAN_SUCCESS(ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_ATT));
2555     ASSERT(res);
2556 
2557     ZydisDecodedInstruction instruction;
2558 
2559     res &= ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&decoder, &code[pc], std::min(LENGTH, data_left), &instruction));
2560 
2561     // Format & print the binary instruction structure to human readable format
2562     char buffer[256];  // NOLINT (modernize-avoid-c-arrays, readability-identifier-naming, readability-magic-numbers)
2563     res &= ZYAN_SUCCESS(
2564         ZydisFormatterFormatInstruction(&formatter, &instruction, buffer, sizeof(buffer), uintptr_t(&code[pc])));
2565 
2566     ASSERT(res);
2567 
2568     // Print disassembly
2569     if (code_offset < 0) {
2570         stream << buffer;
2571     } else {
2572         stream << std::setw(0x8) << std::right << std::setfill('0') << std::hex << pc + code_offset << std::dec
2573                << std::setfill(' ') << ": " << buffer;
2574     }
2575 
2576     return pc + instruction.length;
2577 }
2578 }  // namespace panda::compiler::amd64
2579