1 /*
2 * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
/*
Encoder (implementation of math and memory low-level emitters)
*/
18
19 #include <iomanip>
20
21 #include "libpandabase/utils/utils.h"
22 #include "compiler/optimizer/code_generator/relocations.h"
23 #include "compiler/optimizer/code_generator/fast_divisor.h"
24 #include "operands.h"
25 #include "scoped_tmp_reg.h"
26 #include "target/amd64/target.h"
27
28 #include "lib_helpers.inl"
29
30 #include "Zydis/Zydis.h"
31
32 #ifndef PANDA_TARGET_MACOS
33 #include "elf.h"
34 #endif // PANDA_TARGET_MACOS
35
36 namespace ark::compiler::amd64 {
37
static auto ArchCcInt(Condition cc)
39 {
40 switch (cc) {
41 case Condition::EQ:
42 return asmjit::x86::Condition::Code::kEqual;
43 case Condition::NE:
44 return asmjit::x86::Condition::Code::kNotEqual;
45 case Condition::LT:
46 return asmjit::x86::Condition::Code::kSignedLT;
47 case Condition::GT:
48 return asmjit::x86::Condition::Code::kSignedGT;
49 case Condition::LE:
50 return asmjit::x86::Condition::Code::kSignedLE;
51 case Condition::GE:
52 return asmjit::x86::Condition::Code::kSignedGE;
53 case Condition::LO:
54 return asmjit::x86::Condition::Code::kUnsignedLT;
55 case Condition::LS:
56 return asmjit::x86::Condition::Code::kUnsignedLE;
57 case Condition::HI:
58 return asmjit::x86::Condition::Code::kUnsignedGT;
59 case Condition::HS:
60 return asmjit::x86::Condition::Code::kUnsignedGE;
61 // NOTE(igorban) : Remove them
62 case Condition::MI:
63 return asmjit::x86::Condition::Code::kNegative;
64 case Condition::PL:
65 return asmjit::x86::Condition::Code::kPositive;
66 case Condition::VS:
67 return asmjit::x86::Condition::Code::kOverflow;
68 case Condition::VC:
69 return asmjit::x86::Condition::Code::kNotOverflow;
70 case Condition::AL:
71 case Condition::NV:
72 default:
73 UNREACHABLE();
74 return asmjit::x86::Condition::Code::kEqual;
75 }
76 }
static auto ArchCcFloat(Condition cc)
78 {
79 switch (cc) {
80 case Condition::EQ:
81 return asmjit::x86::Condition::Code::kEqual;
82 case Condition::NE:
83 return asmjit::x86::Condition::Code::kNotEqual;
84 case Condition::LT:
85 return asmjit::x86::Condition::Code::kUnsignedLT;
86 case Condition::GT:
87 return asmjit::x86::Condition::Code::kUnsignedGT;
88 case Condition::LE:
89 return asmjit::x86::Condition::Code::kUnsignedLE;
90 case Condition::GE:
91 return asmjit::x86::Condition::Code::kUnsignedGE;
92 case Condition::LO:
93 return asmjit::x86::Condition::Code::kUnsignedLT;
94 case Condition::LS:
95 return asmjit::x86::Condition::Code::kUnsignedLE;
96 case Condition::HI:
97 return asmjit::x86::Condition::Code::kUnsignedGT;
98 case Condition::HS:
99 return asmjit::x86::Condition::Code::kUnsignedGE;
100 // NOTE(igorban) : Remove them
101 case Condition::MI:
102 return asmjit::x86::Condition::Code::kNegative;
103 case Condition::PL:
104 return asmjit::x86::Condition::Code::kPositive;
105 case Condition::VS:
106 return asmjit::x86::Condition::Code::kOverflow;
107 case Condition::VC:
108 return asmjit::x86::Condition::Code::kNotOverflow;
109 case Condition::AL:
110 case Condition::NV:
111 default:
112 UNREACHABLE();
113 return asmjit::x86::Condition::Code::kEqual;
114 }
115 }
116 /// Converters
static asmjit::x86::Condition::Code ArchCc(Condition cc, bool isFloat = false)
118 {
119 return isFloat ? ArchCcFloat(cc) : ArchCcInt(cc);
120 }
121
static asmjit::x86::Condition::Code ArchCcTest(Condition cc)
123 {
124 ASSERT(cc == Condition::TST_EQ || cc == Condition::TST_NE);
125 return cc == Condition::TST_EQ ? asmjit::x86::Condition::Code::kEqual : asmjit::x86::Condition::Code::kNotEqual;
126 }
127
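// Note (added for clarity): for float comparisons, CcMatchesNan(cc) reports whether the branch must also be
// taken when the operands are unordered (at least one of them is NaN). After comiss/comisd an unordered
// result sets PF, so EncodeJump either points the extra "jp" at the target label (condition holds for NaN)
// or uses it to skip over the conditional jump (condition must fail for NaN).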
static bool CcMatchesNan(Condition cc)
129 {
130 switch (cc) {
131 case Condition::NE:
132 case Condition::LT:
133 case Condition::LE:
134 case Condition::HI:
135 case Condition::HS:
136 return true;
137
138 default:
139 return false;
140 }
141 }
142
143 /// Converters
static asmjit::x86::Gp ArchReg(Reg reg, uint8_t size = 0)
145 {
146 ASSERT(reg.IsValid());
147 if (reg.IsScalar()) {
148 size_t regSize = size == 0 ? reg.GetSize() : size;
149 auto archId = ConvertRegNumber(reg.GetId());
150
151 asmjit::x86::Gp archReg;
152 switch (regSize) {
153 case DOUBLE_WORD_SIZE:
154 archReg = asmjit::x86::Gp(asmjit::x86::Gpq::kSignature, archId);
155 break;
156 case WORD_SIZE:
157 archReg = asmjit::x86::Gp(asmjit::x86::Gpd::kSignature, archId);
158 break;
159 case HALF_SIZE:
160 archReg = asmjit::x86::Gp(asmjit::x86::Gpw::kSignature, archId);
161 break;
162 case BYTE_SIZE:
163 archReg = asmjit::x86::Gp(asmjit::x86::GpbLo::kSignature, archId);
164 break;
165
166 default:
167 UNREACHABLE();
168 }
169
170 ASSERT(archReg.isValid());
171 return archReg;
172 }
173 if (reg.GetId() == ConvertRegNumber(asmjit::x86::rsp.id())) {
174 return asmjit::x86::rsp;
175 }
176
177 // Invalid register type
178 UNREACHABLE();
179 return asmjit::x86::rax;
180 }
181
static asmjit::x86::Xmm ArchVReg(Reg reg)
183 {
184 ASSERT(reg.IsValid() && reg.IsFloat());
185 auto archVreg = asmjit::x86::xmm(reg.GetId());
186 return archVreg;
187 }
188
static asmjit::Imm ArchImm(Imm imm)
190 {
191 ASSERT(imm.GetType() == INT64_TYPE);
192 return asmjit::imm(imm.GetAsInt());
193 }
194
static uint64_t ImmToUnsignedInt(Imm imm)
196 {
197 ASSERT(imm.GetType() == INT64_TYPE);
198 return uint64_t(imm.GetAsInt());
199 }
200
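// Note (added for clarity): almost all x86-64 instructions encode at most a 32-bit immediate that is
// sign-extended to 64 bits, which is why a DOUBLE_WORD_SIZE operand is checked against the WORD_SIZE range.
// A rough usage sketch:
//   ImmFitsSize(127, BYTE_SIZE)                      -> true  (fits into a signed 8-bit immediate)
//   ImmFitsSize(INT64_C(1) << 40, DOUBLE_WORD_SIZE)  -> false (needs a temporary register and a mov)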
static bool ImmFitsSize(int64_t imm, uint8_t size)
202 {
203 if (size == DOUBLE_WORD_SIZE) {
204 size = WORD_SIZE;
205 }
206
207 // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult)
208 int64_t max = (uint64_t(1) << (size - 1U)) - 1U;
209 int64_t min = ~uint64_t(max);
210 ASSERT(min < 0);
211 ASSERT(max > 0);
212
213 return imm >= min && imm <= max;
214 }
215
LabelHolder::LabelId Amd64LabelHolder::CreateLabel()
217 {
218 ++id_;
219
220 auto masm = (static_cast<Amd64Encoder *>(GetEncoder()))->GetMasm();
221 auto label = masm->newLabel();
222
223 auto allocator = GetEncoder()->GetAllocator();
224 labels_.push_back(allocator->New<LabelType>(std::move(label)));
225 ASSERT(labels_.size() == id_);
226 return id_ - 1;
227 }
228
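// Note (added for clarity): x86 SIB addressing can only encode index scales of 1, 2, 4 and 8 (shift 0..3).
// Larger scales are remembered in bigShift_ and materialized later in Prepare() by explicitly shifting the
// index register; an index narrower than the base is also sign-extended there (needExtendIndex_), since both
// registers must have the same width inside a SIB byte.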
ArchMem::ArchMem(MemRef mem)
230 {
231 bool base = mem.HasBase();
232 bool regoffset = mem.HasIndex();
233 bool shift = mem.HasScale();
234 bool offset = mem.HasDisp();
235
236 if (base && !regoffset && !shift) {
237 // Default memory - base + offset
238 mem_ = asmjit::x86::ptr(ArchReg(mem.GetBase()), mem.GetDisp());
239 } else if (base && regoffset && !offset) {
240 auto baseSize = mem.GetBase().GetSize();
241 auto indexSize = mem.GetIndex().GetSize();
242
243 ASSERT(baseSize >= indexSize);
244 ASSERT(indexSize >= WORD_SIZE);
245
246 if (baseSize > indexSize) {
247 needExtendIndex_ = true;
248 }
249
250 if (mem.GetScale() == 0) {
251 mem_ = asmjit::x86::ptr(ArchReg(mem.GetBase()), ArchReg(mem.GetIndex(), baseSize));
252 } else {
253 auto scale = mem.GetScale();
254 if (scale <= 3U) {
255 mem_ = asmjit::x86::ptr(ArchReg(mem.GetBase()), ArchReg(mem.GetIndex(), baseSize), scale);
256 } else {
257 mem_ = asmjit::x86::ptr(ArchReg(mem.GetBase()), ArchReg(mem.GetIndex(), baseSize));
258 bigShift_ = scale;
259 }
260 }
261 } else {
262 // Wrong memRef
263 UNREACHABLE();
264 }
265 }
266
asmjit::x86::Mem ArchMem::Prepare(asmjit::x86::Assembler *masm)
268 {
269 if (isPrepared_) {
270 return mem_;
271 }
272
273 if (bigShift_ != 0) {
274 ASSERT(!mem_.hasOffset() && mem_.hasIndex() && bigShift_ > 3U);
275 masm->shl(mem_.indexReg().as<asmjit::x86::Gp>(), asmjit::imm(bigShift_));
276 }
277
278 if (needExtendIndex_) {
279 ASSERT(mem_.hasIndex());
280 auto qIndex = mem_.indexReg().as<asmjit::x86::Gp>();
281 auto dIndex {qIndex};
282 dIndex.setSignature(asmjit::x86::Gpd::kSignature);
283 masm->movsxd(qIndex, dIndex);
284 }
285
286 isPrepared_ = true;
287 return mem_;
288 }
289
AsmJitErrorHandler::AsmJitErrorHandler(Encoder *encoder) : encoder_(encoder)
291 {
292 ASSERT(encoder != nullptr);
293 }
294
void AsmJitErrorHandler::handleError([[maybe_unused]] asmjit::Error err, [[maybe_unused]] const char *message,
                                     [[maybe_unused]] asmjit::BaseEmitter *origin)
297 {
298 encoder_->SetFalseResult();
299 }
300
void Amd64LabelHolder::CreateLabels(LabelId max)
302 {
303 for (LabelId i = 0; i < max; ++i) {
304 CreateLabel();
305 }
306 }
307
Amd64LabelHolder::LabelType *Amd64LabelHolder::GetLabel(LabelId id)
309 {
310 ASSERT(labels_.size() > id);
311 return labels_[id];
312 }
313
Amd64LabelHolder::LabelId Amd64LabelHolder::Size()
315 {
316 return labels_.size();
317 }
318
void Amd64LabelHolder::BindLabel(LabelId id)
320 {
321 static_cast<Amd64Encoder *>(GetEncoder())->GetMasm()->bind(*labels_[id]);
322 }
323
Amd64Encoder::Amd64Encoder(ArenaAllocator *allocator) : Encoder(allocator, Arch::X86_64, false) {}
325
Amd64Encoder::~Amd64Encoder()
327 {
328 if (masm_ != nullptr) {
329 masm_->~Assembler();
330 masm_ = nullptr;
331 }
332
333 if (codeHolder_ != nullptr) {
334 codeHolder_->~CodeHolder();
335 codeHolder_ = nullptr;
336 }
337
338 if (errorHandler_ != nullptr) {
339 errorHandler_->~ErrorHandler();
340 errorHandler_ = nullptr;
341 }
342
343 if (labels_ != nullptr) {
344 labels_->~Amd64LabelHolder();
345 labels_ = nullptr;
346 }
347 }
348
LabelHolder *Amd64Encoder::GetLabels() const
350 {
351 ASSERT(labels_ != nullptr);
352 return labels_;
353 }
354
bool Amd64Encoder::IsValid() const
356 {
357 return true;
358 }
359
constexpr auto Amd64Encoder::GetTarget()
361 {
362 return ark::compiler::Target(Arch::X86_64);
363 }
364
bool Amd64Encoder::InitMasm()
366 {
367 if (masm_ == nullptr) {
368 labels_ = GetAllocator()->New<Amd64LabelHolder>(this);
369 if (labels_ == nullptr) {
370 SetFalseResult();
371 return false;
372 }
373
374 asmjit::Environment env;
375 env.setArch(asmjit::Environment::kArchX64);
376
377 codeHolder_ = GetAllocator()->New<asmjit::CodeHolder>(GetAllocator());
378 if (codeHolder_ == nullptr) {
379 SetFalseResult();
380 return false;
381 }
382 codeHolder_->init(env, 0U);
383
384 masm_ = GetAllocator()->New<asmjit::x86::Assembler>(codeHolder_);
385 if (masm_ == nullptr) {
386 SetFalseResult();
387 return false;
388 }
389
390 // Enable strict validation.
391 masm_->addValidationOptions(asmjit::BaseEmitter::kValidationOptionAssembler);
392 errorHandler_ = GetAllocator()->New<AsmJitErrorHandler>(this);
393 if (errorHandler_ == nullptr) {
394 SetFalseResult();
395 return false;
396 }
397 masm_->setErrorHandler(errorHandler_);
398
399 // Make sure that the compiler uses the same scratch registers as the assembler
400 CHECK_EQ(compiler::arch_info::x86_64::TEMP_REGS, GetTarget().GetTempRegsMask());
401 CHECK_EQ(compiler::arch_info::x86_64::TEMP_FP_REGS, GetTarget().GetTempVRegsMask());
402 }
403 return true;
404 }
405
void Amd64Encoder::Finalize()
407 {
408 auto code = GetMasm()->code();
409 auto codeSize = code->codeSize();
410
411 code->flatten();
412 code->resolveUnresolvedLinks();
413
414 auto codeBuffer = GetAllocator()->Alloc(codeSize);
415
416 code->relocateToBase(reinterpret_cast<uintptr_t>(codeBuffer));
417 code->copyFlattenedData(codeBuffer, codeSize, asmjit::CodeHolder::kCopyPadSectionBuffer);
418 }
419
void Amd64Encoder::EncodeJump(LabelHolder::LabelId id)
421 {
422 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
423 GetMasm()->jmp(*label);
424 }
425
void Amd64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
427 {
428 if (src0.IsScalar()) {
429 if (src0.GetSize() == src1.GetSize()) {
430 GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
431 } else if (src0.GetSize() > src1.GetSize()) {
432 ScopedTmpReg tmpReg(this, src0.GetType());
433 EncodeCast(tmpReg, false, src1, false);
434 GetMasm()->cmp(ArchReg(src0), ArchReg(tmpReg));
435 } else {
436 ScopedTmpReg tmpReg(this, src1.GetType());
437 EncodeCast(tmpReg, false, src0, false);
438 GetMasm()->cmp(ArchReg(tmpReg), ArchReg(src1));
439 }
440 } else if (src0.GetType() == FLOAT32_TYPE) {
441 GetMasm()->comiss(ArchVReg(src0), ArchVReg(src1));
442 } else {
443 GetMasm()->comisd(ArchVReg(src0), ArchVReg(src1));
444 }
445
446 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
447 if (src0.IsScalar()) {
448 GetMasm()->j(ArchCc(cc, src0.IsFloat()), *label);
449 return;
450 }
451
452 if (CcMatchesNan(cc)) {
453 GetMasm()->jp(*label);
454 GetMasm()->j(ArchCc(cc, src0.IsFloat()), *label);
455 } else {
456 auto end = GetMasm()->newLabel();
457
458 GetMasm()->jp(end);
459 GetMasm()->j(ArchCc(cc, src0.IsFloat()), *label);
460 GetMasm()->bind(end);
461 }
462 }
463
void Amd64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
465 {
466 ASSERT(src.IsScalar());
467
468 auto immVal = imm.GetAsInt();
469 if (immVal == 0) {
470 EncodeJump(id, src, cc);
471 return;
472 }
473
474 if (ImmFitsSize(immVal, src.GetSize())) {
475 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
476
477 GetMasm()->cmp(ArchReg(src), asmjit::imm(immVal));
478 GetMasm()->j(ArchCc(cc), *label);
479 } else {
480 ScopedTmpReg tmpReg(this, src.GetType());
481 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
482 EncodeJump(id, src, tmpReg, cc);
483 }
484 }
485
void Amd64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
487 {
488 ASSERT(src0.IsScalar());
489 if (src0.GetSize() == src1.GetSize()) {
490 GetMasm()->test(ArchReg(src0), ArchReg(src1));
491 } else if (src0.GetSize() > src1.GetSize()) {
492 ScopedTmpReg tmpReg(this, src0.GetType());
493 EncodeCast(tmpReg, false, src1, false);
494 GetMasm()->test(ArchReg(src0), ArchReg(tmpReg));
495 } else {
496 ScopedTmpReg tmpReg(this, src1.GetType());
497 EncodeCast(tmpReg, false, src0, false);
498 GetMasm()->test(ArchReg(tmpReg), ArchReg(src1));
499 }
500
501 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
502 GetMasm()->j(ArchCcTest(cc), *label);
503 }
504
void Amd64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
506 {
507 ASSERT(src.IsScalar());
508
509 auto immVal = imm.GetAsInt();
510 if (ImmFitsSize(immVal, src.GetSize())) {
511 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
512
513 GetMasm()->test(ArchReg(src), asmjit::imm(immVal));
514 GetMasm()->j(ArchCcTest(cc), *label);
515 } else {
516 ScopedTmpReg tmpReg(this, src.GetType());
517 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
518 EncodeJumpTest(id, src, tmpReg, cc);
519 }
520 }
521
void Amd64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Condition cc)
523 {
524 if (src.IsScalar()) {
525 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
526
527 GetMasm()->cmp(ArchReg(src), asmjit::imm(0));
528 GetMasm()->j(ArchCc(cc), *label);
529 return;
530 }
531
532 ScopedTmpReg tmpReg(this, src.GetType());
533 if (src.GetType() == FLOAT32_TYPE) {
534 GetMasm()->xorps(ArchVReg(tmpReg), ArchVReg(tmpReg));
535 } else {
536 GetMasm()->xorpd(ArchVReg(tmpReg), ArchVReg(tmpReg));
537 }
538 EncodeJump(id, src, tmpReg, cc);
539 }
540
void Amd64Encoder::EncodeJump(Reg dst)
542 {
543 GetMasm()->jmp(ArchReg(dst));
544 }
545
void Amd64Encoder::EncodeJump(RelocationInfo *relocation)
547 {
548 #ifdef PANDA_TARGET_MACOS
    LOG(FATAL, COMPILER) << "Not supported in macOS build";
550 #else
551 // NOLINTNEXTLINE(readability-magic-numbers)
552 std::array<uint8_t, 5U> data = {0xe9, 0, 0, 0, 0};
553 GetMasm()->embed(data.data(), data.size());
554
555 constexpr int ADDEND = 4;
556 relocation->offset = GetCursorOffset() - ADDEND;
557 relocation->addend = -ADDEND;
558 relocation->type = R_X86_64_PLT32;
559 #endif
560 }
561
void Amd64Encoder::EncodeBitTestAndBranch(LabelHolder::LabelId id, compiler::Reg reg, uint32_t bitPos, bool bitValue)
563 {
564 ASSERT(reg.IsScalar() && reg.GetSize() > bitPos);
565 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
566 if (reg.GetSize() == DOUBLE_WORD_SIZE) {
567 ScopedTmpRegU64 tmpReg(this);
568 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(static_cast<uint64_t>(1) << bitPos));
569 GetMasm()->test(ArchReg(reg), ArchReg(tmpReg));
570 } else {
571 GetMasm()->test(ArchReg(reg), asmjit::imm(1U << bitPos));
572 }
573 if (bitValue) {
574 GetMasm()->j(ArchCc(Condition::NE), *label);
575 } else {
576 GetMasm()->j(ArchCc(Condition::EQ), *label);
577 }
578 }
579
void Amd64Encoder::MakeCall([[maybe_unused]] compiler::RelocationInfo *relocation)
581 {
582 #ifdef PANDA_TARGET_MACOS
    LOG(FATAL, COMPILER) << "Not supported in macOS build";
584 #else
585 // NOLINTNEXTLINE(readability-magic-numbers)
586 std::array<uint8_t, 5U> data = {0xe8, 0, 0, 0, 0};
587 GetMasm()->embed(data.data(), data.size());
588
589 relocation->offset = GetCursorOffset() - 4_I;
590 relocation->addend = -4_I;
591 relocation->type = R_X86_64_PLT32;
592 #endif
593 }
594
void Amd64Encoder::MakeCall(LabelHolder::LabelId id)
596 {
597 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
598 GetMasm()->call(*label);
599 }
600
void Amd64Encoder::MakeCall(const void *entryPoint)
602 {
603 ScopedTmpRegU64 tmpReg(this);
604 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(entryPoint));
605 GetMasm()->call(ArchReg(tmpReg));
606 }
607
void Amd64Encoder::MakeCall(Reg reg)
609 {
610 GetMasm()->call(ArchReg(reg));
611 }
612
void Amd64Encoder::MakeCall(MemRef entryPoint)
614 {
615 ScopedTmpRegU64 tmpReg(this);
616 EncodeLdr(tmpReg, false, entryPoint);
617 GetMasm()->call(ArchReg(tmpReg));
618 }
619
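// Note (added for clarity): a RIP-relative displacement is counted from the end of the instruction, and the
// final length of the mov/lea is not known before it is emitted. So the instruction is encoded twice: the
// first pass measures its length, then the cursor is rewound and the instruction is re-emitted with the
// offset reduced by that length, so the displacement points at the intended position.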
template <typename Func>
void Amd64Encoder::EncodeRelativePcMov(Reg reg, intptr_t offset, Func encodeInstruction)
622 {
623 // NOLINTNEXTLINE(readability-identifier-naming)
624 auto pos = GetMasm()->offset();
625 encodeInstruction(reg, offset);
626 // NOLINTNEXTLINE(readability-identifier-naming)
627 offset -= (GetMasm()->offset() - pos);
628 // NOLINTNEXTLINE(readability-identifier-naming)
629 GetMasm()->setOffset(pos);
630 encodeInstruction(reg, offset);
631 }
632
void Amd64Encoder::MakeCallAot(intptr_t offset)
634 {
635 ScopedTmpRegU64 tmpReg(this);
636 EncodeRelativePcMov(tmpReg, offset, [this](Reg reg, intptr_t offset) {
637 GetMasm()->long_().mov(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
638 });
639 GetMasm()->call(ArchReg(tmpReg));
640 }
641
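// Note (added for clarity): a direct near "call rel32" only carries a signed 32-bit displacement, so a call
// by offset is possible only when the offset survives a round-trip through int32_t.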
bool Amd64Encoder::CanMakeCallByOffset(intptr_t offset)
643 {
644 return offset == static_cast<intptr_t>(static_cast<int32_t>(offset));
645 }
646
void Amd64Encoder::MakeCallByOffset(intptr_t offset)
648 {
649 GetMasm()->call(GetCursorOffset() + int32_t(offset));
650 }
651
void Amd64Encoder::MakeLoadAotTable(intptr_t offset, Reg reg)
653 {
654 EncodeRelativePcMov(reg, offset, [this](Reg reg, intptr_t offset) {
655 GetMasm()->long_().mov(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
656 });
657 }
658
void Amd64Encoder::MakeLoadAotTableAddr([[maybe_unused]] intptr_t offset, [[maybe_unused]] Reg addr,
                                        [[maybe_unused]] Reg val)
661 {
662 EncodeRelativePcMov(addr, offset, [this](Reg reg, intptr_t offset) {
663 GetMasm()->long_().lea(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
664 });
665 GetMasm()->mov(ArchReg(val), asmjit::x86::ptr(ArchReg(addr)));
666 }
667
void Amd64Encoder::EncodeAbort()
669 {
670 GetMasm()->int3();
671 }
672
void Amd64Encoder::EncodeReturn()
674 {
675 GetMasm()->ret();
676 }
677
void Amd64Encoder::EncodeMul([[maybe_unused]] Reg dst, [[maybe_unused]] Reg src, [[maybe_unused]] Imm imm)
679 {
680 SetFalseResult();
681 }
682
void Amd64Encoder::EncodeNop()
684 {
685 GetMasm()->nop();
686 }
687
void Amd64Encoder::EncodeMov(Reg dst, Reg src)
689 {
690 if (dst == src) {
691 return;
692 }
693
694 if (dst.IsFloat() != src.IsFloat()) {
695 ASSERT(src.GetSize() == dst.GetSize());
696 if (dst.GetSize() == WORD_SIZE) {
697 if (dst.IsFloat()) {
698 GetMasm()->movd(ArchVReg(dst), ArchReg(src));
699 } else {
700 GetMasm()->movd(ArchReg(dst), ArchVReg(src));
701 }
702 } else {
703 ASSERT(dst.GetSize() == DOUBLE_WORD_SIZE);
704 if (dst.IsFloat()) {
705 GetMasm()->movq(ArchVReg(dst), ArchReg(src));
706 } else {
707 GetMasm()->movq(ArchReg(dst), ArchVReg(src));
708 }
709 }
710 return;
711 }
712
713 if (dst.IsFloat()) {
714 ASSERT(src.IsFloat());
715 if (dst.GetType() == FLOAT32_TYPE) {
716 GetMasm()->movss(ArchVReg(dst), ArchVReg(src));
717 } else {
718 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
719 }
720 return;
721 }
722
723 if (dst.GetSize() < WORD_SIZE && dst.GetSize() == src.GetSize()) {
724 GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
725 }
726
727 if (dst.GetSize() == src.GetSize()) {
728 GetMasm()->mov(ArchReg(dst), ArchReg(src));
729 } else {
730 EncodeCast(dst, false, src, false);
731 }
732 }
733
void Amd64Encoder::EncodeNeg(Reg dst, Reg src)
735 {
736 if (dst.IsScalar()) {
737 EncodeMov(dst, src);
738 GetMasm()->neg(ArchReg(dst));
739 return;
740 }
741
742 if (dst.GetType() == FLOAT32_TYPE) {
743 ScopedTmpRegF32 tmp(this);
744 CopyImmToXmm(tmp, -0.0F);
745
746 if (dst.GetId() != src.GetId()) {
747 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
748 }
749 GetMasm()->xorps(ArchVReg(dst), ArchVReg(tmp));
750 } else {
751 ScopedTmpRegF64 tmp(this);
752 CopyImmToXmm(tmp, -0.0);
753
754 if (dst.GetId() != src.GetId()) {
755 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
756 }
757 GetMasm()->xorps(ArchVReg(dst), ArchVReg(tmp));
758 }
759 }
760
void Amd64Encoder::EncodeAbs(Reg dst, Reg src)
762 {
763 if (dst.IsScalar()) {
764 auto size = std::max<uint8_t>(src.GetSize(), WORD_SIZE);
765
766 if (dst.GetId() != src.GetId()) {
767 GetMasm()->mov(ArchReg(dst), ArchReg(src));
768 GetMasm()->neg(ArchReg(dst));
769 GetMasm()->cmovl(ArchReg(dst, size), ArchReg(src, size));
770 } else if (GetScratchRegistersCount() > 0) {
771 ScopedTmpReg tmpReg(this, dst.GetType());
772
773 GetMasm()->mov(ArchReg(tmpReg), ArchReg(src));
774 GetMasm()->neg(ArchReg(tmpReg));
775
776 GetMasm()->cmovl(ArchReg(tmpReg, size), ArchReg(src, size));
777 GetMasm()->mov(ArchReg(dst), ArchReg(tmpReg));
778 } else {
779 auto end = GetMasm()->newLabel();
780
781 GetMasm()->test(ArchReg(dst), ArchReg(dst));
782 GetMasm()->jns(end);
783
784 GetMasm()->neg(ArchReg(dst));
785 GetMasm()->bind(end);
786 }
787 return;
788 }
789
790 if (dst.GetType() == FLOAT32_TYPE) {
791 ScopedTmpRegF32 tmp(this);
792 // NOLINTNEXTLINE(readability-magic-numbers)
793 CopyImmToXmm(tmp, uint32_t(0x7fffffff));
794
795 if (dst.GetId() != src.GetId()) {
796 GetMasm()->movss(ArchVReg(dst), ArchVReg(src));
797 }
798 GetMasm()->andps(ArchVReg(dst), ArchVReg(tmp));
799 } else {
800 ScopedTmpRegF64 tmp(this);
801 // NOLINTNEXTLINE(readability-magic-numbers)
802 CopyImmToXmm(tmp, uint64_t(0x7fffffffffffffff));
803
804 if (dst.GetId() != src.GetId()) {
805 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
806 }
807 GetMasm()->andps(ArchVReg(dst), ArchVReg(tmp));
808 }
809 }
810
void Amd64Encoder::EncodeNot(Reg dst, Reg src)
812 {
813 ASSERT(dst.IsScalar());
814
815 EncodeMov(dst, src);
816 GetMasm()->not_(ArchReg(dst));
817 }
818
void Amd64Encoder::EncodeSqrt(Reg dst, Reg src)
820 {
821 ASSERT(dst.IsFloat());
822 if (src.GetType() == FLOAT32_TYPE) {
823 GetMasm()->sqrtps(ArchVReg(dst), ArchVReg(src));
824 } else {
825 GetMasm()->sqrtpd(ArchVReg(dst), ArchVReg(src));
826 }
827 }
828
void Amd64Encoder::EncodeCastFloatToScalar(Reg dst, bool dstSigned, Reg src)
830 {
    // We DON'T support casts from float32/64 to int8/16 and bool, because this cast is not defined
    // in other languages or architectures, so we do not know what the behavior should be.
833 ASSERT(dst.GetSize() >= WORD_SIZE);
834 auto end = GetMasm()->newLabel();
835
836 // if src is NaN, then dst = 0
837 EncodeCastFloatCheckNan(dst, src, end);
838
839 if (dstSigned) {
840 EncodeCastFloatSignCheckRange(dst, src, end);
841 } else {
842 EncodeCastFloatUnsignCheckRange(dst, src, end);
843 }
844
845 if (src.GetType() == FLOAT32_TYPE) {
846 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
847 EncodeCastFloat32ToUint64(dst, src);
848 } else {
849 GetMasm()->cvttss2si(ArchReg(dst, DOUBLE_WORD_SIZE), ArchVReg(src));
850 }
851 } else {
852 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
853 EncodeCastFloat64ToUint64(dst, src);
854 } else {
855 GetMasm()->cvttsd2si(ArchReg(dst, DOUBLE_WORD_SIZE), ArchVReg(src));
856 }
857 }
858
859 GetMasm()->bind(end);
860 }
861
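// Note (added for clarity): cvttss2si/cvttsd2si only produce signed results, so the two helpers below use the
// usual SSE2 pattern for an unsigned 64-bit cast. A sketch of the equivalent logic:
//   if (src < 2^63) {
//       dst = static_cast<uint64_t>(static_cast<int64_t>(src));                       // fits the signed range
//   } else {
//       dst = static_cast<uint64_t>(static_cast<int64_t>(src - 2^63)) ^ (1ULL << 63);  // convert the low part, restore the top bit
//   }
// 0x5F000000 and 0x43E0000000000000 are the float32/float64 bit patterns of 2^63.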
void Amd64Encoder::EncodeCastFloat32ToUint64(Reg dst, Reg src)
863 {
864 auto bigNumberLabel = GetMasm()->newLabel();
865 auto endLabel = GetMasm()->newLabel();
866 ScopedTmpReg tmpReg(this, src.GetType());
867 ScopedTmpReg tmpNum(this, dst.GetType());
868
    // 0x5F000000 is the float32 bit pattern of 2^63 - the smallest value that no longer fits into a signed int64
    // NOLINTNEXTLINE (readability-magic-numbers)
871 GetMasm()->mov(ArchReg(dst, WORD_SIZE), asmjit::imm(0x5F000000));
872 GetMasm()->movd(ArchVReg(tmpReg), ArchReg(dst, WORD_SIZE));
873 GetMasm()->comiss(ArchVReg(src), ArchVReg(tmpReg));
874 GetMasm()->jnb(bigNumberLabel);
875
876 GetMasm()->cvttss2si(ArchReg(dst), ArchVReg(src));
877 GetMasm()->jmp(endLabel);
878
879 GetMasm()->bind(bigNumberLabel);
880 GetMasm()->subss(ArchVReg(src), ArchVReg(tmpReg));
881 GetMasm()->cvttss2si(ArchReg(dst), ArchVReg(src));
882 // NOLINTNEXTLINE (readability-magic-numbers)
883 GetMasm()->mov(ArchReg(tmpNum), asmjit::imm(0x8000000000000000));
884 GetMasm()->xor_(ArchReg(dst), ArchReg(tmpNum));
885 GetMasm()->bind(endLabel);
886 }
887
void Amd64Encoder::EncodeCastFloat64ToUint64(Reg dst, Reg src)
889 {
890 auto bigNumberLabel = GetMasm()->newLabel();
891 auto endLabel = GetMasm()->newLabel();
892 ScopedTmpReg tmpReg(this, src.GetType());
893 ScopedTmpReg tmpNum(this, dst.GetType());
894
    // 0x43E0000000000000 is the float64 bit pattern of 2^63 - the smallest value that no longer fits into a signed int64
    // NOLINTNEXTLINE (readability-magic-numbers)
897 GetMasm()->mov(ArchReg(dst), asmjit::imm(0x43E0000000000000));
898 GetMasm()->movq(ArchVReg(tmpReg), ArchReg(dst));
899 GetMasm()->comisd(ArchVReg(src), ArchVReg(tmpReg));
900 GetMasm()->jnb(bigNumberLabel);
901
902 GetMasm()->cvttsd2si(ArchReg(dst), ArchVReg(src));
903 GetMasm()->jmp(endLabel);
904
905 GetMasm()->bind(bigNumberLabel);
906 GetMasm()->subsd(ArchVReg(src), ArchVReg(tmpReg));
907 GetMasm()->cvttsd2si(ArchReg(dst), ArchVReg(src));
908 // NOLINTNEXTLINE (readability-magic-numbers)
909 GetMasm()->mov(ArchReg(tmpNum), asmjit::imm(0x8000000000000000));
910 GetMasm()->xor_(ArchReg(dst), ArchReg(tmpNum));
911 GetMasm()->bind(endLabel);
912 }
913
void Amd64Encoder::EncodeCastFloatCheckNan(Reg dst, Reg src, const asmjit::Label &end)
915 {
916 GetMasm()->xor_(ArchReg(dst, DOUBLE_WORD_SIZE), ArchReg(dst, DOUBLE_WORD_SIZE));
917 if (src.GetType() == FLOAT32_TYPE) {
918 GetMasm()->ucomiss(ArchVReg(src), ArchVReg(src));
919 } else {
920 GetMasm()->ucomisd(ArchVReg(src), ArchVReg(src));
921 }
922 GetMasm()->jp(end);
923 }
924
void Amd64Encoder::EncodeCastFloatSignCheckRange(Reg dst, Reg src, const asmjit::Label &end)
926 {
927 // if src < INT_MIN, then dst = INT_MIN
928 // if src >= (INT_MAX + 1), then dst = INT_MAX
929 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
930 EncodeCastFloatCheckRange(dst, src, end, INT64_MIN, INT64_MAX);
931 } else {
932 EncodeCastFloatCheckRange(dst, src, end, INT32_MIN, INT32_MAX);
933 }
934 }
935
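// Note (added for clarity): the range check pre-loads dst with the saturated result (minValue or maxValue)
// and jumps straight to "end" when src is below/above the representable range, so the actual
// cvttss2si/cvttsd2si in the caller only runs for in-range inputs.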
void Amd64Encoder::EncodeCastFloatCheckRange(Reg dst, Reg src, const asmjit::Label &end, const int64_t minValue,
                                             const uint64_t maxValue)
938 {
939 ScopedTmpReg cmpReg(this, src.GetType());
940 ScopedTmpReg tmpReg(this, src.GetType() == FLOAT64_TYPE ? INT64_TYPE : INT32_TYPE);
941
942 GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(minValue));
943 if (src.GetType() == FLOAT32_TYPE) {
944 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(bit_cast<uint32_t>(float(minValue))));
945 GetMasm()->movd(ArchVReg(cmpReg), ArchReg(tmpReg));
946 GetMasm()->ucomiss(ArchVReg(src), ArchVReg(cmpReg));
947 } else {
948 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(bit_cast<uint64_t>(double(minValue))));
949 GetMasm()->movq(ArchVReg(cmpReg), ArchReg(tmpReg));
950 GetMasm()->ucomisd(ArchVReg(src), ArchVReg(cmpReg));
951 }
952 GetMasm()->jb(end);
953
954 GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(maxValue));
955 if (src.GetType() == FLOAT32_TYPE) {
956 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(bit_cast<uint32_t>(float(maxValue) + 1U)));
957 GetMasm()->movd(ArchVReg(cmpReg), ArchReg(tmpReg));
958 GetMasm()->ucomiss(ArchVReg(src), ArchVReg(cmpReg));
959 } else {
960 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(bit_cast<uint64_t>(double(maxValue) + 1U)));
961 GetMasm()->movq(ArchVReg(cmpReg), ArchReg(tmpReg));
962 GetMasm()->ucomisd(ArchVReg(src), ArchVReg(cmpReg));
963 }
964 GetMasm()->jae(end);
965 }
966
void Amd64Encoder::EncodeCastFloatUnsignCheckRange(Reg dst, Reg src, const asmjit::Label &end)
968 {
969 // if src < 0, then dst = 0
970 // if src >= (UINT_MAX + 1), then dst = UINT_MAX
971 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
972 EncodeCastFloatCheckRange(dst, src, end, 0, UINT64_MAX);
973 } else {
974 EncodeCastFloatCheckRange(dst, src, end, 0, UINT32_MAX);
975 }
976 }
977
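// Note (added for clarity): SSE2 has no unsigned 64-bit to float conversion, so two classic tricks are used
// below. For float32 the value is halved with the lowest bit ORed back in (round-to-odd), converted as signed
// and then doubled. For float64 the value is split into 32-bit halves, each biased by 2^52 / 2^84 (the
// ARR1/ARR2 constants), the biases are subtracted and the two partial doubles are summed.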
void Amd64Encoder::EncodeCastScalarToFloatUnsignDouble(Reg dst, Reg src)
979 {
980 if (dst.GetType() == FLOAT32_TYPE) {
981 ScopedTmpRegU64 int1Reg(this);
982 ScopedTmpRegU64 int2Reg(this);
983
984 auto sgn = GetMasm()->newLabel();
985 auto end = GetMasm()->newLabel();
986
987 GetMasm()->test(ArchReg(src), ArchReg(src));
988 GetMasm()->js(sgn);
989 GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(src));
990 GetMasm()->jmp(end);
991
992 GetMasm()->bind(sgn);
993 GetMasm()->mov(ArchReg(int1Reg), ArchReg(src));
994 GetMasm()->mov(ArchReg(int2Reg), ArchReg(src));
995 GetMasm()->shr(ArchReg(int2Reg), asmjit::imm(1));
996 GetMasm()->and_(ArchReg(int1Reg, WORD_SIZE), asmjit::imm(1));
997 GetMasm()->or_(ArchReg(int1Reg), ArchReg(int2Reg));
998 GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(int1Reg));
999 GetMasm()->addss(ArchVReg(dst), ArchVReg(dst));
1000
1001 GetMasm()->bind(end);
1002 } else {
1003 static constexpr std::array<uint32_t, 4> ARR1 = {uint32_t(0x43300000), uint32_t(0x45300000), 0x0, 0x0};
1004 static constexpr std::array<uint64_t, 2> ARR2 = {uint64_t(0x4330000000000000), uint64_t(0x4530000000000000)};
1005
1006 ScopedTmpReg float1Reg(this, dst.GetType());
1007 ScopedTmpRegF64 tmp(this);
1008
1009 GetMasm()->movq(ArchVReg(float1Reg), ArchReg(src));
1010 CopyArrayToXmm(tmp, ARR1);
1011 GetMasm()->punpckldq(ArchVReg(float1Reg), ArchVReg(tmp));
1012 CopyArrayToXmm(tmp, ARR2);
1013 GetMasm()->subpd(ArchVReg(float1Reg), ArchVReg(tmp));
1014 GetMasm()->movapd(ArchVReg(dst), ArchVReg(float1Reg));
1015 GetMasm()->unpckhpd(ArchVReg(dst), ArchVReg(float1Reg));
1016 GetMasm()->addsd(ArchVReg(dst), ArchVReg(float1Reg));
1017 }
1018 }
1019
void Amd64Encoder::EncodeCastScalarToFloat(Reg dst, Reg src, bool srcSigned)
1021 {
1022 if (!srcSigned && src.GetSize() == DOUBLE_WORD_SIZE) {
1023 EncodeCastScalarToFloatUnsignDouble(dst, src);
1024 return;
1025 }
1026
1027 if (src.GetSize() < WORD_SIZE || (srcSigned && src.GetSize() == WORD_SIZE)) {
1028 if (dst.GetType() == FLOAT32_TYPE) {
1029 GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(src, WORD_SIZE));
1030 } else {
1031 GetMasm()->cvtsi2sd(ArchVReg(dst), ArchReg(src, WORD_SIZE));
1032 }
1033 return;
1034 }
1035
1036 if (!srcSigned && src.GetSize() == WORD_SIZE) {
1037 ScopedTmpRegU64 int1Reg(this);
1038
1039 GetMasm()->mov(ArchReg(int1Reg, WORD_SIZE), ArchReg(src, WORD_SIZE));
1040 if (dst.GetType() == FLOAT32_TYPE) {
1041 GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(int1Reg));
1042 } else {
1043 GetMasm()->cvtsi2sd(ArchVReg(dst), ArchReg(int1Reg));
1044 }
1045 return;
1046 }
1047
1048 ASSERT(srcSigned && src.GetSize() == DOUBLE_WORD_SIZE);
1049 if (dst.GetType() == FLOAT32_TYPE) {
1050 GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(src));
1051 } else {
1052 GetMasm()->cvtsi2sd(ArchVReg(dst), ArchReg(src));
1053 }
1054 }
1055
void Amd64Encoder::EncodeCastToBool(Reg dst, Reg src)
1057 {
    // The ISA says that we only support the casts:
    // i32tou1, i64tou1, u32tou1, u64tou1
1060 ASSERT(src.IsScalar());
1061 ASSERT(dst.IsScalar());
1062
    // In our ISA the minimal scalar type is 32-bit, so bool is held in a 32-bit register
1064 GetMasm()->test(ArchReg(src), ArchReg(src));
    // A single "mov" is better than two jumps; note that "mov" does not clobber the flags set by "test" above.
1066 GetMasm()->mov(ArchReg(dst, WORD_SIZE), asmjit::imm(0));
1067 GetMasm()->setne(ArchReg(dst));
1068 }
1069
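// Note (added for clarity): out-of-range inputs make cvttsd2si return INT64_MIN (the x86 "integer indefinite"
// value), and "cmp dst, 1" overflows (OF=1) only for INT64_MIN, so the "jo" below routes exactly the
// overflowing inputs to the slow path.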
void Amd64Encoder::EncodeFastPathDynamicCast(Reg dst, Reg src, LabelHolder::LabelId slow)
1071 {
1072 ASSERT(IsLabelValid(slow));
1073 ASSERT(src.IsFloat() && dst.IsScalar());
1074
1075 CHECK_EQ(src.GetSize(), BITS_PER_UINT64);
1076 CHECK_EQ(dst.GetSize(), BITS_PER_UINT32);
1077
1078 auto end {GetMasm()->newLabel()};
1079
1080 // if src is NaN, then dst = 0
1081 EncodeCastFloatCheckNan(dst, src, end);
1082
1083 // infinite and big numbers will overflow here to INT64_MIN
1084 GetMasm()->cvttsd2si(ArchReg(dst, DOUBLE_WORD_SIZE), ArchVReg(src));
1085 // check INT64_MIN
1086 GetMasm()->cmp(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(1));
1087 auto slowLabel {static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(slow)};
1088 // jump to slow path in case of overflow
1089 GetMasm()->jo(*slowLabel);
1090
1091 GetMasm()->bind(end);
1092 }
1093
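// Note (added for clarity): same INT64_MIN / "cmp ..., 1" / OF trick as in EncodeFastPathDynamicCast above,
// but the overflow case is resolved branchlessly with cmovo instead of a jump to a slow path.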
void Amd64Encoder::EncodeJsDoubleToCharCast(Reg dst, Reg src, Reg tmp, uint32_t failureResult)
1095 {
1096 ASSERT(src.IsFloat() && dst.IsScalar());
1097
1098 CHECK_EQ(src.GetSize(), BITS_PER_UINT64);
1099 CHECK_EQ(dst.GetSize(), BITS_PER_UINT32);
1100
    // Infinite and big numbers, as well as NaN, overflow here to INT64_MIN (the x86 "integer indefinite" value).
1102 GetMasm()->cvttsd2si(ArchReg(dst, DOUBLE_WORD_SIZE), ArchVReg(src));
1103 // save the result to tmp
1104 GetMasm()->mov(ArchReg(tmp, DOUBLE_WORD_SIZE), ArchReg(dst, DOUBLE_WORD_SIZE));
1105 // 'and' the result with 0xffff
1106 constexpr uint32_t UTF16_CHAR_MASK = 0xffff;
1107 GetMasm()->and_(ArchReg(dst), asmjit::imm(UTF16_CHAR_MASK));
1108 // check INT64_MIN
1109 GetMasm()->cmp(ArchReg(tmp, DOUBLE_WORD_SIZE), asmjit::imm(1));
1110 // 'mov' never affects the flags
1111 GetMasm()->mov(ArchReg(tmp, DOUBLE_WORD_SIZE), failureResult);
1112 // ... and we may move conditionally the failureResult into dst for overflow only
1113 GetMasm()->cmovo(ArchReg(dst), ArchReg(tmp));
1114 }
1115
void Amd64Encoder::EncodeCast(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1117 {
1118 if (src.IsFloat() && dst.IsScalar()) {
1119 EncodeCastFloatToScalar(dst, dstSigned, src);
1120 return;
1121 }
1122
1123 if (src.IsScalar() && dst.IsFloat()) {
1124 EncodeCastScalarToFloat(dst, src, srcSigned);
1125 return;
1126 }
1127
1128 if (src.IsFloat() && dst.IsFloat()) {
1129 if (src.GetSize() != dst.GetSize()) {
1130 if (src.GetType() == FLOAT32_TYPE) {
1131 GetMasm()->cvtss2sd(ArchVReg(dst), ArchVReg(src));
1132 } else {
1133 GetMasm()->cvtsd2ss(ArchVReg(dst), ArchVReg(src));
1134 }
1135 return;
1136 }
1137
1138 if (src.GetType() == FLOAT32_TYPE) {
1139 GetMasm()->movss(ArchVReg(dst), ArchVReg(src));
1140 } else {
1141 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
1142 }
1143 return;
1144 }
1145
1146 ASSERT(src.IsScalar() && dst.IsScalar());
1147 EncodeCastScalar(dst, dstSigned, src, srcSigned);
1148 }
1149
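// Note (added for clarity): on x86-64 any write to a 32-bit register implicitly zeroes the upper 32 bits,
// which is why the unsigned 32->64 case below is just "mov r32, r32" and why extendTo32bit only has to
// handle sub-word (8/16-bit) destinations.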
void Amd64Encoder::EncodeCastScalar(Reg dst, bool dstSigned, Reg src, bool srcSigned)
1151 {
1152 auto extendTo32bit = [this](Reg reg, bool isSigned) {
1153 if (reg.GetSize() < WORD_SIZE) {
1154 if (isSigned) {
1155 GetMasm()->movsx(ArchReg(reg, WORD_SIZE), ArchReg(reg));
1156 } else {
1157 GetMasm()->movzx(ArchReg(reg, WORD_SIZE), ArchReg(reg));
1158 }
1159 }
1160 };
1161
1162 if (src.GetSize() >= dst.GetSize()) {
1163 if (dst.GetId() != src.GetId()) {
1164 GetMasm()->mov(ArchReg(dst), ArchReg(src, dst.GetSize()));
1165 }
1166 extendTo32bit(dst, dstSigned);
1167 return;
1168 }
1169
1170 if (srcSigned) {
1171 if (dst.GetSize() < DOUBLE_WORD_SIZE) {
1172 GetMasm()->movsx(ArchReg(dst), ArchReg(src));
1173 extendTo32bit(dst, dstSigned);
1174 } else if (src.GetSize() == WORD_SIZE) {
1175 GetMasm()->movsxd(ArchReg(dst), ArchReg(src));
1176 } else {
1177 GetMasm()->movsx(ArchReg(dst, WORD_SIZE), ArchReg(src));
1178 GetMasm()->movsxd(ArchReg(dst), ArchReg(dst, WORD_SIZE));
1179 }
1180 return;
1181 }
1182
1183 if (src.GetSize() == WORD_SIZE) {
1184 GetMasm()->mov(ArchReg(dst, WORD_SIZE), ArchReg(src));
1185 } else if (dst.GetSize() == DOUBLE_WORD_SIZE) {
1186 GetMasm()->movzx(ArchReg(dst, WORD_SIZE), ArchReg(src));
1187 } else {
1188 GetMasm()->movzx(ArchReg(dst), ArchReg(src));
1189 extendTo32bit(dst, dstSigned);
1190 }
1191 }
1192
Reg Amd64Encoder::MakeShift(Shift shift)
1194 {
1195 Reg reg = shift.GetBase();
1196 ASSERT(reg.IsValid());
1197 if (reg.IsScalar()) {
1198 ASSERT(shift.GetType() != ShiftType::INVALID_SHIFT);
1199 switch (shift.GetType()) {
1200 case ShiftType::LSL:
1201 GetMasm()->shl(ArchReg(reg), asmjit::imm(shift.GetScale()));
1202 break;
1203 case ShiftType::LSR:
1204 GetMasm()->shr(ArchReg(reg), asmjit::imm(shift.GetScale()));
1205 break;
1206 case ShiftType::ASR:
1207 GetMasm()->sar(ArchReg(reg), asmjit::imm(shift.GetScale()));
1208 break;
1209 case ShiftType::ROR:
1210 GetMasm()->ror(ArchReg(reg), asmjit::imm(shift.GetScale()));
1211 break;
1212 default:
1213 UNREACHABLE();
1214 }
1215
1216 return reg;
1217 }
1218
1219 // Invalid register type
1220 UNREACHABLE();
1221 }
1222
void Amd64Encoder::EncodeAdd(Reg dst, Reg src0, Shift src1)
1224 {
1225 if (dst.IsFloat()) {
1226 SetFalseResult();
1227 return;
1228 }
1229
1230 ASSERT(dst.GetSize() >= src0.GetSize());
1231
1232 auto shiftReg = MakeShift(src1);
1233
1234 if (src0.GetSize() < WORD_SIZE) {
1235 EncodeAdd(dst, src0, shiftReg);
1236 return;
1237 }
1238
1239 if (src0.GetSize() == DOUBLE_WORD_SIZE && shiftReg.GetSize() < DOUBLE_WORD_SIZE) {
1240 GetMasm()->movsxd(ArchReg(shiftReg, DOUBLE_WORD_SIZE), ArchReg(shiftReg));
1241 }
1242
1243 GetMasm()->lea(ArchReg(dst), asmjit::x86::ptr(ArchReg(src0), ArchReg(shiftReg, src0.GetSize())));
1244 }
1245
void Amd64Encoder::EncodeAdd(Reg dst, Reg src0, Reg src1)
1247 {
1248 if (dst.IsScalar()) {
1249 auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
1250 GetMasm()->lea(ArchReg(dst, size), asmjit::x86::ptr(ArchReg(src0, size), ArchReg(src1, size)));
1251 return;
1252 }
1253
1254 if (dst.GetType() == FLOAT32_TYPE) {
1255 if (dst.GetId() == src0.GetId()) {
1256 GetMasm()->addss(ArchVReg(dst), ArchVReg(src1));
1257 } else if (dst.GetId() == src1.GetId()) {
1258 GetMasm()->addss(ArchVReg(dst), ArchVReg(src0));
1259 } else {
1260 GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
1261 GetMasm()->addss(ArchVReg(dst), ArchVReg(src1));
1262 }
1263 } else {
1264 if (dst.GetId() == src0.GetId()) {
1265 GetMasm()->addsd(ArchVReg(dst), ArchVReg(src1));
1266 } else if (dst.GetId() == src1.GetId()) {
1267 GetMasm()->addsd(ArchVReg(dst), ArchVReg(src0));
1268 } else {
1269 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
1270 GetMasm()->addsd(ArchVReg(dst), ArchVReg(src1));
1271 }
1272 }
1273 }
1274
void Amd64Encoder::EncodeSub(Reg dst, Reg src0, Reg src1)
1276 {
1277 if (dst.IsScalar()) {
1278 if (dst.GetId() == src0.GetId()) {
1279 GetMasm()->sub(ArchReg(dst), ArchReg(src1));
1280 } else if (dst.GetId() == src1.GetId()) {
1281 GetMasm()->sub(ArchReg(dst), ArchReg(src0));
1282 GetMasm()->neg(ArchReg(dst));
1283 } else {
1284 EncodeMov(dst, src0);
1285 GetMasm()->sub(ArchReg(dst), ArchReg(src1));
1286 }
1287 return;
1288 }
1289
1290 if (dst.GetType() == FLOAT32_TYPE) {
1291 if (dst.GetId() == src0.GetId()) {
1292 GetMasm()->subss(ArchVReg(dst), ArchVReg(src1));
1293 } else if (dst.GetId() != src1.GetId()) {
1294 GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
1295 GetMasm()->subss(ArchVReg(dst), ArchVReg(src1));
1296 } else {
1297 ScopedTmpReg tmpReg(this, dst.GetType());
1298 GetMasm()->movss(ArchVReg(tmpReg), ArchVReg(src0));
1299 GetMasm()->subss(ArchVReg(tmpReg), ArchVReg(src1));
1300 GetMasm()->movss(ArchVReg(dst), ArchVReg(tmpReg));
1301 }
1302 } else {
1303 if (dst.GetId() == src0.GetId()) {
1304 GetMasm()->subsd(ArchVReg(dst), ArchVReg(src1));
1305 } else if (dst.GetId() != src1.GetId()) {
1306 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
1307 GetMasm()->subsd(ArchVReg(dst), ArchVReg(src1));
1308 } else {
1309 ScopedTmpReg tmpReg(this, dst.GetType());
1310 GetMasm()->movsd(ArchVReg(tmpReg), ArchVReg(src0));
1311 GetMasm()->subsd(ArchVReg(tmpReg), ArchVReg(src1));
1312 GetMasm()->movsd(ArchVReg(dst), ArchVReg(tmpReg));
1313 }
1314 }
1315 }
1316
void Amd64Encoder::EncodeMul(Reg dst, Reg src0, Reg src1)
1318 {
1319 if (dst.IsScalar()) {
1320 auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
1321
1322 if (dst.GetId() == src0.GetId()) {
1323 GetMasm()->imul(ArchReg(dst, size), ArchReg(src1, size));
1324 } else if (dst.GetId() == src1.GetId()) {
1325 GetMasm()->imul(ArchReg(dst, size), ArchReg(src0, size));
1326 } else {
1327 GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
1328 GetMasm()->imul(ArchReg(dst, size), ArchReg(src1, size));
1329 }
1330 return;
1331 }
1332
1333 if (dst.GetType() == FLOAT32_TYPE) {
1334 if (dst.GetId() == src0.GetId()) {
1335 GetMasm()->mulss(ArchVReg(dst), ArchVReg(src1));
1336 } else if (dst.GetId() == src1.GetId()) {
1337 GetMasm()->mulss(ArchVReg(dst), ArchVReg(src0));
1338 } else {
1339 GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
1340 GetMasm()->mulss(ArchVReg(dst), ArchVReg(src1));
1341 }
1342 } else {
1343 if (dst.GetId() == src0.GetId()) {
1344 GetMasm()->mulsd(ArchVReg(dst), ArchVReg(src1));
1345 } else if (dst.GetId() == src1.GetId()) {
1346 GetMasm()->mulsd(ArchVReg(dst), ArchVReg(src0));
1347 } else {
1348 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
1349 GetMasm()->mulsd(ArchVReg(dst), ArchVReg(src1));
1350 }
1351 }
1352 }
1353
void Amd64Encoder::EncodeAddOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1355 {
1356 ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1357 ASSERT(cc == Condition::VS || cc == Condition::VC);
1358 auto size = dst.GetSize();
1359 if (dst.GetId() == src0.GetId()) {
1360 GetMasm()->add(ArchReg(dst, size), ArchReg(src1, size));
1361 } else if (dst.GetId() == src1.GetId()) {
1362 GetMasm()->add(ArchReg(dst, size), ArchReg(src0, size));
1363 } else {
1364 GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
1365 GetMasm()->add(ArchReg(dst, size), ArchReg(src1, size));
1366 }
1367 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
1368 GetMasm()->j(ArchCc(cc, false), *label);
1369 }
1370
void Amd64Encoder::EncodeSubOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1372 {
1373 ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1374 ASSERT(cc == Condition::VS || cc == Condition::VC);
1375 auto size = dst.GetSize();
1376 if (dst.GetId() == src0.GetId()) {
1377 GetMasm()->sub(ArchReg(dst, size), ArchReg(src1, size));
1378 } else if (dst.GetId() == src1.GetId()) {
1379 ScopedTmpReg tmpReg(this, dst.GetType());
1380 GetMasm()->mov(ArchReg(tmpReg, size), ArchReg(src1, size));
1381 GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
1382 GetMasm()->sub(ArchReg(dst, size), ArchReg(tmpReg, size));
1383 } else {
1384 GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
1385 GetMasm()->sub(ArchReg(dst, size), ArchReg(src1, size));
1386 }
1387 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
1388 GetMasm()->j(ArchCc(cc, false), *label);
1389 }
1390
void Amd64Encoder::EncodeNegOverflowAndZero(compiler::LabelHolder::LabelId id, Reg dst, Reg src)
1392 {
1393 ASSERT(!dst.IsFloat() && !src.IsFloat());
1394 auto size = dst.GetSize();
1395 // NOLINTNEXTLINE(readability-magic-numbers)
1396 EncodeJumpTest(id, src, Imm(0x7fffffff), Condition::TST_EQ);
1397 EncodeMov(dst, src);
1398 GetMasm()->neg(ArchReg(dst, size));
1399 }
1400
void Amd64Encoder::EncodeDivFloat(Reg dst, Reg src0, Reg src1)
1402 {
1403 ASSERT(dst.IsFloat());
1404 if (dst.GetType() == FLOAT32_TYPE) {
1405 if (dst.GetId() == src0.GetId()) {
1406 GetMasm()->divss(ArchVReg(dst), ArchVReg(src1));
1407 } else if (dst.GetId() != src1.GetId()) {
1408 GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
1409 GetMasm()->divss(ArchVReg(dst), ArchVReg(src1));
1410 } else {
1411 ScopedTmpRegF32 tmp(this);
1412 GetMasm()->movss(ArchVReg(tmp), ArchVReg(src0));
1413 GetMasm()->divss(ArchVReg(tmp), ArchVReg(src1));
1414 GetMasm()->movss(ArchVReg(dst), ArchVReg(tmp));
1415 }
1416 } else {
1417 if (dst.GetId() == src0.GetId()) {
1418 GetMasm()->divsd(ArchVReg(dst), ArchVReg(src1));
1419 } else if (dst.GetId() != src1.GetId()) {
1420 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
1421 GetMasm()->divsd(ArchVReg(dst), ArchVReg(src1));
1422 } else {
1423 ScopedTmpRegF64 tmp(this);
1424 GetMasm()->movsd(ArchVReg(tmp), ArchVReg(src0));
1425 GetMasm()->divsd(ArchVReg(tmp), ArchVReg(src1));
1426 GetMasm()->movsd(ArchVReg(dst), ArchVReg(tmp));
1427 }
1428 }
1429 }
1430
static void EncodeDivSpillDst(asmjit::x86::Assembler *masm, Reg dst)
1432 {
1433 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
1434 masm->push(asmjit::x86::rdx);
1435 }
1436 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1437 masm->push(asmjit::x86::rax);
1438 }
1439 }
1440
static void EncodeDivFillDst(asmjit::x86::Assembler *masm, Reg dst)
1442 {
1443 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1444 masm->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::x86::rax);
1445 masm->pop(asmjit::x86::rax);
1446 }
1447
1448 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
1449 masm->pop(asmjit::x86::rdx);
1450 }
1451 }
1452
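// Note (added for clarity): for a signed division "idiv" raises #DE on INT_MIN / -1, so the divisor is
// compared with -1 up front and that whole case is computed as a plain negation (negPath) instead of
// ever reaching idiv.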
void Amd64Encoder::EncodeDiv(Reg dst, bool dstSigned, Reg src0, Reg src1)
1454 {
1455 if (dst.IsFloat()) {
1456 EncodeDivFloat(dst, src0, src1);
1457 return;
1458 }
1459
1460 auto negPath = GetMasm()->newLabel();
1461 auto crossroad = GetMasm()->newLabel();
1462
1463 if (dstSigned) {
1464 GetMasm()->cmp(ArchReg(src1), asmjit::imm(-1));
1465 GetMasm()->je(negPath);
1466 }
1467
1468 EncodeDivSpillDst(GetMasm(), dst);
1469
1470 ScopedTmpReg tmpReg(this, dst.GetType());
1471 Reg op1 {src1};
1472 if (src1.GetId() == ConvertRegNumber(asmjit::x86::rax.id()) ||
1473 src1.GetId() == ConvertRegNumber(asmjit::x86::rdx.id())) {
1474 GetMasm()->mov(ArchReg(tmpReg), ArchReg(src1));
1475 op1 = Reg(tmpReg);
1476 }
1477
1478 if (src0.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1479 GetMasm()->mov(asmjit::x86::rax, ArchReg(src0, DOUBLE_WORD_SIZE));
1480 }
1481 if (dstSigned) {
1482 if (dst.GetSize() <= WORD_SIZE) {
1483 GetMasm()->cdq();
1484 } else {
1485 GetMasm()->cqo();
1486 }
1487 GetMasm()->idiv(ArchReg(op1));
1488 } else {
1489 GetMasm()->xor_(asmjit::x86::rdx, asmjit::x86::rdx);
1490 GetMasm()->div(ArchReg(op1));
1491 }
1492
1493 EncodeDivFillDst(GetMasm(), dst);
1494
1495 GetMasm()->jmp(crossroad);
1496
1497 GetMasm()->bind(negPath);
1498 if (dst.GetId() != src0.GetId()) {
1499 GetMasm()->mov(ArchReg(dst), ArchReg(src0));
1500 }
1501 GetMasm()->neg(ArchReg(dst));
1502
1503 GetMasm()->bind(crossroad);
1504 }
1505
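// Note (added for clarity): division by a constant is strength-reduced to a multiplication by a precomputed
// "magic" reciprocal, assuming FastConstSignedDivisor follows the usual Granlund-Montgomery scheme. A sketch
// of the equivalent logic for the signed case:
//   q = high_word(magic * x);                              // imul leaves the high part in rdx
//   if (divisor > 0 && magic < 0) q += x; else if (divisor < 0 && magic > 0) q -= x;
//   q >>= shift;                                           // arithmetic shift
//   q += unsigned(q) >> (N - 1);                           // add the sign bit to round towards zero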
void Amd64Encoder::EncodeSignedDiv(Reg dst, Reg src0, Imm imm)
1507 {
1508 int64_t divisor = imm.GetAsInt();
1509
1510 Reg ax(ConvertRegNumber(asmjit::x86::rax.id()), dst.GetType());
1511 Reg dx(ConvertRegNumber(asmjit::x86::rdx.id()), dst.GetType());
1512
1513 if (dst != ax) {
1514 GetMasm()->push(ArchReg(ax, DOUBLE_WORD_SIZE));
1515 }
1516 if (dst != dx) {
1517 GetMasm()->push(ArchReg(dx, DOUBLE_WORD_SIZE));
1518 }
1519
1520 FastConstSignedDivisor fastDivisor(divisor, dst.GetSize());
1521 int64_t magic = fastDivisor.GetMagic();
1522
1523 ScopedTmpReg tmp(this, dst.GetType());
1524 EncodeMov(tmp, src0);
1525 EncodeMov(ax, src0);
1526 EncodeMov(dx, Imm(magic));
1527 GetMasm()->imul(ArchReg(dx));
1528
1529 if (divisor > 0 && magic < 0) {
1530 EncodeAdd(dx, dx, tmp);
1531 } else if (divisor < 0 && magic > 0) {
1532 EncodeSub(dx, dx, tmp);
1533 }
1534
1535 int64_t shift = fastDivisor.GetShift();
1536 EncodeAShr(dst, dx, Imm(shift));
1537
1538 // result = (result < 0 ? result + 1 : result)
1539 EncodeShr(tmp, dst, Imm(dst.GetSize() - 1U));
1540 EncodeAdd(dst, dst, tmp);
1541
1542 if (dst != dx) {
1543 GetMasm()->pop(ArchReg(dx, DOUBLE_WORD_SIZE));
1544 }
1545 if (dst != ax) {
1546 GetMasm()->pop(ArchReg(ax, DOUBLE_WORD_SIZE));
1547 }
1548 }
1549
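// Note (added for clarity): unsigned case of the same magic-reciprocal division. When the magic constant does
// not fit (fastDivisor.GetAdd() is true), the standard overflow fix-up below is applied:
//   t = (x - high_word(magic * x)) >> 1;  result = (t + high_word(magic * x)) >> (shift - 1);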
void Amd64Encoder::EncodeUnsignedDiv(Reg dst, Reg src0, Imm imm)
1551 {
1552 auto divisor = bit_cast<uint64_t>(imm.GetAsInt());
1553
1554 Reg ax(ConvertRegNumber(asmjit::x86::rax.id()), dst.GetType());
1555 Reg dx(ConvertRegNumber(asmjit::x86::rdx.id()), dst.GetType());
1556
1557 if (dst != ax) {
1558 GetMasm()->push(ArchReg(ax, DOUBLE_WORD_SIZE));
1559 }
1560 if (dst != dx) {
1561 GetMasm()->push(ArchReg(dx, DOUBLE_WORD_SIZE));
1562 }
1563
1564 FastConstUnsignedDivisor fastDivisor(divisor, dst.GetSize());
1565 uint64_t magic = fastDivisor.GetMagic();
1566
1567 ScopedTmpReg tmp(this, dst.GetType());
1568 if (fastDivisor.GetAdd()) {
1569 EncodeMov(tmp, src0);
1570 }
1571 EncodeMov(ax, src0);
1572 EncodeMov(dx, Imm(magic));
1573 GetMasm()->mul(ArchReg(dx));
1574
1575 uint64_t shift = fastDivisor.GetShift();
1576 if (!fastDivisor.GetAdd()) {
1577 EncodeShr(dst, dx, Imm(shift));
1578 } else {
1579 ASSERT(shift >= 1U);
1580 EncodeSub(tmp, tmp, dx);
1581 EncodeShr(tmp, tmp, Imm(1U));
1582 EncodeAdd(tmp, tmp, dx);
1583 EncodeShr(dst, tmp, Imm(shift - 1U));
1584 }
1585
1586 if (dst != dx) {
1587 GetMasm()->pop(ArchReg(dx, DOUBLE_WORD_SIZE));
1588 }
1589 if (dst != ax) {
1590 GetMasm()->pop(ArchReg(ax, DOUBLE_WORD_SIZE));
1591 }
1592 }
1593
void Amd64Encoder::EncodeDiv(Reg dst, Reg src0, Imm imm, bool isSigned)
1595 {
1596 ASSERT(dst.IsScalar() && dst.GetSize() >= WORD_SIZE);
1597 ASSERT(CanOptimizeImmDivMod(bit_cast<uint64_t>(imm.GetAsInt()), isSigned));
1598 if (isSigned) {
1599 EncodeSignedDiv(dst, src0, imm);
1600 } else {
1601 EncodeUnsignedDiv(dst, src0, imm);
1602 }
1603 }
1604
void Amd64Encoder::EncodeMod(Reg dst, Reg src0, Imm imm, bool isSigned)
1606 {
1607 ASSERT(dst.IsScalar() && dst.GetSize() >= WORD_SIZE);
1608 ASSERT(CanOptimizeImmDivMod(bit_cast<uint64_t>(imm.GetAsInt()), isSigned));
1609
1610 // dst = src0 - imm * (src0 / imm)
1611 ScopedTmpReg tmp(this, dst.GetType());
1612 EncodeDiv(tmp, src0, imm, isSigned);
1613 if (dst.GetSize() == WORD_SIZE) {
1614 GetMasm()->imul(ArchReg(tmp), ArchReg(tmp), asmjit::imm(imm.GetAsInt()));
1615 } else {
1616 ScopedTmpRegU64 immReg(this);
1617 EncodeMov(immReg, imm);
1618 EncodeMul(tmp, tmp, immReg);
1619 }
1620 EncodeSub(dst, src0, tmp);
1621 }
1622
void Amd64Encoder::EncodeModFloat(Reg dst, Reg src0, Reg src1)
1624 {
1625 ASSERT(dst.IsFloat());
1626 if (dst.GetType() == FLOAT32_TYPE) {
1627 using Fp = float (*)(float, float);
1628 MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<Fp>(fmodf)));
1629 } else {
1630 using Fp = double (*)(double, double);
1631 MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<Fp>(fmod)));
1632 }
1633 }
1634
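// Note (added for clarity): like EncodeDiv above, the signed path special-cases divisor == -1 (zeroPath),
// both because the remainder is always 0 in that case and because "idiv" would raise #DE on INT_MIN % -1.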
void Amd64Encoder::EncodeMod(Reg dst, bool dstSigned, Reg src0, Reg src1)
1636 {
1637 if (dst.IsFloat()) {
1638 EncodeModFloat(dst, src0, src1);
1639 return;
1640 }
1641
1642 auto zeroPath = GetMasm()->newLabel();
1643 auto crossroad = GetMasm()->newLabel();
1644
1645 if (dstSigned) {
1646 GetMasm()->cmp(ArchReg(src1), asmjit::imm(-1));
1647 GetMasm()->je(zeroPath);
1648 }
1649
1650 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1651 GetMasm()->push(asmjit::x86::rax);
1652 }
1653 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
1654 GetMasm()->push(asmjit::x86::rdx);
1655 }
1656
1657 ScopedTmpReg tmpReg(this, dst.GetType());
1658 Reg op1 {src1};
1659 if (src1.GetId() == ConvertRegNumber(asmjit::x86::rax.id()) ||
1660 src1.GetId() == ConvertRegNumber(asmjit::x86::rdx.id())) {
1661 GetMasm()->mov(ArchReg(tmpReg), ArchReg(src1));
1662 op1 = Reg(tmpReg);
1663 }
1664
1665 if (src0.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1666 GetMasm()->mov(asmjit::x86::rax, ArchReg(src0, DOUBLE_WORD_SIZE));
1667 }
1668
1669 if (dstSigned) {
1670 if (dst.GetSize() <= WORD_SIZE) {
1671 GetMasm()->cdq();
1672 } else {
1673 GetMasm()->cqo();
1674 }
1675 GetMasm()->idiv(ArchReg(op1));
1676 } else {
1677 GetMasm()->xor_(asmjit::x86::rdx, asmjit::x86::rdx);
1678 GetMasm()->div(ArchReg(op1));
1679 }
1680
1681 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
1682 GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::x86::rdx);
1683 GetMasm()->pop(asmjit::x86::rdx);
1684 }
1685
1686 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1687 GetMasm()->pop(asmjit::x86::rax);
1688 }
1689 GetMasm()->jmp(crossroad);
1690
1691 GetMasm()->bind(zeroPath);
1692 GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
1693
1694 GetMasm()->bind(crossroad);
1695 }
1696
1697 void Amd64Encoder::EncodeMin(Reg dst, bool dstSigned, Reg src0, Reg src1)
1698 {
1699 if (dst.IsScalar()) {
1700 ScopedTmpReg tmpReg(this, dst.GetType());
1701 GetMasm()->mov(ArchReg(tmpReg), ArchReg(src1));
1702 GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
1703
1704 auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
1705 if (dstSigned) {
1706 GetMasm()->cmovle(ArchReg(tmpReg, size), ArchReg(src0, size));
1707 } else {
1708 GetMasm()->cmovb(ArchReg(tmpReg, size), ArchReg(src0, size));
1709 }
1710 EncodeMov(dst, tmpReg);
1711 return;
1712 }
1713
1714 EncodeMinMaxFp<false>(dst, src0, src1);
1715 }
1716
1717 void Amd64Encoder::EncodeMax(Reg dst, bool dstSigned, Reg src0, Reg src1)
1718 {
1719 if (dst.IsScalar()) {
1720 ScopedTmpReg tmpReg(this, dst.GetType());
1721 GetMasm()->mov(ArchReg(tmpReg), ArchReg(src1));
1722 GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
1723
1724 auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
1725 if (dstSigned) {
1726 GetMasm()->cmovge(ArchReg(tmpReg, size), ArchReg(src0, size));
1727 } else {
1728 GetMasm()->cmova(ArchReg(tmpReg, size), ArchReg(src0, size));
1729 }
1730 EncodeMov(dst, tmpReg);
1731 return;
1732 }
1733
1734 EncodeMinMaxFp<true>(dst, src0, src1);
1735 }
1736
1737 template <bool IS_MAX>
1738 void Amd64Encoder::EncodeMinMaxFp(Reg dst, Reg src0, Reg src1)
1739 {
1740 auto end = GetMasm()->newLabel();
1741 auto notEqual = GetMasm()->newLabel();
1742 auto gotNan = GetMasm()->newLabel();
1743 auto &srcA = dst.GetId() != src1.GetId() ? src0 : src1;
1744 auto &srcB = srcA.GetId() == src0.GetId() ? src1 : src0;
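    // ucomis* reports +0.0 and -0.0 as equal, so the equal (non-NaN) path resolves the
    // sign explicitly: andps keeps +0.0 for max(+0.0, -0.0), orps keeps -0.0 for min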
1745 if (dst.GetType() == FLOAT32_TYPE) {
1746 GetMasm()->movaps(ArchVReg(dst), ArchVReg(srcA));
1747 GetMasm()->ucomiss(ArchVReg(srcB), ArchVReg(srcA));
1748 GetMasm()->jne(notEqual);
1749 GetMasm()->jp(gotNan);
1750 // calculate result for positive/negative zero operands
1751 if (IS_MAX) {
1752 GetMasm()->andps(ArchVReg(dst), ArchVReg(srcB));
1753 } else {
1754 GetMasm()->orps(ArchVReg(dst), ArchVReg(srcB));
1755 }
1756 GetMasm()->jmp(end);
1757 GetMasm()->bind(gotNan);
1758 // if either operand is NaN, the result is NaN
1759 GetMasm()->por(ArchVReg(dst), ArchVReg(srcB));
1760 GetMasm()->jmp(end);
1761 GetMasm()->bind(notEqual);
1762 if (IS_MAX) {
1763 GetMasm()->maxss(ArchVReg(dst), ArchVReg(srcB));
1764 } else {
1765 GetMasm()->minss(ArchVReg(dst), ArchVReg(srcB));
1766 }
1767 GetMasm()->bind(end);
1768 } else {
1769 GetMasm()->movapd(ArchVReg(dst), ArchVReg(srcA));
1770 GetMasm()->ucomisd(ArchVReg(srcB), ArchVReg(srcA));
1771 GetMasm()->jne(notEqual);
1772 GetMasm()->jp(gotNan);
1773 // calculate result for positive/negative zero operands
1774 if (IS_MAX) {
1775 GetMasm()->andpd(ArchVReg(dst), ArchVReg(srcB));
1776 } else {
1777 GetMasm()->orpd(ArchVReg(dst), ArchVReg(srcB));
1778 }
1779 GetMasm()->jmp(end);
1780 GetMasm()->bind(gotNan);
1781 // if either operand is NaN, the result is NaN
1782 GetMasm()->por(ArchVReg(dst), ArchVReg(srcB));
1783 GetMasm()->jmp(end);
1784 GetMasm()->bind(notEqual);
1785 if (IS_MAX) {
1786 GetMasm()->maxsd(ArchVReg(dst), ArchVReg(srcB));
1787 } else {
1788 GetMasm()->minsd(ArchVReg(dst), ArchVReg(srcB));
1789 }
1790 GetMasm()->bind(end);
1791 }
1792 }
1793
1794 void Amd64Encoder::EncodeShl(Reg dst, Reg src0, Reg src1)
1795 {
1796 ASSERT(dst.IsScalar());
1797 ScopedTmpReg tmpReg(this, dst.GetType());
1798 Reg rcx(ConvertRegNumber(asmjit::x86::rcx.id()), dst.GetType());
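    // variable shifts on x86 take the count in CL, so src1 is routed through RCX;
    // the caller's RCX is preserved unless it is also the destination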
1799 GetMasm()->mov(ArchReg(tmpReg), ArchReg(src0));
1800 if (dst.GetId() != rcx.GetId()) {
1801 GetMasm()->push(ArchReg(rcx, DOUBLE_WORD_SIZE));
1802 }
1803 GetMasm()->mov(ArchReg(rcx), ArchReg(src1));
1804 GetMasm()->shl(ArchReg(tmpReg), asmjit::x86::cl);
1805 if (dst.GetId() != rcx.GetId()) {
1806 GetMasm()->pop(ArchReg(rcx, DOUBLE_WORD_SIZE));
1807 }
1808 GetMasm()->mov(ArchReg(dst), ArchReg(tmpReg));
1809 }
1810
1811 void Amd64Encoder::EncodeShr(Reg dst, Reg src0, Reg src1)
1812 {
1813 ASSERT(dst.IsScalar());
1814 ScopedTmpReg tmpReg(this, dst.GetType());
1815 Reg rcx(ConvertRegNumber(asmjit::x86::rcx.id()), dst.GetType());
1816 GetMasm()->mov(ArchReg(tmpReg), ArchReg(src0));
1817 if (dst.GetId() != rcx.GetId()) {
1818 GetMasm()->push(ArchReg(rcx, DOUBLE_WORD_SIZE));
1819 }
1820 GetMasm()->mov(ArchReg(rcx), ArchReg(src1));
1821 GetMasm()->shr(ArchReg(tmpReg), asmjit::x86::cl);
1822 if (dst.GetId() != rcx.GetId()) {
1823 GetMasm()->pop(ArchReg(rcx, DOUBLE_WORD_SIZE));
1824 }
1825 GetMasm()->mov(ArchReg(dst), ArchReg(tmpReg));
1826 }
1827
1828 void Amd64Encoder::EncodeAShr(Reg dst, Reg src0, Reg src1)
1829 {
1830 ASSERT(dst.IsScalar());
1831 ScopedTmpReg tmpReg(this, dst.GetType());
1832 Reg rcx(ConvertRegNumber(asmjit::x86::rcx.id()), dst.GetType());
1833 GetMasm()->mov(ArchReg(tmpReg), ArchReg(src0));
1834 if (dst.GetId() != rcx.GetId()) {
1835 GetMasm()->push(ArchReg(rcx, DOUBLE_WORD_SIZE));
1836 }
1837 GetMasm()->mov(ArchReg(rcx), ArchReg(src1));
1838 GetMasm()->sar(ArchReg(tmpReg), asmjit::x86::cl);
1839 if (dst.GetId() != rcx.GetId()) {
1840 GetMasm()->pop(ArchReg(rcx, DOUBLE_WORD_SIZE));
1841 }
1842 GetMasm()->mov(ArchReg(dst), ArchReg(tmpReg));
1843 }
1844
1845 void Amd64Encoder::EncodeAnd(Reg dst, Reg src0, Reg src1)
1846 {
1847 ASSERT(dst.IsScalar());
1848 if (dst.GetId() == src0.GetId()) {
1849 GetMasm()->and_(ArchReg(dst), ArchReg(src1));
1850 } else if (dst.GetId() == src1.GetId()) {
1851 GetMasm()->and_(ArchReg(dst), ArchReg(src0));
1852 } else {
1853 EncodeMov(dst, src0);
1854 GetMasm()->and_(ArchReg(dst), ArchReg(src1));
1855 }
1856 }
1857
1858 void Amd64Encoder::EncodeOr(Reg dst, Reg src0, Reg src1)
1859 {
1860 ASSERT(dst.IsScalar());
1861 if (dst.GetId() == src0.GetId()) {
1862 GetMasm()->or_(ArchReg(dst), ArchReg(src1));
1863 } else if (dst.GetId() == src1.GetId()) {
1864 GetMasm()->or_(ArchReg(dst), ArchReg(src0));
1865 } else {
1866 EncodeMov(dst, src0);
1867 GetMasm()->or_(ArchReg(dst), ArchReg(src1));
1868 }
1869 }
1870
1871 void Amd64Encoder::EncodeXor(Reg dst, Reg src0, Reg src1)
1872 {
1873 ASSERT(dst.IsScalar());
1874 if (dst.GetId() == src0.GetId()) {
1875 GetMasm()->xor_(ArchReg(dst), ArchReg(src1));
1876 } else if (dst.GetId() == src1.GetId()) {
1877 GetMasm()->xor_(ArchReg(dst), ArchReg(src0));
1878 } else {
1879 EncodeMov(dst, src0);
1880 GetMasm()->xor_(ArchReg(dst), ArchReg(src1));
1881 }
1882 }
1883
1884 void Amd64Encoder::EncodeAdd(Reg dst, Reg src, Imm imm)
1885 {
1886 if (dst.IsFloat()) {
1887 SetFalseResult();
1888 return;
1889 }
1890
1891 auto immVal = imm.GetAsInt();
1892 auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
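    // lea computes dst = src + imm in a single instruction, allows dst != src and
    // leaves the flags untouched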
1893 if (ImmFitsSize(immVal, size)) {
1894 GetMasm()->lea(ArchReg(dst, size), asmjit::x86::ptr(ArchReg(src, size), immVal));
1895 } else {
1896 if (dst.GetId() != src.GetId()) {
1897 GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
1898 GetMasm()->add(ArchReg(dst), ArchReg(src));
1899 } else {
1900 ScopedTmpReg tmpReg(this, dst.GetType());
1901 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
1902 GetMasm()->add(ArchReg(dst), ArchReg(tmpReg));
1903 }
1904 }
1905 }
1906
1907 void Amd64Encoder::EncodeSub(Reg dst, Reg src, Imm imm)
1908 {
1909 if (dst.IsFloat()) {
1910 SetFalseResult();
1911 return;
1912 }
1913
1914 auto immVal = -imm.GetAsInt();
1915 auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
1916 if (ImmFitsSize(immVal, size)) {
1917 GetMasm()->lea(ArchReg(dst, size), asmjit::x86::ptr(ArchReg(src, size), immVal));
1918 } else {
1919 if (dst.GetId() != src.GetId()) {
1920 GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
1921 GetMasm()->add(ArchReg(dst), ArchReg(src));
1922 } else {
1923 ScopedTmpReg tmpReg(this, dst.GetType());
1924 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
1925 GetMasm()->add(ArchReg(dst), ArchReg(tmpReg));
1926 }
1927 }
1928 }
1929
1930 void Amd64Encoder::EncodeShl(Reg dst, Reg src, Imm imm)
1931 {
1932 ASSERT(dst.IsScalar());
1933 EncodeMov(dst, src);
1934 GetMasm()->shl(ArchReg(dst), ArchImm(imm));
1935 }
1936
1937 void Amd64Encoder::EncodeShr(Reg dst, Reg src, Imm imm)
1938 {
1939 ASSERT(dst.IsScalar());
1940
1941 EncodeMov(dst, src);
1942 GetMasm()->shr(ArchReg(dst), ArchImm(imm));
1943 }
1944
1945 void Amd64Encoder::EncodeAShr(Reg dst, Reg src, Imm imm)
1946 {
1947 ASSERT(dst.IsScalar());
1948 EncodeMov(dst, src);
1949 GetMasm()->sar(ArchReg(dst), ArchImm(imm));
1950 }
1951
1952 void Amd64Encoder::EncodeAnd(Reg dst, Reg src, Imm imm)
1953 {
1954 ASSERT(dst.IsScalar());
1955 auto immVal = ImmToUnsignedInt(imm);
1956
1957 switch (src.GetSize()) {
1958 case BYTE_SIZE:
1959 immVal |= ~uint64_t(0xFF); // NOLINT
1960 break;
1961 case HALF_SIZE:
1962 immVal |= ~uint64_t(0xFFFF); // NOLINT
1963 break;
1964 case WORD_SIZE:
1965 immVal |= ~uint64_t(0xFFFFFFFF); // NOLINT
1966 break;
1967 default:
1968 break;
1969 }
1970
1971 if (dst.GetSize() != DOUBLE_WORD_SIZE) {
1972 // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult)
1973 immVal &= (uint64_t(1) << dst.GetSize()) - 1;
1974 }
1975
1976 if (ImmFitsSize(immVal, dst.GetSize())) {
1977 EncodeMov(dst, src);
1978 GetMasm()->and_(ArchReg(dst), immVal);
1979 } else {
1980 if (dst.GetId() != src.GetId()) {
1981 GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
1982 GetMasm()->and_(ArchReg(dst), ArchReg(src));
1983 } else {
1984 ScopedTmpReg tmpReg(this, dst.GetType());
1985 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
1986 GetMasm()->and_(ArchReg(dst), ArchReg(tmpReg));
1987 }
1988 }
1989 }
1990
1991 void Amd64Encoder::EncodeOr(Reg dst, Reg src, Imm imm)
1992 {
1993 ASSERT(dst.IsScalar());
1994 auto immVal = ImmToUnsignedInt(imm);
1995 if (ImmFitsSize(immVal, dst.GetSize())) {
1996 EncodeMov(dst, src);
1997 GetMasm()->or_(ArchReg(dst), immVal);
1998 } else {
1999 if (dst.GetId() != src.GetId()) {
2000 GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
2001 GetMasm()->or_(ArchReg(dst), ArchReg(src));
2002 } else {
2003 ScopedTmpReg tmpReg(this, dst.GetType());
2004 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
2005 GetMasm()->or_(ArchReg(dst), ArchReg(tmpReg));
2006 }
2007 }
2008 }
2009
2010 void Amd64Encoder::EncodeXor(Reg dst, Reg src, Imm imm)
2011 {
2012 ASSERT(dst.IsScalar());
2013 auto immVal = ImmToUnsignedInt(imm);
2014 if (ImmFitsSize(immVal, dst.GetSize())) {
2015 EncodeMov(dst, src);
2016 GetMasm()->xor_(ArchReg(dst), immVal);
2017 } else {
2018 if (dst.GetId() != src.GetId()) {
2019 GetMasm()->mov(ArchReg(dst), asmjit::imm(immVal));
2020 GetMasm()->xor_(ArchReg(dst), ArchReg(src));
2021 } else {
2022 ScopedTmpReg tmpReg(this, dst.GetType());
2023 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
2024 GetMasm()->xor_(ArchReg(dst), ArchReg(tmpReg));
2025 }
2026 }
2027 }
2028
2029 void Amd64Encoder::EncodeMov(Reg dst, Imm src)
2030 {
2031 if (dst.IsScalar()) {
2032 if (dst.GetSize() < WORD_SIZE) {
2033 GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
2034 }
2035 GetMasm()->mov(ArchReg(dst), ArchImm(src));
2036 return;
2037 }
2038
2039 if (dst.GetType() == FLOAT32_TYPE) {
2040 ScopedTmpRegU32 tmpReg(this);
2041 auto val = bit_cast<uint32_t>(src.GetAsFloat());
2042 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(val));
2043 GetMasm()->movd(ArchVReg(dst), ArchReg(tmpReg));
2044 } else {
2045 ScopedTmpRegU64 tmpReg(this);
2046 auto val = bit_cast<uint64_t>(src.GetAsDouble());
2047 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(val));
2048 GetMasm()->movq(ArchVReg(dst), ArchReg(tmpReg));
2049 }
2050 }
2051
2052 void Amd64Encoder::EncodeLdr(Reg dst, bool dstSigned, MemRef mem)
2053 {
2054 auto m = ArchMem(mem).Prepare(GetMasm());
2055
2056 if (dst.GetType() == FLOAT32_TYPE) {
2057 GetMasm()->movss(ArchVReg(dst), m);
2058 return;
2059 }
2060 if (dst.GetType() == FLOAT64_TYPE) {
2061 GetMasm()->movsd(ArchVReg(dst), m);
2062 return;
2063 }
2064
2065 m.setSize(dst.GetSize() / BITS_PER_BYTE);
2066
2067 if (dstSigned && dst.GetSize() < DOUBLE_WORD_SIZE) {
2068 if (dst.GetSize() == WORD_SIZE) {
2069 GetMasm()->movsxd(ArchReg(dst, DOUBLE_WORD_SIZE), m);
2070 } else {
2071 GetMasm()->movsx(ArchReg(dst, DOUBLE_WORD_SIZE), m);
2072 }
2073 return;
2074 }
2075 if (!dstSigned && dst.GetSize() < WORD_SIZE) {
2076 GetMasm()->movzx(ArchReg(dst, WORD_SIZE), m);
2077 return;
2078 }
2079
2080 GetMasm()->mov(ArchReg(dst), m);
2081 }
2082
2083 void Amd64Encoder::EncodeLdrAcquire(Reg dst, bool dstSigned, MemRef mem)
2084 {
2085 EncodeLdr(dst, dstSigned, mem);
2086 // A LoadLoad or LoadStore barrier would be needed here, but both are no-ops in the amd64 memory model
2087 }
2088
2089 void Amd64Encoder::EncodeStr(Reg src, MemRef mem)
2090 {
2091 auto m = ArchMem(mem).Prepare(GetMasm());
2092
2093 if (src.GetType() == FLOAT32_TYPE) {
2094 GetMasm()->movss(m, ArchVReg(src));
2095 return;
2096 }
2097 if (src.GetType() == FLOAT64_TYPE) {
2098 GetMasm()->movsd(m, ArchVReg(src));
2099 return;
2100 }
2101
2102 m.setSize(src.GetSize() / BITS_PER_BYTE);
2103 GetMasm()->mov(m, ArchReg(src));
2104 }
2105
2106 void Amd64Encoder::EncodeStrRelease(Reg src, MemRef mem)
2107 {
2108 // A StoreStore barrier would be needed here, but it is a no-op in the amd64 memory model
2109 EncodeStr(src, mem);
2110 // this is a StoreLoad barrier (which is also a full memory barrier in the amd64 memory model)
2111 GetMasm()->lock().add(asmjit::x86::dword_ptr(asmjit::x86::rsp), asmjit::imm(0));
2112 }
2113
2114 void Amd64Encoder::EncodeStrz(Reg src, MemRef mem)
2115 {
2116 if (src.IsScalar()) {
2117 if (src.GetSize() == DOUBLE_WORD_SIZE) {
2118 GetMasm()->mov(ArchMem(mem).Prepare(GetMasm()), ArchReg(src));
2119 } else {
2120 ScopedTmpRegU64 tmpReg(this);
2121 GetMasm()->xor_(ArchReg(tmpReg), ArchReg(tmpReg));
2122 GetMasm()->mov(ArchReg(tmpReg, src.GetSize()), ArchReg(src));
2123 GetMasm()->mov(ArchMem(mem).Prepare(GetMasm()), ArchReg(tmpReg));
2124 }
2125 } else {
2126 if (src.GetType() == FLOAT64_TYPE) {
2127 GetMasm()->movsd(ArchMem(mem).Prepare(GetMasm()), ArchVReg(src));
2128 } else {
2129 ScopedTmpRegF64 tmpReg(this);
2130
2131 GetMasm()->xorpd(ArchVReg(tmpReg), ArchVReg(tmpReg));
2132 GetMasm()->movss(ArchVReg(tmpReg), ArchVReg(src));
2133 GetMasm()->movsd(ArchMem(mem).Prepare(GetMasm()), ArchVReg(tmpReg));
2134 }
2135 }
2136 }
2137
2138 void Amd64Encoder::EncodeSti(int64_t src, uint8_t srcSizeBytes, MemRef mem)
2139 {
2140 ASSERT(srcSizeBytes <= 8U);
2141 auto m = ArchMem(mem).Prepare(GetMasm());
2142 if (srcSizeBytes <= HALF_WORD_SIZE_BYTES) {
2143 m.setSize(srcSizeBytes);
2144 GetMasm()->mov(m, asmjit::imm(src));
2145 } else {
2146 m.setSize(DOUBLE_WORD_SIZE_BYTES);
2147
2148 if (ImmFitsSize(src, DOUBLE_WORD_SIZE)) {
2149 GetMasm()->mov(m, asmjit::imm(src));
2150 } else {
2151 ScopedTmpRegU64 tmpReg(this);
2152 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(src));
2153 GetMasm()->mov(m, ArchReg(tmpReg));
2154 }
2155 }
2156 }
2157
2158 void Amd64Encoder::EncodeSti(float src, MemRef mem)
2159 {
2160 EncodeSti(bit_cast<int32_t>(src), sizeof(int32_t), mem);
2161 }
2162
2163 void Amd64Encoder::EncodeSti(double src, MemRef mem)
2164 {
2165 EncodeSti(bit_cast<int64_t>(src), sizeof(int64_t), mem);
2166 }
2167
2168 void Amd64Encoder::EncodeMemCopy(MemRef memFrom, MemRef memTo, size_t size)
2169 {
2170 ScopedTmpRegU64 tmpReg(this);
2171 GetMasm()->mov(ArchReg(tmpReg, size), ArchMem(memFrom).Prepare(GetMasm()));
2172 GetMasm()->mov(ArchMem(memTo).Prepare(GetMasm()), ArchReg(tmpReg, size));
2173 }
2174
2175 void Amd64Encoder::EncodeMemCopyz(MemRef memFrom, MemRef memTo, size_t size)
2176 {
2177 ScopedTmpRegU64 tmpReg(this);
2178 if (size < DOUBLE_WORD_SIZE) {
2179 GetMasm()->xor_(ArchReg(tmpReg), ArchReg(tmpReg));
2180 }
2181 GetMasm()->mov(ArchReg(tmpReg, size), ArchMem(memFrom).Prepare(GetMasm()));
2182 GetMasm()->mov(ArchMem(memTo).Prepare(GetMasm()), ArchReg(tmpReg));
2183 }
2184
2185 void Amd64Encoder::EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc)
2186 {
2187 if (src0.IsScalar()) {
2188 GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
2189 } else {
2190 if (src0.GetType() == FLOAT32_TYPE) {
2191 GetMasm()->ucomiss(ArchVReg(src0), ArchVReg(src1));
2192 } else {
2193 GetMasm()->ucomisd(ArchVReg(src0), ArchVReg(src1));
2194 }
2195 }
2196 GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(0));
2197
2198 if (src0.IsScalar()) {
2199 GetMasm()->set(ArchCc(cc), ArchReg(dst, BYTE_SIZE));
2200 return;
2201 }
2202
2203 auto end = GetMasm()->newLabel();
2204
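    // ucomis* reports an unordered compare (a NaN operand) through PF: for conditions
    // that hold on NaN the result is recorded with setp, otherwise the jp below leaves
    // dst at 0; the ordered result is produced by the set that follows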
2205 if (CcMatchesNan(cc)) {
2206 GetMasm()->setp(ArchReg(dst, BYTE_SIZE));
2207 }
2208 GetMasm()->jp(end);
2209 GetMasm()->set(ArchCc(cc, true), ArchReg(dst, BYTE_SIZE));
2210
2211 GetMasm()->bind(end);
2212 }
2213
2214 void Amd64Encoder::EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc)
2215 {
2216 ASSERT(src0.IsScalar());
2217
2218 GetMasm()->test(ArchReg(src0), ArchReg(src1));
2219
2220 GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(0));
2221 GetMasm()->set(ArchCcTest(cc), ArchReg(dst, BYTE_SIZE));
2222 }
2223
2224 void Amd64Encoder::EncodeAtomicByteOr(Reg addr, Reg value, [[maybe_unused]] bool fastEncoding)
2225 {
2226 GetMasm()->lock().or_(asmjit::x86::byte_ptr(ArchReg(addr)), ArchReg(value, ark::compiler::BYTE_SIZE));
2227 }
2228
2229 void Amd64Encoder::EncodeCmp(Reg dst, Reg src0, Reg src1, Condition cc)
2230 {
2231 auto end = GetMasm()->newLabel();
2232
2233 if (src0.IsFloat()) {
2234 ASSERT(src1.IsFloat());
2235 ASSERT(cc == Condition::MI || cc == Condition::LT);
2236
2237 if (src0.GetType() == FLOAT32_TYPE) {
2238 GetMasm()->ucomiss(ArchVReg(src0), ArchVReg(src1));
2239 } else {
2240 GetMasm()->ucomisd(ArchVReg(src0), ArchVReg(src1));
2241 }
2242
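        // preload the result used when the operands are unordered (NaN): -1 for
        // Condition::LT, +1 otherwise; the jp below returns it directly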
2243 GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), cc == Condition::LT ? asmjit::imm(-1) : asmjit::imm(1));
2244 cc = Condition::LO;
2245
2246 GetMasm()->jp(end);
2247 } else {
2248 ASSERT(src0.IsScalar() && src1.IsScalar());
2249 ASSERT(cc == Condition::LO || cc == Condition::LT);
2250 GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
2251 }
2252 GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(0));
2253 GetMasm()->setne(ArchReg(dst, BYTE_SIZE));
2254
2255 GetMasm()->j(asmjit::x86::Condition::negate(ArchCc(cc)), end);
2256 GetMasm()->neg(ArchReg(dst));
2257
2258 GetMasm()->bind(end);
2259 }
2260
2261 void Amd64Encoder::EncodeSelect(ArgsSelect &&args)
2262 {
2263 auto [dst, src0, src1, src2, src3, cc] = args;
2264 ASSERT(!src0.IsFloat() && !src1.IsFloat());
2265 if (src2.IsScalar()) {
2266 GetMasm()->cmp(ArchReg(src2), ArchReg(src3));
2267 } else if (src2.GetType() == FLOAT32_TYPE) {
2268 GetMasm()->comiss(ArchVReg(src2), ArchVReg(src3));
2269 } else {
2270 GetMasm()->comisd(ArchVReg(src2), ArchVReg(src3));
2271 }
2272
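    // when dst aliases src0, build the result in a temporary: the initial mov of src1
    // would otherwise clobber src0 before cmov reads it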
2273 auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
2274 bool dstAliased = dst.GetId() == src0.GetId();
2275 ScopedTmpReg tmpReg(this, dst.GetType());
2276 auto dstReg = dstAliased ? ArchReg(tmpReg, size) : ArchReg(dst, size);
2277
2278 GetMasm()->mov(dstReg, ArchReg(src1, size));
2279
2280 if (src2.IsScalar()) {
2281 GetMasm()->cmov(ArchCc(cc), dstReg, ArchReg(src0, size));
2282 } else if (CcMatchesNan(cc)) {
2283 GetMasm()->cmovp(dstReg, ArchReg(src0, size));
2284 GetMasm()->cmov(ArchCc(cc, src2.IsFloat()), dstReg, ArchReg(src0, size));
2285 } else {
2286 auto end = GetMasm()->newLabel();
2287
2288 GetMasm()->jp(end);
2289 GetMasm()->cmov(ArchCc(cc, src2.IsFloat()), dstReg, ArchReg(src0, size));
2290
2291 GetMasm()->bind(end);
2292 }
2293 if (dstAliased) {
2294 EncodeMov(dst, tmpReg);
2295 }
2296 }
2297
2298 void Amd64Encoder::EncodeSelect(ArgsSelectImm &&args)
2299 {
2300 auto [dst, src0, src1, src2, imm, cc] = args;
2301 ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());
2302
2303 auto immVal = imm.GetAsInt();
2304 if (ImmFitsSize(immVal, src2.GetSize())) {
2305 GetMasm()->cmp(ArchReg(src2), asmjit::imm(immVal));
2306 } else {
2307 ScopedTmpReg tmpReg(this, src2.GetType());
2308 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
2309 GetMasm()->cmp(ArchReg(src2), ArchReg(tmpReg));
2310 }
2311
2312 ScopedTmpReg tmpReg(this, dst.GetType());
2313 auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
2314 bool dstAliased = dst.GetId() == src0.GetId();
2315 auto dstReg = dstAliased ? ArchReg(tmpReg, size) : ArchReg(dst, size);
2316
2317 GetMasm()->mov(dstReg, ArchReg(src1, size));
2318 GetMasm()->cmov(ArchCc(cc), dstReg, ArchReg(src0, size));
2319 if (dstAliased) {
2320 EncodeMov(dst, tmpReg);
2321 }
2322 }
2323
2324 void Amd64Encoder::EncodeSelectTest(ArgsSelect &&args)
2325 {
2326 auto [dst, src0, src1, src2, src3, cc] = args;
2327 ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());
2328
2329 GetMasm()->test(ArchReg(src2), ArchReg(src3));
2330
2331 ScopedTmpReg tmpReg(this, dst.GetType());
2332 auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
2333 bool dstAliased = dst.GetId() == src0.GetId();
2334 auto dstReg = dstAliased ? ArchReg(tmpReg, size) : ArchReg(dst, size);
2335
2336 GetMasm()->mov(dstReg, ArchReg(src1, size));
2337 GetMasm()->cmov(ArchCcTest(cc), dstReg, ArchReg(src0, size));
2338 if (dstAliased) {
2339 EncodeMov(dst, tmpReg);
2340 }
2341 }
2342
2343 void Amd64Encoder::EncodeSelectTest(ArgsSelectImm &&args)
2344 {
2345 auto [dst, src0, src1, src2, imm, cc] = args;
2346 ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());
2347
2348 auto immVal = imm.GetAsInt();
2349 if (ImmFitsSize(immVal, src2.GetSize())) {
2350 GetMasm()->test(ArchReg(src2), asmjit::imm(immVal));
2351 } else {
2352 ScopedTmpReg tmpReg(this, src2.GetType());
2353 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(immVal));
2354 GetMasm()->test(ArchReg(src2), ArchReg(tmpReg));
2355 }
2356
2357 ScopedTmpReg tmpReg(this, dst.GetType());
2358 auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
2359 bool dstAliased = dst.GetId() == src0.GetId();
2360 auto dstReg = dstAliased ? ArchReg(tmpReg, size) : ArchReg(dst, size);
2361
2362 GetMasm()->mov(dstReg, ArchReg(src1, size));
2363 GetMasm()->cmov(ArchCcTest(cc), dstReg, ArchReg(src0, size));
2364 if (dstAliased) {
2365 EncodeMov(dst, tmpReg);
2366 }
2367 }
2368
2369 void Amd64Encoder::EncodeLdp(Reg dst0, Reg dst1, bool dstSigned, MemRef mem)
2370 {
2371 ASSERT(dst0.IsFloat() == dst1.IsFloat());
2372 ASSERT(dst0.GetSize() == dst1.GetSize());
2373
2374 auto m = ArchMem(mem).Prepare(GetMasm());
2375
2376 if (dst0.IsFloat()) {
2377 if (dst0.GetType() == FLOAT32_TYPE) {
2378 GetMasm()->movss(ArchVReg(dst0), m);
2379
2380 m.addOffset(WORD_SIZE_BYTES);
2381 GetMasm()->movss(ArchVReg(dst1), m);
2382 } else {
2383 GetMasm()->movsd(ArchVReg(dst0), m);
2384
2385 m.addOffset(DOUBLE_WORD_SIZE_BYTES);
2386 GetMasm()->movsd(ArchVReg(dst1), m);
2387 }
2388 return;
2389 }
2390
2391 if (dstSigned && dst0.GetSize() == WORD_SIZE) {
2392 m.setSize(WORD_SIZE_BYTES);
2393 GetMasm()->movsxd(ArchReg(dst0, DOUBLE_WORD_SIZE), m);
2394
2395 m.addOffset(WORD_SIZE_BYTES);
2396 GetMasm()->movsxd(ArchReg(dst1, DOUBLE_WORD_SIZE), m);
2397 return;
2398 }
2399
2400 GetMasm()->mov(ArchReg(dst0), m);
2401
2402 m.addOffset(dst0.GetSize() / BITS_PER_BYTE);
2403 GetMasm()->mov(ArchReg(dst1), m);
2404 }
2405
2406 void Amd64Encoder::EncodeStp(Reg src0, Reg src1, MemRef mem)
2407 {
2408 ASSERT(src0.IsFloat() == src1.IsFloat());
2409 ASSERT(src0.GetSize() == src1.GetSize());
2410
2411 auto m = ArchMem(mem).Prepare(GetMasm());
2412
2413 if (src0.IsFloat()) {
2414 if (src0.GetType() == FLOAT32_TYPE) {
2415 GetMasm()->movss(m, ArchVReg(src0));
2416
2417 m.addOffset(WORD_SIZE_BYTES);
2418 GetMasm()->movss(m, ArchVReg(src1));
2419 } else {
2420 GetMasm()->movsd(m, ArchVReg(src0));
2421
2422 m.addOffset(DOUBLE_WORD_SIZE_BYTES);
2423 GetMasm()->movsd(m, ArchVReg(src1));
2424 }
2425 return;
2426 }
2427
2428 GetMasm()->mov(m, ArchReg(src0));
2429
2430 m.addOffset(src0.GetSize() / BITS_PER_BYTE);
2431 GetMasm()->mov(m, ArchReg(src1));
2432 }
2433
2434 void Amd64Encoder::EncodeReverseBytes(Reg dst, Reg src)
2435 {
2436 ASSERT(src.GetSize() > BYTE_SIZE);
2437 ASSERT(src.GetSize() == dst.GetSize());
2438 ASSERT(src.IsValid());
2439 ASSERT(dst.IsValid());
2440
2441 if (src != dst) {
2442 GetMasm()->mov(ArchReg(dst), ArchReg(src));
2443 }
2444
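    // for 16-bit values a rotate by 8 swaps the two bytes in place; movsx then
    // sign-extends the swapped value to 32 bits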
2445 if (src.GetSize() == HALF_SIZE) {
2446 GetMasm()->rol(ArchReg(dst), BYTE_SIZE);
2447 GetMasm()->movsx(ArchReg(dst, WORD_SIZE), ArchReg(dst));
2448 } else {
2449 GetMasm()->bswap(ArchReg(dst));
2450 }
2451 }
2452
2453 void Amd64Encoder::EncodeUnsignedExtendBytesToShorts(Reg dst, Reg src)
2454 {
2455 GetMasm()->pmovzxbw(ArchVReg(dst), ArchVReg(src));
2456 }
2457
2458 /* Attention: the encoders below operate on vector registers, not GPRs */
2459 void Amd64Encoder::EncodeReverseHalfWords(Reg dst, Reg src)
2460 {
2461 ASSERT(src.GetSize() == dst.GetSize());
2462 ASSERT(src.IsValid());
2463 ASSERT(dst.IsValid());
2464
2465 constexpr unsigned MASK = 0x1b; // reverse mask: 00 01 10 11
2466 GetMasm()->pshuflw(ArchVReg(dst), ArchVReg(src), MASK);
2467 }
2468
2469 bool Amd64Encoder::CanEncodeImmAddSubCmp(int64_t imm, uint32_t size, [[maybe_unused]] bool signedCompare)
2470 {
2471 return ImmFitsSize(imm, size);
2472 }
2473
2474 void Amd64Encoder::EncodeBitCount(Reg dst0, Reg src0)
2475 {
2476 ASSERT(src0.GetSize() == WORD_SIZE || src0.GetSize() == DOUBLE_WORD_SIZE);
2477 ASSERT(dst0.GetSize() == WORD_SIZE);
2478 ASSERT(src0.IsScalar() && dst0.IsScalar());
2479
2480 GetMasm()->popcnt(ArchReg(dst0, src0.GetSize()), ArchReg(src0));
2481 }
2482
2483 void Amd64Encoder::EncodeCountLeadingZeroBits(Reg dst, Reg src)
2484 {
2485 auto end = CreateLabel();
2486 auto zero = CreateLabel();
2487 EncodeJump(zero, src, Condition::EQ);
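    // bsr returns the index of the most significant set bit; xor with (size - 1)
    // converts that index into the leading-zero count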
2488 GetMasm()->bsr(ArchReg(dst), ArchReg(src));
2489 GetMasm()->xor_(ArchReg(dst), asmjit::imm(dst.GetSize() - 1));
2490 EncodeJump(end);
2491
2492 BindLabel(zero);
2493 GetMasm()->mov(ArchReg(dst), asmjit::imm(dst.GetSize()));
2494
2495 BindLabel(end);
2496 }
2497
2498 void Amd64Encoder::EncodeCountTrailingZeroBits(Reg dst, Reg src)
2499 {
2500 ScopedTmpReg tmp(this, src.GetType());
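    // bsf sets ZF when src is zero and mov does not touch the flags, so cmovne only
    // overwrites the preloaded default (the operand size) when a set bit was found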
2501 GetMasm()->bsf(ArchReg(tmp), ArchReg(src));
2502 GetMasm()->mov(ArchReg(dst), asmjit::imm(dst.GetSize()));
2503 GetMasm()->cmovne(ArchReg(dst), ArchReg(tmp));
2504 }
2505
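/* roundsd/roundss immediate: with bit 2 clear the MXCSR rounding mode is ignored and
 * bits [1:0] select the mode: 0 = to nearest even, 1 = toward -inf, 2 = toward +inf,
 * 3 = toward zero */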
2506 void Amd64Encoder::EncodeCeil(Reg dst, Reg src)
2507 {
2508 // NOLINTNEXTLINE(readability-magic-numbers)
2509 GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(2_I));
2510 }
2511
2512 void Amd64Encoder::EncodeFloor(Reg dst, Reg src)
2513 {
2514 GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(1));
2515 }
2516
2517 void Amd64Encoder::EncodeRint(Reg dst, Reg src)
2518 {
2519 GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(0));
2520 }
2521
2522 void Amd64Encoder::EncodeTrunc(Reg dst, Reg src)
2523 {
2524 GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(3_I));
2525 }
2526
2527 void Amd64Encoder::EncodeRoundAway(Reg dst, Reg src)
2528 {
2529 ASSERT(src.GetType() == FLOAT64_TYPE);
2530 ASSERT(dst.GetType() == FLOAT64_TYPE);
2531
2532 ScopedTmpReg tv(this, src.GetType());
2533 ScopedTmpReg tv1(this, src.GetType());
2534 ScopedTmpRegU64 ti(this);
2535 auto dest = dst;
2536
2537 auto shared = src == dst;
2538
2539 if (shared) {
2540 dest = tv1.GetReg();
2541 }
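    // build a constant just below 0.5 that carries the sign of src, add it to src and
    // truncate the sum toward zero (roundsd mode 3)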
2542 GetMasm()->movapd(ArchVReg(dest), ArchVReg(src));
2543
2544 constexpr auto SIGN_BIT_MASK = 0x8000000000000000ULL;
2545 GetMasm()->mov(ArchReg(ti), asmjit::imm(SIGN_BIT_MASK));
2546 GetMasm()->movq(ArchVReg(tv), ArchReg(ti));
2547 GetMasm()->andpd(ArchVReg(dest), ArchVReg(tv));
2548
2549 constexpr auto DOUBLE_POINT_FIVE = 0x3fdfffffffffffffULL; // .49999999999999994
2550 GetMasm()->mov(ArchReg(ti), asmjit::imm(DOUBLE_POINT_FIVE));
2551 GetMasm()->movq(ArchVReg(tv), ArchReg(ti));
2552 GetMasm()->orpd(ArchVReg(dest), ArchVReg(tv));
2553
2554 GetMasm()->addsd(ArchVReg(dest), ArchVReg(src));
2555 GetMasm()->roundsd(ArchVReg(dest), ArchVReg(dest), asmjit::imm(3_I));
2556 if (shared) {
2557 GetMasm()->movapd(ArchVReg(dst), ArchVReg(dest));
2558 }
2559 }
2560
2561 void Amd64Encoder::EncodeRoundToPInfFloat(Reg dst, Reg src)
2562 {
2563 ScopedTmpReg t1(this, src.GetType());
2564 ScopedTmpReg t2(this, src.GetType());
2565 ScopedTmpReg t3(this, src.GetType());
2566 ScopedTmpReg t4(this, dst.GetType());
2567
2568 auto skipIncrId = CreateLabel();
2569 auto doneId = CreateLabel();
2570
2571 auto skipIncr = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(skipIncrId);
2572 auto done = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(doneId);
2573
2574 GetMasm()->movss(ArchVReg(t2), ArchVReg(src));
2575 GetMasm()->roundss(ArchVReg(t1), ArchVReg(src), asmjit::imm(1));
2576 GetMasm()->subss(ArchVReg(t2), ArchVReg(t1));
2577 // NOLINTNEXTLINE(readability-magic-numbers)
2578 GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int32_t, float>(0.5F)));
2579 GetMasm()->movd(ArchVReg(t3), ArchReg(t4));
2580 GetMasm()->comiss(ArchVReg(t2), ArchVReg(t3));
2581 GetMasm()->j(asmjit::x86::Condition::Code::kB, *skipIncr);
2582 // NOLINTNEXTLINE(readability-magic-numbers)
2583 GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int32_t, float>(1.0F)));
2584 GetMasm()->movd(ArchVReg(t3), ArchReg(t4));
2585 GetMasm()->addss(ArchVReg(t1), ArchVReg(t3));
2586 BindLabel(skipIncrId);
2587
2588 // NOLINTNEXTLINE(readability-magic-numbers)
2589 GetMasm()->mov(ArchReg(dst), asmjit::imm(0x7FFFFFFF));
2590 GetMasm()->cvtsi2ss(ArchVReg(t2), ArchReg(dst));
2591 GetMasm()->comiss(ArchVReg(t1), ArchVReg(t2));
2592 GetMasm()->j(asmjit::x86::Condition::Code::kAE,
2593 *done); // clipped to max (already in dst), does not jump on unordered
2594 GetMasm()->mov(ArchReg(dst), asmjit::imm(0)); // does not change flags
2595 GetMasm()->j(asmjit::x86::Condition::Code::kParityEven, *done); // NaN mapped to 0 (just moved in dst)
2596 GetMasm()->cvttss2si(ArchReg(dst), ArchVReg(t1));
2597 BindLabel(doneId);
2598 }
2599
2600 void Amd64Encoder::EncodeRoundToPInfDouble(Reg dst, Reg src)
2601 {
2602 ScopedTmpReg t1(this, src.GetType());
2603 ScopedTmpReg t2(this, src.GetType());
2604 ScopedTmpReg t3(this, src.GetType());
2605 ScopedTmpReg t4(this, dst.GetType());
2606
2607 auto skipIncrId = CreateLabel();
2608 auto doneId = CreateLabel();
2609
2610 auto skipIncr = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(skipIncrId);
2611 auto done = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(doneId);
2612
2613 GetMasm()->movsd(ArchVReg(t2), ArchVReg(src));
2614 GetMasm()->roundsd(ArchVReg(t1), ArchVReg(src), asmjit::imm(1));
2615 GetMasm()->subsd(ArchVReg(t2), ArchVReg(t1));
2616 // NOLINTNEXTLINE(readability-magic-numbers)
2617 GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int64_t, double>(0.5F)));
2618 GetMasm()->movq(ArchVReg(t3), ArchReg(t4));
2619 GetMasm()->comisd(ArchVReg(t2), ArchVReg(t3));
2620 GetMasm()->j(asmjit::x86::Condition::Code::kB, *skipIncr);
2621 // NOLINTNEXTLINE(readability-magic-numbers)
2622 GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int64_t, double>(1.0)));
2623 GetMasm()->movq(ArchVReg(t3), ArchReg(t4));
2624 GetMasm()->addsd(ArchVReg(t1), ArchVReg(t3));
2625 BindLabel(skipIncrId);
2626
2627 // NOLINTNEXTLINE(readability-magic-numbers)
2628 GetMasm()->mov(ArchReg(dst), asmjit::imm(0x7FFFFFFFFFFFFFFFL));
2629 GetMasm()->cvtsi2sd(ArchVReg(t2), ArchReg(dst));
2630 GetMasm()->comisd(ArchVReg(t1), ArchVReg(t2));
2631 GetMasm()->j(asmjit::x86::Condition::Code::kAE,
2632 *done); // clipped to max (already in dst), does not jump on unordered
2633 GetMasm()->mov(ArchReg(dst), asmjit::imm(0)); // does not change flags
2634 GetMasm()->j(asmjit::x86::Condition::Code::kParityEven, *done); // NaN mapped to 0 (just moved in dst)
2635 GetMasm()->cvttsd2si(ArchReg(dst), ArchVReg(t1));
2636 BindLabel(doneId);
2637 }
2638
2639 void Amd64Encoder::EncodeRoundToPInfReturnScalar(Reg dst, Reg src)
2640 {
2641 if (src.GetType() == FLOAT32_TYPE) {
2642 EncodeRoundToPInfFloat(dst, src);
2643 } else if (src.GetType() == FLOAT64_TYPE) {
2644 EncodeRoundToPInfDouble(dst, src);
2645 } else {
2646 UNREACHABLE();
2647 }
2648 }
2649
2650 void Amd64Encoder::EncodeRoundToPInfReturnFloat(Reg dst, Reg src)
2651 {
2652 ASSERT(src.GetType() == FLOAT64_TYPE);
2653 ASSERT(dst.GetType() == FLOAT64_TYPE);
2654
2655 // CC-OFFNXT(G.NAM.03-CPP) project code style
2656 constexpr int64_t HALF = 0x3FE0000000000000; // double precision representation of 0.5
2657 // CC-OFFNXT(G.NAM.03-CPP) project code style
2658 constexpr int64_t ONE = 0x3FF0000000000000; // double precision representation of 1.0
2659
2660 ScopedTmpRegF64 ceil(this);
2661 GetMasm()->roundsd(ArchVReg(ceil), ArchVReg(src), asmjit::imm(0b10));
2662
2663 // calculate ceil(val) - val
2664 ScopedTmpRegF64 diff(this);
2665 GetMasm()->movapd(ArchVReg(diff), ArchVReg(ceil));
2666 GetMasm()->subsd(ArchVReg(diff), ArchVReg(src));
2667
2668 // load 0.5 constant and compare
2669 ScopedTmpRegF64 constReg(this);
2670 ScopedTmpRegU64 tmpReg(this);
2671 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(HALF));
2672 GetMasm()->movq(ArchVReg(constReg), ArchReg(tmpReg));
2673 GetMasm()->comisd(ArchVReg(diff), ArchVReg(constReg));
2674
2675 // if difference > 0.5, subtract 1 from result
2676 auto done = GetMasm()->newLabel();
2677 GetMasm()->jbe(done); // If difference <= 0.5, jump to end
2678
2679 // Load 1.0 and subtract
2680 GetMasm()->mov(ArchReg(tmpReg), asmjit::imm(ONE));
2681 GetMasm()->movq(ArchVReg(constReg), ArchReg(tmpReg));
2682 GetMasm()->subsd(ArchVReg(ceil), ArchVReg(constReg));
2683
2684 GetMasm()->bind(done);
2685
2686 // move result to destination register
2687 GetMasm()->movapd(ArchVReg(dst), ArchVReg(ceil));
2688 }
2689
2690 template <typename T>
2691 void Amd64Encoder::EncodeReverseBitsImpl(Reg dst0, Reg src0)
2692 {
2693 ASSERT(std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed);
2694 [[maybe_unused]] constexpr auto IMM_8 = 8;
2695 ASSERT(sizeof(T) * IMM_8 == dst0.GetSize());
2696 // NOLINTNEXTLINE(modernize-avoid-c-arrays)
2697 static constexpr T MASKS[] = {static_cast<T>(UINT64_C(0x5555555555555555)),
2698 static_cast<T>(UINT64_C(0x3333333333333333)),
2699 static_cast<T>(UINT64_C(0x0f0f0f0f0f0f0f0f))};
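    // classic divide-and-conquer bit reversal: swap adjacent bits, then 2-bit pairs,
    // then nibbles, and finally reverse the byte order with bswap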
2700
2701 ScopedTmpReg tmp(this, dst0.GetType());
2702 ScopedTmpReg immHolder(this, dst0.GetType());
2703 auto immHolderReg = ArchReg(immHolder);
2704
2705 GetMasm()->mov(ArchReg(dst0), ArchReg(src0));
2706 GetMasm()->mov(ArchReg(tmp), ArchReg(src0));
2707 constexpr auto MAX_ROUNDS = 3;
2708 for (uint64_t round = 0; round < MAX_ROUNDS; round++) {
2709 auto shift = 1U << round;
2710 auto mask = asmjit::imm(MASKS[round]);
2711 GetMasm()->shr(ArchReg(dst0), shift);
2712 if (dst0.GetSize() == DOUBLE_WORD_SIZE) {
2713 GetMasm()->mov(immHolderReg, mask);
2714 GetMasm()->and_(ArchReg(tmp), immHolderReg);
2715 GetMasm()->and_(ArchReg(dst0), immHolderReg);
2716 } else {
2717 GetMasm()->and_(ArchReg(tmp), mask);
2718 GetMasm()->and_(ArchReg(dst0), mask);
2719 }
2720 GetMasm()->shl(ArchReg(tmp), shift);
2721 GetMasm()->or_(ArchReg(dst0), ArchReg(tmp));
2722 constexpr auto ROUND_2 = 2;
2723 if (round != ROUND_2) {
2724 GetMasm()->mov(ArchReg(tmp), ArchReg(dst0));
2725 }
2726 }
2727
2728 GetMasm()->bswap(ArchReg(dst0));
2729 }
2730
2731 void Amd64Encoder::EncodeReverseBits(Reg dst0, Reg src0)
2732 {
2733 ASSERT(src0.GetSize() == WORD_SIZE || src0.GetSize() == DOUBLE_WORD_SIZE);
2734 ASSERT(src0.GetSize() == dst0.GetSize());
2735
2736 if (src0.GetSize() == WORD_SIZE) {
2737 EncodeReverseBitsImpl<uint32_t>(dst0, src0);
2738 return;
2739 }
2740
2741 EncodeReverseBitsImpl<uint64_t>(dst0, src0);
2742 }
2743
2744 bool Amd64Encoder::CanEncodeScale(uint64_t imm, [[maybe_unused]] uint32_t size)
2745 {
2746 return imm <= 3U;
2747 }
2748
2749 bool Amd64Encoder::CanEncodeImmLogical(uint64_t imm, uint32_t size)
2750 {
2751 #ifndef NDEBUG
2752 if (size < DOUBLE_WORD_SIZE) {
2753 // Test if the highest part is consistent:
2754 ASSERT(((imm >> size) == 0) || (((~imm) >> size) == 0));
2755 }
2756 #endif // NDEBUG
2757 return ImmFitsSize(imm, size);
2758 }
2759
2760 bool Amd64Encoder::CanEncodeBitCount()
2761 {
2762 return asmjit::CpuInfo::host().hasFeature(asmjit::x86::Features::kPOPCNT);
2763 }
2764
2765 bool Amd64Encoder::CanOptimizeImmDivMod(uint64_t imm, bool isSigned) const
2766 {
2767 return CanOptimizeImmDivModCommon(imm, isSigned);
2768 }
2769
2770 void Amd64Encoder::EncodeIsInf(Reg dst, Reg src)
2771 {
2772 ASSERT(dst.IsScalar() && src.IsFloat());
2773
2774 GetMasm()->xor_(ArchReg(dst, DOUBLE_WORD_SIZE), ArchReg(dst, DOUBLE_WORD_SIZE));
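    // shifting the value left by one drops the sign bit, so both +inf and -inf compare
    // equal to the similarly shifted infinity bit pattern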
2775
2776 if (src.GetSize() == WORD_SIZE) {
2777 constexpr auto INF_MASK = uint32_t(0x7f800000) << 1U;
2778
2779 ScopedTmpRegU32 tmpReg(this);
2780 ScopedTmpRegU32 tmp1Reg(this);
2781 auto tmp = ArchReg(tmpReg);
2782 auto tmp1 = ArchReg(tmp1Reg);
2783
2784 GetMasm()->movd(tmp1, ArchVReg(src));
2785 GetMasm()->shl(tmp1, 1);
2786 GetMasm()->mov(tmp, INF_MASK);
2787 GetMasm()->cmp(tmp, tmp1);
2788 } else {
2789 constexpr auto INF_MASK = uint64_t(0x7ff0000000000000) << 1U;
2790
2791 ScopedTmpRegU64 tmpReg(this);
2792 ScopedTmpRegU64 tmp1Reg(this);
2793 auto tmp = ArchReg(tmpReg);
2794 auto tmp1 = ArchReg(tmp1Reg);
2795
2796 GetMasm()->movq(tmp1, ArchVReg(src));
2797 GetMasm()->shl(tmp1, 1);
2798
2799 GetMasm()->mov(tmp, INF_MASK);
2800 GetMasm()->cmp(tmp, tmp1);
2801 }
2802
2803 GetMasm()->sete(ArchReg(dst, BYTE_SIZE));
2804 }
2805
2806 void Amd64Encoder::EncodeCmpFracWithDelta(Reg src)
2807 {
2808 ASSERT(src.IsFloat());
2809 ASSERT(src.GetType() == FLOAT32_TYPE || src.GetType() == FLOAT64_TYPE);
2810
2811 // Rounding control bits: Truncated (aka Round to Zero)
2812 constexpr uint8_t RND_CTL_TRUNCATED = 0b00000011;
2813
2814 // Encode (fabs(src - trunc(src)) <= DELTA)
2815 if (src.GetType() == FLOAT32_TYPE) {
2816 ScopedTmpRegF32 tmp(this);
2817 ScopedTmpRegF32 delta(this);
2818 GetMasm()->roundss(ArchVReg(tmp), ArchVReg(src), asmjit::imm(RND_CTL_TRUNCATED));
2819 EncodeSub(tmp, src, tmp);
2820 EncodeAbs(tmp, tmp);
2821 EncodeMov(delta, Imm(std::numeric_limits<float>::epsilon()));
2822 GetMasm()->ucomiss(ArchVReg(tmp), ArchVReg(delta));
2823 } else {
2824 ScopedTmpRegF64 tmp(this);
2825 ScopedTmpRegF64 delta(this);
2826 GetMasm()->roundsd(ArchVReg(tmp), ArchVReg(src), asmjit::imm(RND_CTL_TRUNCATED));
2827 EncodeSub(tmp, src, tmp);
2828 EncodeAbs(tmp, tmp);
2829 EncodeMov(delta, Imm(std::numeric_limits<double>::epsilon()));
2830 GetMasm()->ucomisd(ArchVReg(tmp), ArchVReg(delta));
2831 }
2832 }
2833
2834 void Amd64Encoder::EncodeIsInteger(Reg dst, Reg src)
2835 {
2836 ASSERT(dst.IsScalar() && src.IsFloat());
2837 ASSERT(src.GetType() == FLOAT32_TYPE || src.GetType() == FLOAT64_TYPE);
2838
2839 auto labelExit = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
2840
2841 GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
2842 EncodeCmpFracWithDelta(src);
2843 GetMasm()->jp(*labelExit); // Inf or NaN
2844 GetMasm()->set(ArchCc(Condition::LE, true), ArchReg(dst, BYTE_SIZE));
2845 GetMasm()->bind(*labelExit);
2846 }
2847
2848 void Amd64Encoder::EncodeIsSafeInteger(Reg dst, Reg src)
2849 {
2850 ASSERT(dst.IsScalar() && src.IsFloat());
2851 ASSERT(src.GetType() == FLOAT32_TYPE || src.GetType() == FLOAT64_TYPE);
2852
2853 auto labelExit = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
2854
2855 GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
2856
2857 // Check if IsInteger
2858 EncodeCmpFracWithDelta(src);
2859 GetMasm()->jp(*labelExit); // Inf or NaN
2860 GetMasm()->j(ArchCc(Condition::GT, true), *labelExit);
2861
2862 // Check if it is safe, i.e. src can be represented in float/double without losing precision
2863 if (src.GetType() == FLOAT32_TYPE) {
2864 ScopedTmpRegF32 tmp1(this);
2865 ScopedTmpRegF32 tmp2(this);
2866 EncodeAbs(tmp1, src);
2867 EncodeMov(tmp2, Imm(MaxIntAsExactFloat()));
2868 GetMasm()->ucomiss(ArchVReg(tmp1), ArchVReg(tmp2));
2869 } else {
2870 ScopedTmpRegF64 tmp1(this);
2871 ScopedTmpRegF64 tmp2(this);
2872 EncodeAbs(tmp1, src);
2873 EncodeMov(tmp2, Imm(MaxIntAsExactDouble()));
2874 GetMasm()->ucomisd(ArchVReg(tmp1), ArchVReg(tmp2));
2875 }
2876 GetMasm()->set(ArchCc(Condition::LE, true), ArchReg(dst, BYTE_SIZE));
2877 GetMasm()->bind(*labelExit);
2878 }
2879
2880 /* Since NaNs have to be canonicalized, we compare the
2881  * input with itself; if it is NaN, the comparison will
2882  * set the parity flag (PF) */
2883 void Amd64Encoder::EncodeFpToBits(Reg dst, Reg src)
2884 {
2885 ASSERT(dst.IsScalar() && src.IsFloat());
2886
2887 if (dst.GetType() == INT32_TYPE) {
2888 ASSERT(src.GetSize() == WORD_SIZE);
2889
2890 constexpr auto FLOAT_NAN = uint32_t(0x7fc00000);
2891
2892 ScopedTmpRegU32 tmp(this);
2893
2894 GetMasm()->ucomiss(ArchVReg(src), ArchVReg(src));
2895 GetMasm()->mov(ArchReg(tmp), FLOAT_NAN);
2896 GetMasm()->movd(ArchReg(dst), ArchVReg(src));
2897 GetMasm()->cmovpe(ArchReg(dst), ArchReg(tmp));
2898 } else {
2899 ASSERT(src.GetSize() == DOUBLE_WORD_SIZE);
2900
2901 constexpr auto DOUBLE_NAN = uint64_t(0x7ff8000000000000);
2902 ScopedTmpRegU64 tmp(this);
2903
2904 GetMasm()->ucomisd(ArchVReg(src), ArchVReg(src));
2905 GetMasm()->mov(ArchReg(tmp), DOUBLE_NAN);
2906 GetMasm()->movq(ArchReg(dst), ArchVReg(src));
2907 GetMasm()->cmovpe(ArchReg(dst), ArchReg(tmp));
2908 }
2909 }
2910
2911 void Amd64Encoder::EncodeMoveBitsRaw(Reg dst, Reg src)
2912 {
2913 ASSERT((dst.IsFloat() && src.IsScalar()) || (src.IsFloat() && dst.IsScalar()));
2914 if (src.IsScalar()) {
2915 ASSERT((dst.GetSize() == src.GetSize()));
2916 if (src.GetSize() == WORD_SIZE) {
2917 GetMasm()->movd(ArchVReg(dst), ArchReg(src));
2918 } else {
2919 GetMasm()->movq(ArchVReg(dst), ArchReg(src));
2920 }
2921 } else {
2922 ASSERT((src.GetSize() == dst.GetSize()));
2923 if (dst.GetSize() == WORD_SIZE) {
2924 GetMasm()->movd(ArchReg(dst), ArchVReg(src));
2925 } else {
2926 GetMasm()->movq(ArchReg(dst), ArchVReg(src));
2927 }
2928 }
2929 }
2930
2931 /* Unsafe intrinsics */
2932 void Amd64Encoder::EncodeCompareAndSwap(Reg dst, Reg obj, const Reg *offset, Reg val, Reg newval)
2933 {
2934 /*
2935 * movl old, %eax
2936 * lock cmpxchgl new, addr
2937 * sete %al
2938 */
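    // lock cmpxchg compares RAX with the memory operand: on a match it stores the new
    // value and sets ZF, otherwise it loads the current memory value into RAX; the
    // trailing sete turns ZF into the boolean result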
2939 ScopedTmpRegU64 tmp1(this);
2940 ScopedTmpRegU64 tmp2(this);
2941 ScopedTmpRegU64 tmp3(this);
2942 Reg newvalue = newval;
2943 auto addr = ArchMem(MemRef(tmp2)).Prepare(GetMasm());
2944 auto addrReg = ArchReg(tmp2);
2945 Reg rax(ConvertRegNumber(asmjit::x86::rax.id()), INT64_TYPE);
2946
2947 /* NOTE(ayodkev) this is a workaround for the failure of
2948  * jsr166.ScheduledExecutorTest; we have to figure out whether there
2949  * is a less crude way to avoid this */
2950 if (newval.GetId() == rax.GetId()) {
2951 SetFalseResult();
2952 return;
2953 }
2954
2955 if (offset != nullptr) {
2956 GetMasm()->lea(addrReg, asmjit::x86::ptr(ArchReg(obj), ArchReg(*offset)));
2957 } else {
2958 GetMasm()->mov(addrReg, ArchReg(obj));
2959 }
2960
2961 /* the [er]ax register will be overwritten by the cmpxchg instruction;
2962  * save it unless it is set as the destination register */
2963 if (dst.GetId() != rax.GetId()) {
2964 GetMasm()->mov(ArchReg(tmp1), asmjit::x86::rax);
2965 }
2966
2967 /* if the new value comes in the [er]ax register, we have to use a
2968  * different register, as [er]ax will contain the current value */
2969 if (newval.GetId() == rax.GetId()) {
2970 GetMasm()->mov(ArchReg(tmp3, newval.GetSize()), ArchReg(newval));
2971 newvalue = tmp3;
2972 }
2973
2974 if (val.GetId() != rax.GetId()) {
2975 GetMasm()->mov(asmjit::x86::rax, ArchReg(val).r64());
2976 }
2977
2978 GetMasm()->lock().cmpxchg(addr, ArchReg(newvalue));
2979 GetMasm()->sete(ArchReg(dst));
2980
2981 if (dst.GetId() != rax.GetId()) {
2982 GetMasm()->mov(asmjit::x86::rax, ArchReg(tmp1));
2983 }
2984 }
2985
2986 void Amd64Encoder::EncodeCompareAndSwap(Reg dst, Reg obj, Reg offset, Reg val, Reg newval)
2987 {
2988 EncodeCompareAndSwap(dst, obj, &offset, val, newval);
2989 }
2990
2991 void Amd64Encoder::EncodeCompareAndSwap(Reg dst, Reg addr, Reg val, Reg newval)
2992 {
2993 EncodeCompareAndSwap(dst, addr, nullptr, val, newval);
2994 }
2995
2996 void Amd64Encoder::EncodeUnsafeGetAndSet(Reg dst, Reg obj, Reg offset, Reg val)
2997 {
2998 ScopedTmpRegU64 tmp(this);
2999 auto addrReg = ArchReg(tmp);
3000 auto addr = ArchMem(MemRef(tmp)).Prepare(GetMasm());
3001 GetMasm()->lea(addrReg, asmjit::x86::ptr(ArchReg(obj), ArchReg(offset)));
3002 GetMasm()->mov(ArchReg(dst), ArchReg(val));
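    // xchg with a memory operand is implicitly locked; the explicit lock prefix is
    // redundant but harmless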
3003 GetMasm()->lock().xchg(addr, ArchReg(dst));
3004 }
3005
3006 void Amd64Encoder::EncodeUnsafeGetAndAdd(Reg dst, Reg obj, Reg offset, Reg val, [[maybe_unused]] Reg tmp)
3007 {
3008 ScopedTmpRegU64 tmp1(this);
3009 auto addrReg = ArchReg(tmp1);
3010 auto addr = ArchMem(MemRef(tmp1)).Prepare(GetMasm());
3011 GetMasm()->lea(addrReg, asmjit::x86::ptr(ArchReg(obj), ArchReg(offset)));
3012 GetMasm()->mov(ArchReg(dst), ArchReg(val));
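    // lock xadd atomically stores old + val to memory and leaves the previous memory
    // value in dst, which is exactly the get-and-add result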
3013 GetMasm()->lock().xadd(addr, ArchReg(dst));
3014 }
3015
3016 void Amd64Encoder::EncodeMemoryBarrier(memory_order::Order order)
3017 {
3018 if (order == memory_order::FULL) {
3019 /* does the same as mfence but is faster; not applicable to NT-writes, though */
3020 GetMasm()->lock().add(asmjit::x86::dword_ptr(asmjit::x86::rsp), asmjit::imm(0));
3021 }
3022 }
3023
3024 void Amd64Encoder::EncodeStackOverflowCheck(ssize_t offset)
3025 {
3026 MemRef mem(GetTarget().GetStackReg(), offset);
3027 auto m = ArchMem(mem).Prepare(GetMasm());
3028 GetMasm()->test(m, ArchReg(GetTarget().GetParamReg(0)));
3029 }
3030
3031 size_t Amd64Encoder::GetCursorOffset() const
3032 {
3033 // NOLINTNEXTLINE(readability-identifier-naming)
3034 return GetMasm()->offset();
3035 }
3036
3037 void Amd64Encoder::SetCursorOffset(size_t offset)
3038 {
3039 // NOLINTNEXTLINE(readability-identifier-naming)
3040 GetMasm()->setOffset(offset);
3041 }
3042
3043 void Amd64Encoder::EncodeGetCurrentPc(Reg dst)
3044 {
3045 ASSERT(dst.GetType() == INT64_TYPE);
3046 EncodeRelativePcMov(dst, 0L, [this](Reg reg, intptr_t offset) {
3047 GetMasm()->long_().lea(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
3048 });
3049 }
3050
3051 Reg Amd64Encoder::AcquireScratchRegister(TypeInfo type)
3052 {
3053 return (static_cast<Amd64RegisterDescription *>(GetRegfile()))->AcquireScratchRegister(type);
3054 }
3055
3056 void Amd64Encoder::AcquireScratchRegister(Reg reg)
3057 {
3058 (static_cast<Amd64RegisterDescription *>(GetRegfile()))->AcquireScratchRegister(reg);
3059 }
3060
3061 void Amd64Encoder::ReleaseScratchRegister(Reg reg)
3062 {
3063 (static_cast<Amd64RegisterDescription *>(GetRegfile()))->ReleaseScratchRegister(reg);
3064 }
3065
3066 bool Amd64Encoder::IsScratchRegisterReleased(Reg reg) const
3067 {
3068 return (static_cast<Amd64RegisterDescription *>(GetRegfile()))->IsScratchRegisterReleased(reg);
3069 }
3070
3071 RegMask Amd64Encoder::GetScratchRegistersMask() const
3072 {
3073 return (static_cast<const Amd64RegisterDescription *>(GetRegfile()))->GetScratchRegistersMask();
3074 }
3075
3076 RegMask Amd64Encoder::GetScratchFpRegistersMask() const
3077 {
3078 return (static_cast<const Amd64RegisterDescription *>(GetRegfile()))->GetScratchFpRegistersMask();
3079 }
3080
3081 RegMask Amd64Encoder::GetAvailableScratchRegisters() const
3082 {
3083 auto regfile = static_cast<const Amd64RegisterDescription *>(GetRegfile());
3084 return RegMask(regfile->GetScratchRegisters().GetMask());
3085 }
3086
3087 VRegMask Amd64Encoder::GetAvailableScratchFpRegisters() const
3088 {
3089 auto regfile = static_cast<const Amd64RegisterDescription *>(GetRegfile());
3090 return VRegMask(regfile->GetScratchFPRegisters().GetMask());
3091 }
3092
3093 TypeInfo Amd64Encoder::GetRefType()
3094 {
3095 return INT64_TYPE;
3096 }
3097
3098 void *Amd64Encoder::BufferData() const
3099 {
3100 // NOLINTNEXTLINE(readability-identifier-naming)
3101 return GetMasm()->bufferData();
3102 }
3103
3104 size_t Amd64Encoder::BufferSize() const
3105 {
3106 // NOLINTNEXTLINE(readability-identifier-naming)
3107 return GetMasm()->offset();
3108 }
3109
3110 void Amd64Encoder::MakeLibCall(Reg dst, Reg src0, Reg src1, void *entryPoint)
3111 {
3112 if (!dst.IsFloat()) {
3113 SetFalseResult();
3114 return;
3115 }
3116
3117 if (dst.GetType() == FLOAT32_TYPE) {
3118 if (!src0.IsFloat() || !src1.IsFloat()) {
3119 SetFalseResult();
3120 return;
3121 }
3122
3123 if (src0.GetId() != asmjit::x86::xmm0.id() || src1.GetId() != asmjit::x86::xmm1.id()) {
3124 ScopedTmpRegF32 tmp(this);
3125 GetMasm()->movss(ArchVReg(tmp), ArchVReg(src1));
3126 GetMasm()->movss(asmjit::x86::xmm0, ArchVReg(src0));
3127 GetMasm()->movss(asmjit::x86::xmm1, ArchVReg(tmp));
3128 }
3129
3130 MakeCall(entryPoint);
3131
3132 if (dst.GetId() != asmjit::x86::xmm0.id()) {
3133 GetMasm()->movss(ArchVReg(dst), asmjit::x86::xmm0);
3134 }
3135 } else if (dst.GetType() == FLOAT64_TYPE) {
3136 if (!src0.IsFloat() || !src1.IsFloat()) {
3137 SetFalseResult();
3138 return;
3139 }
3140
3141 if (src0.GetId() != asmjit::x86::xmm0.id() || src1.GetId() != asmjit::x86::xmm1.id()) {
3142 ScopedTmpRegF64 tmp(this);
3143 GetMasm()->movsd(ArchVReg(tmp), ArchVReg(src1));
3144 GetMasm()->movsd(asmjit::x86::xmm0, ArchVReg(src0));
3145 GetMasm()->movsd(asmjit::x86::xmm1, ArchVReg(tmp));
3146 }
3147
3148 MakeCall(entryPoint);
3149
3150 if (dst.GetId() != asmjit::x86::xmm0.id()) {
3151 GetMasm()->movsd(ArchVReg(dst), asmjit::x86::xmm0);
3152 }
3153 } else {
3154 UNREACHABLE();
3155 }
3156 }
3157
3158 template <bool IS_STORE>
3159 void Amd64Encoder::LoadStoreRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
3160 {
3161 for (size_t i {0}; i < registers.size(); ++i) {
3162 if (!registers.test(i)) {
3163 continue;
3164 }
3165
3166 asmjit::x86::Mem mem = asmjit::x86::ptr(asmjit::x86::rsp, (slot + i - startReg) * DOUBLE_WORD_SIZE_BYTES);
3167
3168 if constexpr (IS_STORE) { // NOLINT
3169 if (isFp) {
3170 GetMasm()->movsd(mem, asmjit::x86::xmm(i));
3171 } else {
3172 GetMasm()->mov(mem, asmjit::x86::gpq(ConvertRegNumber(i)));
3173 }
3174 } else { // NOLINT
3175 if (isFp) {
3176 GetMasm()->movsd(asmjit::x86::xmm(i), mem);
3177 } else {
3178 GetMasm()->mov(asmjit::x86::gpq(ConvertRegNumber(i)), mem);
3179 }
3180 }
3181 }
3182 }
3183
3184 template <bool IS_STORE>
3185 void Amd64Encoder::LoadStoreRegisters(RegMask registers, bool isFp, int32_t slot, Reg base, RegMask mask)
3186 {
3187 auto baseReg = ArchReg(base);
3188 bool hasMask = mask.any();
3189 int32_t index = hasMask ? static_cast<int32_t>(mask.GetMinRegister()) : 0;
3190 slot -= index;
3191 for (size_t i = index; i < registers.size(); ++i) {
3192 if (hasMask) {
3193 if (!mask.test(i)) {
3194 continue;
3195 }
3196 index++;
3197 }
3198 if (!registers.test(i)) {
3199 continue;
3200 }
3201
3202 if (!hasMask) {
3203 index++;
3204 }
3205
3206 // `-1` because we've incremented `index` in advance
3207 asmjit::x86::Mem mem = asmjit::x86::ptr(baseReg, (slot + index - 1) * DOUBLE_WORD_SIZE_BYTES);
3208
3209 if constexpr (IS_STORE) { // NOLINT
3210 if (isFp) {
3211 GetMasm()->movsd(mem, asmjit::x86::xmm(i));
3212 } else {
3213 GetMasm()->mov(mem, asmjit::x86::gpq(ConvertRegNumber(i)));
3214 }
3215 } else { // NOLINT
3216 if (isFp) {
3217 GetMasm()->movsd(asmjit::x86::xmm(i), mem);
3218 } else {
3219 GetMasm()->mov(asmjit::x86::gpq(ConvertRegNumber(i)), mem);
3220 }
3221 }
3222 }
3223 }
3224
3225 void Amd64Encoder::SaveRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
3226 {
3227 LoadStoreRegisters<true>(registers, slot, startReg, isFp);
3228 }
3229
3230 void Amd64Encoder::LoadRegisters(RegMask registers, ssize_t slot, size_t startReg, bool isFp)
3231 {
3232 LoadStoreRegisters<false>(registers, slot, startReg, isFp);
3233 }
3234
3235 void Amd64Encoder::SaveRegisters(RegMask registers, bool isFp, ssize_t slot, Reg base, RegMask mask)
3236 {
3237 LoadStoreRegisters<true>(registers, isFp, slot, base, mask);
3238 }
3239
3240 void Amd64Encoder::LoadRegisters(RegMask registers, bool isFp, ssize_t slot, Reg base, RegMask mask)
3241 {
3242 LoadStoreRegisters<false>(registers, isFp, slot, base, mask);
3243 }
3244
3245 void Amd64Encoder::PushRegisters(RegMask registers, bool isFp)
3246 {
3247 for (size_t i = 0; i < registers.size(); i++) {
3248 if (registers[i]) {
3249 if (isFp) {
3250 GetMasm()->sub(asmjit::x86::rsp, DOUBLE_WORD_SIZE_BYTES);
3251 GetMasm()->movsd(asmjit::x86::ptr(asmjit::x86::rsp), ArchVReg(Reg(i, FLOAT64_TYPE)));
3252 } else {
3253 GetMasm()->push(asmjit::x86::gpq(ConvertRegNumber(i)));
3254 }
3255 }
3256 }
3257 }
3258
3259 void Amd64Encoder::PopRegisters(RegMask registers, bool isFp)
3260 {
3261 for (ssize_t i = registers.size() - 1; i >= 0; i--) {
3262 if (registers[i]) {
3263 if (isFp) {
3264 GetMasm()->movsd(ArchVReg(Reg(i, FLOAT64_TYPE)), asmjit::x86::ptr(asmjit::x86::rsp));
3265 GetMasm()->add(asmjit::x86::rsp, DOUBLE_WORD_SIZE_BYTES);
3266 } else {
3267 GetMasm()->pop(asmjit::x86::gpq(ConvertRegNumber(i)));
3268 }
3269 }
3270 }
3271 }
3272
3273 asmjit::x86::Assembler *Amd64Encoder::GetMasm() const
3274 {
3275 ASSERT(masm_ != nullptr);
3276 return masm_;
3277 }
3278
3279 size_t Amd64Encoder::GetLabelAddress(LabelHolder::LabelId label)
3280 {
3281 auto code = GetMasm()->code();
3282 ASSERT(code->isLabelBound(label));
3283 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
3284 return code->baseAddress() + code->labelOffset(label);
3285 }
3286
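// LabelHasLinks reports whether the label still has unresolved references:
// asmjit records pending fixups as a linked list on the label entry.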
bool Amd64Encoder::LabelHasLinks(LabelHolder::LabelId label)
{
    auto code = GetMasm()->code();
    auto entry = code->labelEntry(label);
    return entry->links() != nullptr;
}

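// CopyArrayToXmm materializes an 8- or 16-byte constant array in an xmm
// register without going through a constant pool: each 64-bit half is loaded
// into a scratch GPR with `mov imm` and transferred with `movq`; for 16-byte
// arrays the two halves are then combined with `unpcklpd`.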
template <typename T, size_t N>
void Amd64Encoder::CopyArrayToXmm(Reg xmm, const std::array<T, N> &arr)
{
    static constexpr auto SIZE {N * sizeof(T)};
    static_assert((SIZE == DOUBLE_WORD_SIZE_BYTES) || (SIZE == 2U * DOUBLE_WORD_SIZE_BYTES));
    ASSERT(xmm.GetType() == FLOAT64_TYPE);

    auto data {reinterpret_cast<const uint64_t *>(arr.data())};

    ScopedTmpRegU64 tmpGpr(this);
    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    GetMasm()->mov(ArchReg(tmpGpr), asmjit::imm(data[0]));
    GetMasm()->movq(ArchVReg(xmm), ArchReg(tmpGpr));

    if constexpr (SIZE == 2U * DOUBLE_WORD_SIZE_BYTES) {
        ScopedTmpRegF64 tmpXmm(this);
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        GetMasm()->mov(ArchReg(tmpGpr), asmjit::imm(data[1]));
        GetMasm()->movq(ArchVReg(tmpXmm), ArchReg(tmpGpr));
        GetMasm()->unpcklpd(ArchVReg(xmm), ArchVReg(tmpXmm));
    }
}

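// CopyImmToXmm moves a 32- or 64-bit immediate into an xmm register via a
// scratch GPR: `movd` is used for 4-byte values and `movq` for 8-byte values,
// with the bit pattern preserved through bit_cast.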
template <typename T>
void Amd64Encoder::CopyImmToXmm(Reg xmm, T imm)
{
    static_assert((sizeof(imm) == WORD_SIZE_BYTES) || (sizeof(imm) == DOUBLE_WORD_SIZE_BYTES));
    ASSERT(xmm.GetSize() == BYTE_SIZE * sizeof(imm));

    if constexpr (sizeof(imm) == WORD_SIZE_BYTES) { // NOLINT
        ScopedTmpRegU32 tmpGpr(this);
        GetMasm()->mov(ArchReg(tmpGpr), asmjit::imm(bit_cast<uint32_t>(imm)));
        GetMasm()->movd(ArchVReg(xmm), ArchReg(tmpGpr));
    } else { // NOLINT
        ScopedTmpRegU64 tmpGpr(this);
        GetMasm()->mov(ArchReg(tmpGpr), asmjit::imm(bit_cast<uint64_t>(imm)));
        GetMasm()->movq(ArchVReg(xmm), ArchReg(tmpGpr));
    }
}

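// DisasmInstr disassembles the single instruction located at `pc` in the
// emitted buffer using Zydis (AT&T syntax) and prints it to `stream`. A
// negative `codeOffset` finalizes the code first and omits the address
// prefix; otherwise the line is prefixed with the hex value of
// `pc + codeOffset`. The return value is the pc of the next instruction.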
size_t Amd64Encoder::DisasmInstr(std::ostream &stream, size_t pc, ssize_t codeOffset) const
{
    if (codeOffset < 0) {
        (const_cast<Amd64Encoder *>(this))->Finalize();
    }
    // NOLINTNEXTLINE(readability-identifier-naming)
    Span code(GetMasm()->bufferData(), GetMasm()->offset());

    [[maybe_unused]] size_t dataLeft = code.Size() - pc;
    [[maybe_unused]] constexpr size_t LENGTH = ZYDIS_MAX_INSTRUCTION_LENGTH; // 15 bytes is max inst length in amd64

    // Initialize decoder context
    ZydisDecoder decoder;
    [[maybe_unused]] bool res =
        ZYAN_SUCCESS(ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_ADDRESS_WIDTH_64));

    // Initialize formatter
    ZydisFormatter formatter;
    res &= ZYAN_SUCCESS(ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_ATT));
    ZydisFormatterSetProperty(&formatter, ZYDIS_FORMATTER_PROP_FORCE_RELATIVE_BRANCHES, 1);
    ASSERT(res);

    ZydisDecodedInstruction instruction;

    res &= ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&decoder, &code[pc], std::min(LENGTH, dataLeft), &instruction));

    // Format & print the binary instruction structure to human readable format
    char buffer[256]; // NOLINT (modernize-avoid-c-arrays, readability-identifier-naming, readability-magic-numbers)
    res &= ZYAN_SUCCESS(
        ZydisFormatterFormatInstruction(&formatter, &instruction, buffer, sizeof(buffer), uintptr_t(&code[pc])));

    ASSERT(res);

    // Print disassembly
    if (codeOffset < 0) {
        stream << buffer;
    } else {
        stream << std::setw(0x8) << std::right << std::setfill('0') << std::hex << pc + codeOffset << std::dec
               << std::setfill(' ') << ": " << buffer;
    }

    return pc + instruction.length;
}
}  // namespace ark::compiler::amd64