1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 /*
Encoder (implementation of math and mem low-level emitters)
17 */
18
19 #include <iomanip>
20
21 #include "compiler/optimizer/code_generator/relocations.h"
22 #include "target/amd64/target.h"
23
24 #include "lib_helpers.inl"
25
26 #include "Zydis/Zydis.h"
27
28 #ifndef PANDA_TARGET_MACOS
29 #include "elf.h"
30 #endif // PANDA_TARGET_MACOS
31
32 namespace panda::compiler::amd64 {
LabelHolder::LabelId Amd64LabelHolder::CreateLabel()
34 {
35 ++id_;
36
37 auto masm = (static_cast<Amd64Encoder *>(GetEncoder()))->GetMasm();
38 auto label = masm->newLabel();
39
40 auto allocator = GetEncoder()->GetAllocator();
41 labels_.push_back(allocator->New<LabelType>(std::move(label)));
42 ASSERT(labels_.size() == id_);
43 return id_ - 1;
44 }
45
void Amd64LabelHolder::BindLabel(LabelId id)
47 {
48 static_cast<Amd64Encoder *>(GetEncoder())->GetMasm()->bind(*labels_[id]);
49 }
50
Amd64Encoder::Amd64Encoder(ArenaAllocator *allocator) : Encoder(allocator, Arch::X86_64, false) {}
52
Amd64Encoder::~Amd64Encoder()
54 {
55 if (masm_ != nullptr) {
56 masm_->~Assembler();
57 masm_ = nullptr;
58 }
59
60 if (code_holder_ != nullptr) {
61 code_holder_->~CodeHolder();
62 code_holder_ = nullptr;
63 }
64
65 if (error_handler_ != nullptr) {
66 error_handler_->~ErrorHandler();
67 error_handler_ = nullptr;
68 }
69
70 if (labels_ != nullptr) {
71 labels_->~Amd64LabelHolder();
72 labels_ = nullptr;
73 }
74 }
75
bool Amd64Encoder::InitMasm()
77 {
78 if (masm_ == nullptr) {
79 labels_ = GetAllocator()->New<Amd64LabelHolder>(this);
80 if (labels_ == nullptr) {
81 SetFalseResult();
82 return false;
83 }
84
85 asmjit::Environment env;
86 env.setArch(asmjit::Environment::kArchX64);
87
88 code_holder_ = GetAllocator()->New<asmjit::CodeHolder>(GetAllocator());
89 if (code_holder_ == nullptr) {
90 SetFalseResult();
91 return false;
92 }
93 code_holder_->init(env, 0U);
94
95 masm_ = GetAllocator()->New<asmjit::x86::Assembler>(code_holder_);
96 if (masm_ == nullptr) {
97 SetFalseResult();
98 return false;
99 }
100
101 // Enable strict validation.
102 masm_->addValidationOptions(asmjit::BaseEmitter::kValidationOptionAssembler);
103 error_handler_ = GetAllocator()->New<AsmJitErrorHandler>(this);
104 if (error_handler_ == nullptr) {
105 SetFalseResult();
106 return false;
107 }
108 masm_->setErrorHandler(error_handler_);
109
110 // Make sure that the compiler uses the same scratch registers as the assembler
111 CHECK_EQ(compiler::arch_info::x86_64::TEMP_REGS, GetTarget().GetTempRegsMask());
112 CHECK_EQ(compiler::arch_info::x86_64::TEMP_FP_REGS, GetTarget().GetTempVRegsMask());
113 }
114 return true;
115 }
116
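// Finalize flattens all code sections, resolves the remaining inter-section links,
// relocates the code to the address of the freshly allocated buffer and copies the
// flattened bytes into it.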
void Amd64Encoder::Finalize()
118 {
119 auto code = GetMasm()->code();
120 auto code_size = code->codeSize();
121
122 code->flatten();
123 code->resolveUnresolvedLinks();
124
125 auto code_buffer = GetAllocator()->Alloc(code_size);
126
127 code->relocateToBase(reinterpret_cast<uintptr_t>(code_buffer));
128 code->copyFlattenedData(code_buffer, code_size, asmjit::CodeHolder::kCopyPadSectionBuffer);
129 }
130
void Amd64Encoder::EncodeJump(LabelHolder::LabelId id)
132 {
133 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
134 GetMasm()->jmp(*label);
135 }
136
void Amd64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
138 {
139 if (src0.IsScalar()) {
140 if (src0.GetSize() == src1.GetSize()) {
141 GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
142 } else if (src0.GetSize() > src1.GetSize()) {
143 ScopedTmpReg tmp_reg(this, src0.GetType());
144 EncodeCast(tmp_reg, false, src1, false);
145 GetMasm()->cmp(ArchReg(src0), ArchReg(tmp_reg));
146 } else {
147 ScopedTmpReg tmp_reg(this, src1.GetType());
148 EncodeCast(tmp_reg, false, src0, false);
149 GetMasm()->cmp(ArchReg(tmp_reg), ArchReg(src1));
150 }
151 } else if (src0.GetType() == FLOAT32_TYPE) {
152 GetMasm()->comiss(ArchVReg(src0), ArchVReg(src1));
153 } else {
154 GetMasm()->comisd(ArchVReg(src0), ArchVReg(src1));
155 }
156
157 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
158 if (src0.IsScalar()) {
159 GetMasm()->j(ArchCc(cc, src0.IsFloat()), *label);
160 return;
161 }
162
163 if (CcMatchesNan(cc)) {
164 GetMasm()->jp(*label);
165 GetMasm()->j(ArchCc(cc, src0.IsFloat()), *label);
166 } else {
167 auto end = GetMasm()->newLabel();
168
169 GetMasm()->jp(end);
170 GetMasm()->j(ArchCc(cc, src0.IsFloat()), *label);
171 GetMasm()->bind(end);
172 }
173 }
174
void Amd64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
176 {
177 ASSERT(src.IsScalar());
178
179 auto imm_val = ImmToSignedInt(imm);
180 if (imm_val == 0) {
181 EncodeJump(id, src, cc);
182 return;
183 }
184
185 if (ImmFitsSize(imm_val, src.GetSize())) {
186 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
187
188 GetMasm()->cmp(ArchReg(src), asmjit::imm(imm_val));
189 GetMasm()->j(ArchCc(cc), *label);
190 } else {
191 ScopedTmpReg tmp_reg(this, src.GetType());
192 GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
193 EncodeJump(id, src, tmp_reg, cc);
194 }
195 }
196
void Amd64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src0, Reg src1, Condition cc)
198 {
199 ASSERT(src0.IsScalar());
200 if (src0.GetSize() == src1.GetSize()) {
201 GetMasm()->test(ArchReg(src0), ArchReg(src1));
202 } else if (src0.GetSize() > src1.GetSize()) {
203 ScopedTmpReg tmp_reg(this, src0.GetType());
204 EncodeCast(tmp_reg, false, src1, false);
205 GetMasm()->test(ArchReg(src0), ArchReg(tmp_reg));
206 } else {
207 ScopedTmpReg tmp_reg(this, src1.GetType());
208 EncodeCast(tmp_reg, false, src0, false);
209 GetMasm()->test(ArchReg(tmp_reg), ArchReg(src1));
210 }
211
212 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
213 GetMasm()->j(ArchCcTest(cc), *label);
214 }
215
void Amd64Encoder::EncodeJumpTest(LabelHolder::LabelId id, Reg src, Imm imm, Condition cc)
217 {
218 ASSERT(src.IsScalar());
219
220 auto imm_val = ImmToSignedInt(imm);
221 if (ImmFitsSize(imm_val, src.GetSize())) {
222 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
223
224 GetMasm()->test(ArchReg(src), asmjit::imm(imm_val));
225 GetMasm()->j(ArchCcTest(cc), *label);
226 } else {
227 ScopedTmpReg tmp_reg(this, src.GetType());
228 GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
229 EncodeJumpTest(id, src, tmp_reg, cc);
230 }
231 }
232
void Amd64Encoder::EncodeJump(LabelHolder::LabelId id, Reg src, Condition cc)
234 {
235 if (src.IsScalar()) {
236 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
237
238 GetMasm()->cmp(ArchReg(src), asmjit::imm(0));
239 GetMasm()->j(ArchCc(cc), *label);
240 return;
241 }
242
243 ScopedTmpReg tmp_reg(this, src.GetType());
244 if (src.GetType() == FLOAT32_TYPE) {
245 GetMasm()->xorps(ArchVReg(tmp_reg), ArchVReg(tmp_reg));
246 } else {
247 GetMasm()->xorpd(ArchVReg(tmp_reg), ArchVReg(tmp_reg));
248 }
249 EncodeJump(id, src, tmp_reg, cc);
250 }
251
void Amd64Encoder::EncodeJump(Reg dst)
253 {
254 GetMasm()->jmp(ArchReg(dst));
255 }
256
void Amd64Encoder::EncodeJump(RelocationInfo *relocation)
258 {
259 #ifdef PANDA_TARGET_MACOS
260 LOG(FATAL, COMPILER) << "Not supported in Macos build";
261 #else
262 // NOLINTNEXTLINE(readability-magic-numbers)
263 std::array<uint8_t, 5U> data = {0xe9, 0, 0, 0, 0};
264 GetMasm()->embed(data.data(), data.size());
265
266 constexpr int ADDEND = 4;
267 relocation->offset = GetCursorOffset() - ADDEND;
268 relocation->addend = -ADDEND;
269 relocation->type = R_X86_64_PLT32;
270 #endif
271 }
272
void Amd64Encoder::EncodeBitTestAndBranch(LabelHolder::LabelId id, compiler::Reg reg, uint32_t bit_pos, bool bit_value)
274 {
275 ASSERT(reg.IsScalar() && reg.GetSize() > bit_pos);
276 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
277 if (reg.GetSize() == DOUBLE_WORD_SIZE) {
278 ScopedTmpRegU64 tmp_reg(this);
279 GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(static_cast<uint64_t>(1) << bit_pos));
280 GetMasm()->test(ArchReg(reg), ArchReg(tmp_reg));
281 } else {
282 GetMasm()->test(ArchReg(reg), asmjit::imm(1U << bit_pos));
283 }
284 if (bit_value) {
285 GetMasm()->j(ArchCc(Condition::NE), *label);
286 } else {
287 GetMasm()->j(ArchCc(Condition::EQ), *label);
288 }
289 }
290
void Amd64Encoder::MakeCall([[maybe_unused]] compiler::RelocationInfo *relocation)
292 {
293 #ifdef PANDA_TARGET_MACOS
294 LOG(FATAL, COMPILER) << "Not supported in Macos build";
295 #else
296 // NOLINTNEXTLINE(readability-magic-numbers)
297 const size_t LEN = 5;
298 std::array<uint8_t, LEN> data = {0xe8, 0, 0, 0, 0};
299 GetMasm()->embed(data.data(), data.size());
300
    constexpr int ADDEND = 4;
    relocation->offset = GetCursorOffset() - ADDEND;
    relocation->addend = -ADDEND;
304 relocation->type = R_X86_64_PLT32;
305 #endif
306 }
307
void Amd64Encoder::MakeCall(LabelHolder::LabelId id)
309 {
310 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
311 GetMasm()->call(*label);
312 }
313
void Amd64Encoder::MakeCall(const void *entry_point)
315 {
316 ScopedTmpRegU64 tmp_reg(this);
317 GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(entry_point));
318 GetMasm()->call(ArchReg(tmp_reg));
319 }
320
void Amd64Encoder::MakeCall(Reg reg)
322 {
323 GetMasm()->call(ArchReg(reg));
324 }
325
void Amd64Encoder::MakeCall(MemRef entry_point)
327 {
328 ScopedTmpRegU64 tmp_reg(this);
329 EncodeLdr(tmp_reg, false, entry_point);
330 GetMasm()->call(ArchReg(tmp_reg));
331 }
332
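// Emits a rip-relative instruction whose displacement depends on its own length:
// the instruction is encoded once to measure its size, the cursor is rewound, and
// it is encoded again with the offset reduced by that size, so the displacement is
// relative to the end of the instruction as rip-relative addressing requires.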
333 template <typename Func>
void Amd64Encoder::EncodeRelativePcMov(Reg reg, intptr_t offset, Func encode_instruction)
335 {
336 auto pos = GetMasm()->offset();
337 encode_instruction(reg, offset);
338 offset -= (GetMasm()->offset() - pos);
339 GetMasm()->setOffset(pos);
340 encode_instruction(reg, offset);
341 }
342
void Amd64Encoder::MakeCallAot(intptr_t offset)
344 {
345 ScopedTmpRegU64 tmp_reg(this);
346 EncodeRelativePcMov(tmp_reg, offset, [this](Reg reg, intptr_t offset) {
347 GetMasm()->long_().mov(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
348 });
349 GetMasm()->call(ArchReg(tmp_reg));
350 }
351
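// A direct near call encodes its target as a rel32 displacement, so a call by offset
// is only possible when the offset fits into a signed 32-bit value.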
bool Amd64Encoder::CanMakeCallByOffset(intptr_t offset)
353 {
354 return offset == static_cast<intptr_t>(static_cast<int32_t>(offset));
355 }
356
void Amd64Encoder::MakeCallByOffset(intptr_t offset)
358 {
359 GetMasm()->call(GetCursorOffset() + int32_t(offset));
360 }
361
void Amd64Encoder::MakeLoadAotTable(intptr_t offset, Reg reg)
363 {
364 EncodeRelativePcMov(reg, offset, [this](Reg reg, intptr_t offset) {
365 GetMasm()->long_().mov(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
366 });
367 }
368
void Amd64Encoder::MakeLoadAotTableAddr(intptr_t offset, Reg addr, Reg val)
371 {
372 EncodeRelativePcMov(addr, offset, [this](Reg reg, intptr_t offset) {
373 GetMasm()->long_().lea(ArchReg(reg), asmjit::x86::ptr(asmjit::x86::rip, offset));
374 });
375 GetMasm()->mov(ArchReg(val), asmjit::x86::ptr(ArchReg(addr)));
376 }
377
void Amd64Encoder::EncodeAbort()
379 {
380 GetMasm()->int3();
381 }
382
void Amd64Encoder::EncodeReturn()
384 {
385 GetMasm()->ret();
386 }
387
void Amd64Encoder::EncodeMul([[maybe_unused]] Reg dst, [[maybe_unused]] Reg src, [[maybe_unused]] Imm imm)
389 {
390 SetFalseResult();
391 }
392
void Amd64Encoder::EncodeNop()
394 {
395 GetMasm()->nop();
396 }
397
void Amd64Encoder::EncodeMov(Reg dst, Reg src)
399 {
400 if (dst == src) {
401 return;
402 }
403
404 if (dst.IsFloat() != src.IsFloat()) {
405 ASSERT(src.GetSize() == dst.GetSize());
406 if (dst.GetSize() == WORD_SIZE) {
407 if (dst.IsFloat()) {
408 GetMasm()->movd(ArchVReg(dst), ArchReg(src));
409 } else {
410 GetMasm()->movd(ArchReg(dst), ArchVReg(src));
411 }
412 } else {
413 ASSERT(dst.GetSize() == DOUBLE_WORD_SIZE);
414 if (dst.IsFloat()) {
415 GetMasm()->movq(ArchVReg(dst), ArchReg(src));
416 } else {
417 GetMasm()->movq(ArchReg(dst), ArchVReg(src));
418 }
419 }
420 return;
421 }
422
423 if (dst.IsFloat()) {
424 ASSERT(src.IsFloat());
425 if (dst.GetType() == FLOAT32_TYPE) {
426 GetMasm()->movss(ArchVReg(dst), ArchVReg(src));
427 } else {
428 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
429 }
430 return;
431 }
432
433 if (dst.GetSize() < WORD_SIZE && dst.GetSize() == src.GetSize()) {
434 GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
435 }
436
437 if (dst.GetSize() == src.GetSize()) {
438 GetMasm()->mov(ArchReg(dst), ArchReg(src));
439 } else {
440 EncodeCast(dst, false, src, false);
441 }
442 }
443
void Amd64Encoder::EncodeNeg(Reg dst, Reg src)
445 {
446 if (dst.IsScalar()) {
447 EncodeMov(dst, src);
448 GetMasm()->neg(ArchReg(dst));
449 return;
450 }
451
452 if (dst.GetType() == FLOAT32_TYPE) {
453 ScopedTmpRegF32 tmp(this);
454 CopyImmToXmm(tmp, -0.0F);
455
456 if (dst.GetId() != src.GetId()) {
            GetMasm()->movss(ArchVReg(dst), ArchVReg(src));
458 }
459 GetMasm()->xorps(ArchVReg(dst), ArchVReg(tmp));
460 } else {
461 ScopedTmpRegF64 tmp(this);
462 CopyImmToXmm(tmp, -0.0);
463
464 if (dst.GetId() != src.GetId()) {
465 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
466 }
467 GetMasm()->xorps(ArchVReg(dst), ArchVReg(tmp));
468 }
469 }
470
void Amd64Encoder::EncodeAbs(Reg dst, Reg src)
472 {
473 if (dst.IsScalar()) {
474 auto size = std::max<uint8_t>(src.GetSize(), WORD_SIZE);
475
476 if (dst.GetId() != src.GetId()) {
477 GetMasm()->mov(ArchReg(dst), ArchReg(src));
478 GetMasm()->neg(ArchReg(dst));
479 GetMasm()->cmovl(ArchReg(dst, size), ArchReg(src, size));
480 } else if (GetScratchRegistersCount() > 0) {
481 ScopedTmpReg tmp_reg(this, dst.GetType());
482
483 GetMasm()->mov(ArchReg(tmp_reg), ArchReg(src));
484 GetMasm()->neg(ArchReg(tmp_reg));
485
486 GetMasm()->cmovl(ArchReg(tmp_reg, size), ArchReg(src, size));
487 GetMasm()->mov(ArchReg(dst), ArchReg(tmp_reg));
488 } else {
489 auto end = GetMasm()->newLabel();
490
491 GetMasm()->test(ArchReg(dst), ArchReg(dst));
492 GetMasm()->jns(end);
493
494 GetMasm()->neg(ArchReg(dst));
495 GetMasm()->bind(end);
496 }
497 return;
498 }
499
500 if (dst.GetType() == FLOAT32_TYPE) {
501 ScopedTmpRegF32 tmp(this);
502 // NOLINTNEXTLINE(readability-magic-numbers)
503 CopyImmToXmm(tmp, uint32_t(0x7fffffff));
504
505 if (dst.GetId() != src.GetId()) {
506 GetMasm()->movss(ArchVReg(dst), ArchVReg(src));
507 }
508 GetMasm()->andps(ArchVReg(dst), ArchVReg(tmp));
509 } else {
510 ScopedTmpRegF64 tmp(this);
511 // NOLINTNEXTLINE(readability-magic-numbers)
512 CopyImmToXmm(tmp, uint64_t(0x7fffffffffffffff));
513
514 if (dst.GetId() != src.GetId()) {
515 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
516 }
517 GetMasm()->andps(ArchVReg(dst), ArchVReg(tmp));
518 }
519 }
520
void Amd64Encoder::EncodeNot(Reg dst, Reg src)
522 {
523 ASSERT(dst.IsScalar());
524
525 EncodeMov(dst, src);
526 GetMasm()->not_(ArchReg(dst));
527 }
528
void Amd64Encoder::EncodeSqrt(Reg dst, Reg src)
530 {
531 ASSERT(dst.IsFloat());
532 if (src.GetType() == FLOAT32_TYPE) {
533 GetMasm()->sqrtps(ArchVReg(dst), ArchVReg(src));
534 } else {
535 GetMasm()->sqrtpd(ArchVReg(dst), ArchVReg(src));
536 }
537 }
538
void Amd64Encoder::EncodeCastFloatToScalar(Reg dst, bool dst_signed, Reg src)
540 {
    // We DON'T support casts from float32/64 to int8/16 or bool, because such a cast is not defined
    // in other languages or architectures and we do not know what the behavior should be.
543 ASSERT(dst.GetSize() >= WORD_SIZE);
544 auto end = GetMasm()->newLabel();
545
546 // if src is NaN, then dst = 0
547 EncodeCastFloatCheckNan(dst, src, end);
548
549 // For JS number cast we treat Infinity as a zero integer value
550 if (IsJsNumberCast() && src.GetType() == FLOAT64_TYPE) {
551 static constexpr uint64_t EXP_BIT_MASK = 0x7FF0000000000000L;
552 ScopedTmpReg cmp_reg(this, src.GetType());
553 ScopedTmpReg tmp_reg(this, src.GetType() == FLOAT64_TYPE ? INT64_TYPE : INT32_TYPE);
554 GetMasm()->mov(ArchReg(tmp_reg, DOUBLE_WORD_SIZE), asmjit::imm(EXP_BIT_MASK));
555 GetMasm()->movq(ArchVReg(cmp_reg), ArchReg(tmp_reg));
556 GetMasm()->ucomisd(ArchVReg(src), ArchVReg(cmp_reg));
557 GetMasm()->je(end);
558 }
559
560 if (dst_signed) {
561 EncodeCastFloatSignCheckRange(dst, src, end);
562 } else {
563 EncodeCastFloatUnsignCheckRange(dst, src, end);
564 }
565
566 if (src.GetType() == FLOAT32_TYPE) {
567 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
568 EncodeCastFloat32ToUint64(dst, src);
569 } else {
570 GetMasm()->cvttss2si(ArchReg(dst, DOUBLE_WORD_SIZE), ArchVReg(src));
571 }
572 } else {
573 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
574 EncodeCastFloat64ToUint64(dst, src);
575 } else {
576 GetMasm()->cvttsd2si(ArchReg(dst, DOUBLE_WORD_SIZE), ArchVReg(src));
577 }
578 }
579
580 GetMasm()->bind(end);
581 }
582
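// cvttss2si/cvttsd2si produce a signed result, so values of 2^63 and above are handled
// separately: 2^63 is subtracted before the conversion and added back afterwards by
// setting the sign bit (xor with 0x8000000000000000).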
void Amd64Encoder::EncodeCastFloat32ToUint64(Reg dst, Reg src)
584 {
585 auto big_number_label = GetMasm()->newLabel();
586 auto end_label = GetMasm()->newLabel();
587 ScopedTmpReg tmp_reg(this, src.GetType());
588 ScopedTmpReg tmp_num(this, dst.GetType());
589
    // 2^63 encoded as a float: the smallest value that no longer fits into a signed int64
591 // NOLINTNEXTLINE (readability-magic-numbers)
592 GetMasm()->mov(ArchReg(dst, WORD_SIZE), asmjit::imm(0x5F000000));
593 GetMasm()->movd(ArchVReg(tmp_reg), ArchReg(dst, WORD_SIZE));
594 GetMasm()->comiss(ArchVReg(src), ArchVReg(tmp_reg));
595 GetMasm()->jnb(big_number_label);
596
597 GetMasm()->cvttss2si(ArchReg(dst), ArchVReg(src));
598 GetMasm()->jmp(end_label);
599
600 GetMasm()->bind(big_number_label);
601 GetMasm()->subss(ArchVReg(src), ArchVReg(tmp_reg));
602 GetMasm()->cvttss2si(ArchReg(dst), ArchVReg(src));
603 // NOLINTNEXTLINE (readability-magic-numbers)
604 GetMasm()->mov(ArchReg(tmp_num), asmjit::imm(0x8000000000000000));
605 GetMasm()->xor_(ArchReg(dst), ArchReg(tmp_num));
606 GetMasm()->bind(end_label);
607 }
608
void Amd64Encoder::EncodeCastFloat64ToUint64(Reg dst, Reg src)
610 {
611 auto big_number_label = GetMasm()->newLabel();
612 auto end_label = GetMasm()->newLabel();
613 ScopedTmpReg tmp_reg(this, src.GetType());
614 ScopedTmpReg tmp_num(this, dst.GetType());
615
    // 2^63 encoded as a double: the smallest value that no longer fits into a signed int64
617 // NOLINTNEXTLINE (readability-magic-numbers)
618 GetMasm()->mov(ArchReg(dst), asmjit::imm(0x43E0000000000000));
619 GetMasm()->movq(ArchVReg(tmp_reg), ArchReg(dst));
620 GetMasm()->comisd(ArchVReg(src), ArchVReg(tmp_reg));
621 GetMasm()->jnb(big_number_label);
622
623 GetMasm()->cvttsd2si(ArchReg(dst), ArchVReg(src));
624 GetMasm()->jmp(end_label);
625
626 GetMasm()->bind(big_number_label);
627 GetMasm()->subsd(ArchVReg(src), ArchVReg(tmp_reg));
628 GetMasm()->cvttsd2si(ArchReg(dst), ArchVReg(src));
629 // NOLINTNEXTLINE (readability-magic-numbers)
630 GetMasm()->mov(ArchReg(tmp_num), asmjit::imm(0x8000000000000000));
631 GetMasm()->xor_(ArchReg(dst), ArchReg(tmp_num));
632 GetMasm()->bind(end_label);
633 }
634
void Amd64Encoder::EncodeCastFloatCheckNan(Reg dst, Reg src, const asmjit::Label &end)
636 {
637 GetMasm()->xor_(ArchReg(dst, DOUBLE_WORD_SIZE), ArchReg(dst, DOUBLE_WORD_SIZE));
638 if (src.GetType() == FLOAT32_TYPE) {
639 GetMasm()->ucomiss(ArchVReg(src), ArchVReg(src));
640 } else {
641 GetMasm()->ucomisd(ArchVReg(src), ArchVReg(src));
642 }
643 GetMasm()->jp(end);
644 }
645
void Amd64Encoder::EncodeCastFloatSignCheckRange(Reg dst, Reg src, const asmjit::Label &end)
647 {
648 // if src < INT_MIN, then dst = INT_MIN
649 // if src >= (INT_MAX + 1), then dst = INT_MAX
650 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
651 EncodeCastFloatCheckRange(dst, src, end, INT64_MIN, INT64_MAX);
652 } else {
653 EncodeCastFloatCheckRange(dst, src, end, INT32_MIN, INT32_MAX);
654 }
655 }
656
void Amd64Encoder::EncodeCastFloatCheckRange(Reg dst, Reg src, const asmjit::Label &end, const int64_t min_value,
                                             const uint64_t max_value)
659 {
660 ScopedTmpReg cmp_reg(this, src.GetType());
661 ScopedTmpReg tmp_reg(this, src.GetType() == FLOAT64_TYPE ? INT64_TYPE : INT32_TYPE);
662
663 GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(min_value));
664 if (src.GetType() == FLOAT32_TYPE) {
665 GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(bit_cast<uint32_t>(float(min_value))));
666 GetMasm()->movd(ArchVReg(cmp_reg), ArchReg(tmp_reg));
667 GetMasm()->ucomiss(ArchVReg(src), ArchVReg(cmp_reg));
668 } else {
669 GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(bit_cast<uint64_t>(double(min_value))));
670 GetMasm()->movq(ArchVReg(cmp_reg), ArchReg(tmp_reg));
671 GetMasm()->ucomisd(ArchVReg(src), ArchVReg(cmp_reg));
672 }
673 GetMasm()->jb(end);
674
675 GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(max_value));
676 if (src.GetType() == FLOAT32_TYPE) {
677 GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(bit_cast<uint32_t>(float(max_value) + 1U)));
678 GetMasm()->movd(ArchVReg(cmp_reg), ArchReg(tmp_reg));
679 GetMasm()->ucomiss(ArchVReg(src), ArchVReg(cmp_reg));
680 } else {
681 GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(bit_cast<uint64_t>(double(max_value) + 1U)));
682 GetMasm()->movq(ArchVReg(cmp_reg), ArchReg(tmp_reg));
683 GetMasm()->ucomisd(ArchVReg(src), ArchVReg(cmp_reg));
684 }
685 GetMasm()->jae(end);
686 }
687
void Amd64Encoder::EncodeCastFloatUnsignCheckRange(Reg dst, Reg src, const asmjit::Label &end)
689 {
690 // if src < 0, then dst = 0
691 // if src >= (UINT_MAX + 1), then dst = UINT_MAX
692 if (dst.GetSize() == DOUBLE_WORD_SIZE) {
693 EncodeCastFloatCheckRange(dst, src, end, 0, UINT64_MAX);
694 } else {
695 EncodeCastFloatCheckRange(dst, src, end, 0, UINT32_MAX);
696 }
697 }
698
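// Unsigned 64-bit to floating-point conversion. For float32 a value with the top bit set is
// halved with the lost bit folded back in, converted, and then doubled, since cvtsi2ss only
// accepts signed input. For float64 the usual SSE2 trick is used: the value is split into
// 32-bit halves biased by 2^52 (0x433...) and 2^84 (0x453...), the biases are subtracted
// with subpd and the two partial doubles are summed with addsd.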
void Amd64Encoder::EncodeCastScalarToFloatUnsignDouble(Reg dst, Reg src)
700 {
701 if (dst.GetType() == FLOAT32_TYPE) {
702 ScopedTmpRegU64 int1_reg(this);
703 ScopedTmpRegU64 int2_reg(this);
704
705 auto sgn = GetMasm()->newLabel();
706 auto end = GetMasm()->newLabel();
707
708 GetMasm()->test(ArchReg(src), ArchReg(src));
709 GetMasm()->js(sgn);
710 GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(src));
711 GetMasm()->jmp(end);
712
713 GetMasm()->bind(sgn);
714 GetMasm()->mov(ArchReg(int1_reg), ArchReg(src));
715 GetMasm()->mov(ArchReg(int2_reg), ArchReg(src));
716 GetMasm()->shr(ArchReg(int2_reg), asmjit::imm(1));
717 GetMasm()->and_(ArchReg(int1_reg, WORD_SIZE), asmjit::imm(1));
718 GetMasm()->or_(ArchReg(int1_reg), ArchReg(int2_reg));
719 GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(int1_reg));
720 GetMasm()->addss(ArchVReg(dst), ArchVReg(dst));
721
722 GetMasm()->bind(end);
723 } else {
724 static constexpr std::array<uint32_t, 4> ARR1 = {uint32_t(0x43300000), uint32_t(0x45300000), 0x0, 0x0};
725 static constexpr std::array<uint64_t, 2> ARR2 = {uint64_t(0x4330000000000000), uint64_t(0x4530000000000000)};
726
727 ScopedTmpReg float1_reg(this, dst.GetType());
728 ScopedTmpRegF64 tmp(this);
729
730 GetMasm()->movq(ArchVReg(float1_reg), ArchReg(src));
731 CopyArrayToXmm(tmp, ARR1);
732 GetMasm()->punpckldq(ArchVReg(float1_reg), ArchVReg(tmp));
733 CopyArrayToXmm(tmp, ARR2);
734 GetMasm()->subpd(ArchVReg(float1_reg), ArchVReg(tmp));
735 GetMasm()->movapd(ArchVReg(dst), ArchVReg(float1_reg));
736 GetMasm()->unpckhpd(ArchVReg(dst), ArchVReg(float1_reg));
737 GetMasm()->addsd(ArchVReg(dst), ArchVReg(float1_reg));
738 }
739 }
740
void Amd64Encoder::EncodeCastScalarToFloat(Reg dst, Reg src, bool src_signed)
742 {
743 if (!src_signed && src.GetSize() == DOUBLE_WORD_SIZE) {
744 EncodeCastScalarToFloatUnsignDouble(dst, src);
745 return;
746 }
747
748 if (src.GetSize() < WORD_SIZE || (src_signed && src.GetSize() == WORD_SIZE)) {
749 if (dst.GetType() == FLOAT32_TYPE) {
750 GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(src, WORD_SIZE));
751 } else {
752 GetMasm()->cvtsi2sd(ArchVReg(dst), ArchReg(src, WORD_SIZE));
753 }
754 return;
755 }
756
757 if (!src_signed && src.GetSize() == WORD_SIZE) {
758 ScopedTmpRegU64 int1_reg(this);
759
760 GetMasm()->mov(ArchReg(int1_reg, WORD_SIZE), ArchReg(src, WORD_SIZE));
761 if (dst.GetType() == FLOAT32_TYPE) {
762 GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(int1_reg));
763 } else {
764 GetMasm()->cvtsi2sd(ArchVReg(dst), ArchReg(int1_reg));
765 }
766 return;
767 }
768
769 ASSERT(src_signed && src.GetSize() == DOUBLE_WORD_SIZE);
770 if (dst.GetType() == FLOAT32_TYPE) {
771 GetMasm()->cvtsi2ss(ArchVReg(dst), ArchReg(src));
772 } else {
773 GetMasm()->cvtsi2sd(ArchVReg(dst), ArchReg(src));
774 }
775 }
776
void Amd64Encoder::EncodeCastToBool(Reg dst, Reg src)
778 {
    // The ISA says that we only support the casts:
    // i32tou1, i64tou1, u32tou1, u64tou1
781 ASSERT(src.IsScalar());
782 ASSERT(dst.IsScalar());
783
    // In our ISA the minimal type is 32-bit, so bool is stored in 32 bits
785 GetMasm()->test(ArchReg(src), ArchReg(src));
786 // One "mov" will be better, then 2 jump. Else other instructions will overwrite the flags.
787 GetMasm()->mov(ArchReg(dst, WORD_SIZE), asmjit::imm(0));
788 GetMasm()->setne(ArchReg(dst));
789 }
790
void Amd64Encoder::EncodeCast(Reg dst, bool dst_signed, Reg src, bool src_signed)
792 {
793 if (src.IsFloat() && dst.IsScalar()) {
794 EncodeCastFloatToScalar(dst, dst_signed, src);
795 return;
796 }
797
798 if (src.IsScalar() && dst.IsFloat()) {
799 EncodeCastScalarToFloat(dst, src, src_signed);
800 return;
801 }
802
803 if (src.IsFloat() && dst.IsFloat()) {
804 if (src.GetSize() != dst.GetSize()) {
805 if (src.GetType() == FLOAT32_TYPE) {
806 GetMasm()->cvtss2sd(ArchVReg(dst), ArchVReg(src));
807 } else {
808 GetMasm()->cvtsd2ss(ArchVReg(dst), ArchVReg(src));
809 }
810 return;
811 }
812
813 if (src.GetType() == FLOAT32_TYPE) {
814 GetMasm()->movss(ArchVReg(dst), ArchVReg(src));
815 } else {
816 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src));
817 }
818 return;
819 }
820
821 ASSERT(src.IsScalar() && dst.IsScalar());
822 EncodeCastScalar(dst, dst_signed, src, src_signed);
823 }
824
void Amd64Encoder::EncodeCastScalar(Reg dst, bool dst_signed, Reg src, bool src_signed)
826 {
827 auto extend_to_32bit = [this](Reg reg, bool is_signed) {
828 if (reg.GetSize() < WORD_SIZE) {
829 if (is_signed) {
830 GetMasm()->movsx(ArchReg(reg, WORD_SIZE), ArchReg(reg));
831 } else {
832 GetMasm()->movzx(ArchReg(reg, WORD_SIZE), ArchReg(reg));
833 }
834 }
835 };
836
837 if (src.GetSize() >= dst.GetSize()) {
838 if (dst.GetId() != src.GetId()) {
839 GetMasm()->mov(ArchReg(dst), ArchReg(src, dst.GetSize()));
840 }
841 extend_to_32bit(dst, dst_signed);
842 return;
843 }
844
845 if (src_signed) {
846 if (dst.GetSize() < DOUBLE_WORD_SIZE) {
847 GetMasm()->movsx(ArchReg(dst), ArchReg(src));
848 extend_to_32bit(dst, dst_signed);
849 } else if (src.GetSize() == WORD_SIZE) {
850 GetMasm()->movsxd(ArchReg(dst), ArchReg(src));
851 } else {
852 GetMasm()->movsx(ArchReg(dst, WORD_SIZE), ArchReg(src));
853 GetMasm()->movsxd(ArchReg(dst), ArchReg(dst, WORD_SIZE));
854 }
855 return;
856 }
857
858 if (src.GetSize() == WORD_SIZE) {
859 GetMasm()->mov(ArchReg(dst, WORD_SIZE), ArchReg(src));
860 } else if (dst.GetSize() == DOUBLE_WORD_SIZE) {
861 GetMasm()->movzx(ArchReg(dst, WORD_SIZE), ArchReg(src));
862 } else {
863 GetMasm()->movzx(ArchReg(dst), ArchReg(src));
864 extend_to_32bit(dst, dst_signed);
865 }
866 }
867
Reg Amd64Encoder::MakeShift(Shift shift)
869 {
870 Reg reg = shift.GetBase();
871 ASSERT(reg.IsValid());
872 if (reg.IsScalar()) {
873 ASSERT(shift.GetType() != ShiftType::INVALID_SHIFT);
874 switch (shift.GetType()) {
875 case ShiftType::LSL:
876 GetMasm()->shl(ArchReg(reg), asmjit::imm(shift.GetScale()));
877 break;
878 case ShiftType::LSR:
879 GetMasm()->shr(ArchReg(reg), asmjit::imm(shift.GetScale()));
880 break;
881 case ShiftType::ASR:
882 GetMasm()->sar(ArchReg(reg), asmjit::imm(shift.GetScale()));
883 break;
884 case ShiftType::ROR:
885 GetMasm()->ror(ArchReg(reg), asmjit::imm(shift.GetScale()));
886 break;
887 default:
888 UNREACHABLE();
889 }
890
891 return reg;
892 }
893
894 // Invalid register type
895 UNREACHABLE();
896 }
897
void Amd64Encoder::EncodeAdd(Reg dst, Reg src0, Shift src1)
899 {
900 if (dst.IsFloat()) {
901 SetFalseResult();
902 return;
903 }
904
905 ASSERT(dst.GetSize() >= src0.GetSize());
906
907 auto shift_reg = MakeShift(src1);
908
909 if (src0.GetSize() < WORD_SIZE) {
910 EncodeAdd(dst, src0, shift_reg);
911 return;
912 }
913
914 if (src0.GetSize() == DOUBLE_WORD_SIZE && shift_reg.GetSize() < DOUBLE_WORD_SIZE) {
915 GetMasm()->movsxd(ArchReg(shift_reg, DOUBLE_WORD_SIZE), ArchReg(shift_reg));
916 }
917
918 GetMasm()->lea(ArchReg(dst), asmjit::x86::ptr(ArchReg(src0), ArchReg(shift_reg, src0.GetSize())));
919 }
920
void Amd64Encoder::EncodeAdd(Reg dst, Reg src0, Reg src1)
922 {
923 if (dst.IsScalar()) {
924 auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
925 GetMasm()->lea(ArchReg(dst, size), asmjit::x86::ptr(ArchReg(src0, size), ArchReg(src1, size)));
926 return;
927 }
928
929 if (dst.GetType() == FLOAT32_TYPE) {
930 if (dst.GetId() == src0.GetId()) {
931 GetMasm()->addss(ArchVReg(dst), ArchVReg(src1));
932 } else if (dst.GetId() == src1.GetId()) {
933 GetMasm()->addss(ArchVReg(dst), ArchVReg(src0));
934 } else {
935 GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
936 GetMasm()->addss(ArchVReg(dst), ArchVReg(src1));
937 }
938 } else {
939 if (dst.GetId() == src0.GetId()) {
940 GetMasm()->addsd(ArchVReg(dst), ArchVReg(src1));
941 } else if (dst.GetId() == src1.GetId()) {
942 GetMasm()->addsd(ArchVReg(dst), ArchVReg(src0));
943 } else {
944 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
945 GetMasm()->addsd(ArchVReg(dst), ArchVReg(src1));
946 }
947 }
948 }
949
void Amd64Encoder::EncodeSub(Reg dst, Reg src0, Reg src1)
951 {
952 if (dst.IsScalar()) {
953 if (dst.GetId() == src0.GetId()) {
954 GetMasm()->sub(ArchReg(dst), ArchReg(src1));
955 } else if (dst.GetId() == src1.GetId()) {
956 GetMasm()->sub(ArchReg(dst), ArchReg(src0));
957 GetMasm()->neg(ArchReg(dst));
958 } else {
959 GetMasm()->mov(ArchReg(dst), ArchReg(src0));
960 GetMasm()->sub(ArchReg(dst), ArchReg(src1));
961 }
962 return;
963 }
964
965 if (dst.GetType() == FLOAT32_TYPE) {
966 if (dst.GetId() == src0.GetId()) {
967 GetMasm()->subss(ArchVReg(dst), ArchVReg(src1));
968 } else if (dst.GetId() != src1.GetId()) {
969 GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
970 GetMasm()->subss(ArchVReg(dst), ArchVReg(src1));
971 } else {
972 ScopedTmpReg tmp_reg(this, dst.GetType());
973 GetMasm()->movss(ArchVReg(tmp_reg), ArchVReg(src0));
974 GetMasm()->subss(ArchVReg(tmp_reg), ArchVReg(src1));
975 GetMasm()->movss(ArchVReg(dst), ArchVReg(tmp_reg));
976 }
977 } else {
978 if (dst.GetId() == src0.GetId()) {
979 GetMasm()->subsd(ArchVReg(dst), ArchVReg(src1));
980 } else if (dst.GetId() != src1.GetId()) {
981 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
982 GetMasm()->subsd(ArchVReg(dst), ArchVReg(src1));
983 } else {
984 ScopedTmpReg tmp_reg(this, dst.GetType());
985 GetMasm()->movsd(ArchVReg(tmp_reg), ArchVReg(src0));
986 GetMasm()->subsd(ArchVReg(tmp_reg), ArchVReg(src1));
987 GetMasm()->movsd(ArchVReg(dst), ArchVReg(tmp_reg));
988 }
989 }
990 }
991
void Amd64Encoder::EncodeMul(Reg dst, Reg src0, Reg src1)
993 {
994 if (dst.IsScalar()) {
995 auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
996
997 if (dst.GetId() == src0.GetId()) {
998 GetMasm()->imul(ArchReg(dst, size), ArchReg(src1, size));
999 } else if (dst.GetId() == src1.GetId()) {
1000 GetMasm()->imul(ArchReg(dst, size), ArchReg(src0, size));
1001 } else {
1002 GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
1003 GetMasm()->imul(ArchReg(dst, size), ArchReg(src1, size));
1004 }
1005 return;
1006 }
1007
1008 if (dst.GetType() == FLOAT32_TYPE) {
1009 if (dst.GetId() == src0.GetId()) {
1010 GetMasm()->mulss(ArchVReg(dst), ArchVReg(src1));
1011 } else if (dst.GetId() == src1.GetId()) {
1012 GetMasm()->mulss(ArchVReg(dst), ArchVReg(src0));
1013 } else {
1014 GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
1015 GetMasm()->mulss(ArchVReg(dst), ArchVReg(src1));
1016 }
1017 } else {
1018 if (dst.GetId() == src0.GetId()) {
1019 GetMasm()->mulsd(ArchVReg(dst), ArchVReg(src1));
1020 } else if (dst.GetId() == src1.GetId()) {
1021 GetMasm()->mulsd(ArchVReg(dst), ArchVReg(src0));
1022 } else {
1023 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
1024 GetMasm()->mulsd(ArchVReg(dst), ArchVReg(src1));
1025 }
1026 }
1027 }
1028
void Amd64Encoder::EncodeAddOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1030 {
1031 ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1032 ASSERT(cc == Condition::VS || cc == Condition::VC);
1033 auto size = dst.GetSize();
1034 if (dst.GetId() == src0.GetId()) {
1035 GetMasm()->add(ArchReg(dst, size), ArchReg(src1, size));
1036 } else if (dst.GetId() == src1.GetId()) {
1037 GetMasm()->add(ArchReg(dst, size), ArchReg(src0, size));
1038 } else {
1039 GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
1040 GetMasm()->add(ArchReg(dst, size), ArchReg(src1, size));
1041 }
1042 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
1043 GetMasm()->j(ArchCc(cc, false), *label);
1044 }
1045
void Amd64Encoder::EncodeSubOverflow(compiler::LabelHolder::LabelId id, Reg dst, Reg src0, Reg src1, Condition cc)
1047 {
1048 ASSERT(!dst.IsFloat() && !src0.IsFloat() && !src1.IsFloat());
1049 ASSERT(cc == Condition::VS || cc == Condition::VC);
1050 auto size = dst.GetSize();
1051 if (dst.GetId() == src0.GetId()) {
1052 GetMasm()->sub(ArchReg(dst, size), ArchReg(src1, size));
1053 } else if (dst.GetId() == src1.GetId()) {
1054 ScopedTmpReg tmp_reg(this, dst.GetType());
1055 GetMasm()->mov(ArchReg(tmp_reg, size), ArchReg(src1, size));
1056 GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
1057 GetMasm()->sub(ArchReg(dst, size), ArchReg(tmp_reg, size));
1058 } else {
1059 GetMasm()->mov(ArchReg(dst, size), ArchReg(src0, size));
1060 GetMasm()->sub(ArchReg(dst, size), ArchReg(src1, size));
1061 }
1062 auto label = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(id);
1063 GetMasm()->j(ArchCc(cc, false), *label);
1064 }
1065
void Amd64Encoder::EncodeDivFloat(Reg dst, Reg src0, Reg src1)
1067 {
1068 ASSERT(dst.IsFloat());
1069 if (dst.GetType() == FLOAT32_TYPE) {
1070 if (dst.GetId() == src0.GetId()) {
1071 GetMasm()->divss(ArchVReg(dst), ArchVReg(src1));
1072 } else if (dst.GetId() != src1.GetId()) {
1073 GetMasm()->movss(ArchVReg(dst), ArchVReg(src0));
1074 GetMasm()->divss(ArchVReg(dst), ArchVReg(src1));
1075 } else {
1076 ScopedTmpRegF32 tmp(this);
1077 GetMasm()->movss(ArchVReg(tmp), ArchVReg(src0));
1078 GetMasm()->divss(ArchVReg(tmp), ArchVReg(src1));
1079 GetMasm()->movss(ArchVReg(dst), ArchVReg(tmp));
1080 }
1081 } else {
1082 if (dst.GetId() == src0.GetId()) {
1083 GetMasm()->divsd(ArchVReg(dst), ArchVReg(src1));
1084 } else if (dst.GetId() != src1.GetId()) {
1085 GetMasm()->movsd(ArchVReg(dst), ArchVReg(src0));
1086 GetMasm()->divsd(ArchVReg(dst), ArchVReg(src1));
1087 } else {
1088 ScopedTmpRegF64 tmp(this);
1089 GetMasm()->movsd(ArchVReg(tmp), ArchVReg(src0));
1090 GetMasm()->divsd(ArchVReg(tmp), ArchVReg(src1));
1091 GetMasm()->movsd(ArchVReg(dst), ArchVReg(tmp));
1092 }
1093 }
1094 }
1095
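// Integer division. A divisor of -1 is dispatched to a separate path that simply negates the
// dividend; besides being cheaper, this avoids the #DE exception that idiv raises for
// INT_MIN / -1. The generic path routes the operands through rax/rdx because div/idiv
// implicitly use that register pair, so rax and rdx are saved and restored when they are not
// the destination.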
void Amd64Encoder::EncodeDiv(Reg dst, bool dst_signed, Reg src0, Reg src1)
1097 {
1098 if (dst.IsFloat()) {
1099 EncodeDivFloat(dst, src0, src1);
1100 return;
1101 }
1102
1103 auto neg_path = GetMasm()->newLabel();
1104 auto crossroad = GetMasm()->newLabel();
1105
1106 GetMasm()->cmp(ArchReg(src1), asmjit::imm(-1));
1107 GetMasm()->je(neg_path);
1108
1109 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
1110 GetMasm()->push(asmjit::x86::rdx);
1111 }
1112 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1113 GetMasm()->push(asmjit::x86::rax);
1114 }
1115
1116 ScopedTmpReg tmp_reg(this, dst.GetType());
1117 Reg op1 {src1};
1118 if (src1.GetId() == ConvertRegNumber(asmjit::x86::rax.id()) ||
1119 src1.GetId() == ConvertRegNumber(asmjit::x86::rdx.id())) {
1120 GetMasm()->mov(ArchReg(tmp_reg), ArchReg(src1));
1121 op1 = Reg(tmp_reg);
1122 }
1123
1124 if (src0.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1125 GetMasm()->mov(asmjit::x86::rax, ArchReg(src0, DOUBLE_WORD_SIZE));
1126 }
1127
1128 if (dst_signed) {
1129 if (dst.GetSize() <= WORD_SIZE) {
1130 GetMasm()->cdq();
1131 } else {
1132 GetMasm()->cqo();
1133 }
1134 GetMasm()->idiv(ArchReg(op1));
1135 } else {
1136 GetMasm()->xor_(asmjit::x86::rdx, asmjit::x86::rdx);
1137 GetMasm()->div(ArchReg(op1));
1138 }
1139
1140 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1141 GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::x86::rax);
1142 GetMasm()->pop(asmjit::x86::rax);
1143 }
1144
1145 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
1146 GetMasm()->pop(asmjit::x86::rdx);
1147 }
1148 GetMasm()->jmp(crossroad);
1149
1150 GetMasm()->bind(neg_path);
1151 if (dst.GetId() != src0.GetId()) {
1152 GetMasm()->mov(ArchReg(dst), ArchReg(src0));
1153 }
1154 GetMasm()->neg(ArchReg(dst));
1155
1156 GetMasm()->bind(crossroad);
1157 }
1158
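// Integer remainder. As in EncodeDiv, a divisor of -1 takes a separate path that just zeroes
// the result (x % -1 == 0), avoiding the #DE exception for INT_MIN % -1; the generic path
// reads the remainder from rdx after div/idiv.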
void Amd64Encoder::EncodeModFloat(Reg dst, Reg src0, Reg src1)
1160 {
1161 ASSERT(dst.IsFloat());
1162 if (dst.GetType() == FLOAT32_TYPE) {
1163 using fp = float (*)(float, float);
1164 MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<fp>(fmodf)));
1165 } else {
1166 using fp = double (*)(double, double);
1167 MakeLibCall(dst, src0, src1, reinterpret_cast<void *>(static_cast<fp>(fmod)));
1168 }
1169 }
1170
void Amd64Encoder::EncodeMod(Reg dst, bool dst_signed, Reg src0, Reg src1)
1172 {
1173 if (dst.IsFloat()) {
1174 EncodeModFloat(dst, src0, src1);
1175 return;
1176 }
1177
1178 auto zero_path = GetMasm()->newLabel();
1179 auto crossroad = GetMasm()->newLabel();
1180
1181 GetMasm()->cmp(ArchReg(src1), asmjit::imm(-1));
1182 GetMasm()->je(zero_path);
1183
1184 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1185 GetMasm()->push(asmjit::x86::rax);
1186 }
1187 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
1188 GetMasm()->push(asmjit::x86::rdx);
1189 }
1190
1191 ScopedTmpReg tmp_reg(this, dst.GetType());
1192 Reg op1 {src1};
1193 if (src1.GetId() == ConvertRegNumber(asmjit::x86::rax.id()) ||
1194 src1.GetId() == ConvertRegNumber(asmjit::x86::rdx.id())) {
1195 GetMasm()->mov(ArchReg(tmp_reg), ArchReg(src1));
1196 op1 = Reg(tmp_reg);
1197 }
1198
1199 if (src0.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1200 GetMasm()->mov(asmjit::x86::rax, ArchReg(src0, DOUBLE_WORD_SIZE));
1201 }
1202
1203 if (dst_signed) {
1204 if (dst.GetSize() <= WORD_SIZE) {
1205 GetMasm()->cdq();
1206 } else {
1207 GetMasm()->cqo();
1208 }
1209 GetMasm()->idiv(ArchReg(op1));
1210 } else {
1211 GetMasm()->xor_(asmjit::x86::rdx, asmjit::x86::rdx);
1212 GetMasm()->div(ArchReg(op1));
1213 }
1214
1215 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rdx.id())) {
1216 GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::x86::rdx);
1217 GetMasm()->pop(asmjit::x86::rdx);
1218 }
1219
1220 if (dst.GetId() != ConvertRegNumber(asmjit::x86::rax.id())) {
1221 GetMasm()->pop(asmjit::x86::rax);
1222 }
1223 GetMasm()->jmp(crossroad);
1224
1225 GetMasm()->bind(zero_path);
1226 GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
1227
1228 GetMasm()->bind(crossroad);
1229 }
1230
void Amd64Encoder::EncodeMin(Reg dst, bool dst_signed, Reg src0, Reg src1)
1232 {
1233 if (dst.IsScalar()) {
1234 ScopedTmpReg tmp_reg(this, dst.GetType());
1235 GetMasm()->mov(ArchReg(tmp_reg), ArchReg(src1));
1236 GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
1237
1238 auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
1239 if (dst_signed) {
1240 GetMasm()->cmovle(ArchReg(tmp_reg, size), ArchReg(src0, size));
1241 } else {
1242 GetMasm()->cmovb(ArchReg(tmp_reg, size), ArchReg(src0, size));
1243 }
1244 EncodeMov(dst, tmp_reg);
1245 return;
1246 }
1247
1248 EncodeMinMaxFp<false>(dst, src0, src1);
1249 }
1250
void Amd64Encoder::EncodeMax(Reg dst, bool dst_signed, Reg src0, Reg src1)
1252 {
1253 if (dst.IsScalar()) {
1254 ScopedTmpReg tmp_reg(this, dst.GetType());
1255 GetMasm()->mov(ArchReg(tmp_reg), ArchReg(src1));
1256 GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
1257
1258 auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
1259 if (dst_signed) {
1260 GetMasm()->cmovge(ArchReg(tmp_reg, size), ArchReg(src0, size));
1261 } else {
1262 GetMasm()->cmova(ArchReg(tmp_reg, size), ArchReg(src0, size));
1263 }
1264 EncodeMov(dst, tmp_reg);
1265 return;
1266 }
1267
1268 EncodeMinMaxFp<true>(dst, src0, src1);
1269 }
1270
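// Floating-point min/max with NaN and signed-zero handling: if the operands compare equal,
// the +0/-0 case is resolved bitwise (andps keeps +0 for max, orps keeps -0 for min); if the
// comparison is unordered, the result is forced to NaN by or-ing the operands; otherwise the
// plain maxss/minss (maxsd/minsd) instruction is used.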
1271 template <bool is_max>
void Amd64Encoder::EncodeMinMaxFp(Reg dst, Reg src0, Reg src1)
1273 {
1274 auto end = GetMasm()->newLabel();
1275 auto not_equal = GetMasm()->newLabel();
1276 auto got_nan = GetMasm()->newLabel();
1277 auto &src_a = dst.GetId() != src1.GetId() ? src0 : src1;
1278 auto &src_b = src_a.GetId() == src0.GetId() ? src1 : src0;
1279 if (dst.GetType() == FLOAT32_TYPE) {
1280 GetMasm()->movaps(ArchVReg(dst), ArchVReg(src_a));
1281 GetMasm()->ucomiss(ArchVReg(src_b), ArchVReg(src_a));
1282 GetMasm()->jne(not_equal);
1283 GetMasm()->jp(got_nan);
1284 // calculate result for positive/negative zero operands
1285 if (is_max) {
1286 GetMasm()->andps(ArchVReg(dst), ArchVReg(src_b));
1287 } else {
1288 GetMasm()->orps(ArchVReg(dst), ArchVReg(src_b));
1289 }
1290 GetMasm()->jmp(end);
1291 GetMasm()->bind(got_nan);
1292 // if any operand is NaN result is NaN
1293 GetMasm()->por(ArchVReg(dst), ArchVReg(src_b));
1294 GetMasm()->jmp(end);
1295 GetMasm()->bind(not_equal);
1296 if (is_max) {
1297 GetMasm()->maxss(ArchVReg(dst), ArchVReg(src_b));
1298 } else {
1299 GetMasm()->minss(ArchVReg(dst), ArchVReg(src_b));
1300 }
1301 GetMasm()->bind(end);
1302 } else {
1303 GetMasm()->movapd(ArchVReg(dst), ArchVReg(src_a));
1304 GetMasm()->ucomisd(ArchVReg(src_b), ArchVReg(src_a));
1305 GetMasm()->jne(not_equal);
1306 GetMasm()->jp(got_nan);
1307 // calculate result for positive/negative zero operands
1308 if (is_max) {
1309 GetMasm()->andpd(ArchVReg(dst), ArchVReg(src_b));
1310 } else {
1311 GetMasm()->orpd(ArchVReg(dst), ArchVReg(src_b));
1312 }
1313 GetMasm()->jmp(end);
1314 GetMasm()->bind(got_nan);
1315 // if any operand is NaN result is NaN
1316 GetMasm()->por(ArchVReg(dst), ArchVReg(src_b));
1317 GetMasm()->jmp(end);
1318 GetMasm()->bind(not_equal);
1319 if (is_max) {
1320 GetMasm()->maxsd(ArchVReg(dst), ArchVReg(src_b));
1321 } else {
1322 GetMasm()->minsd(ArchVReg(dst), ArchVReg(src_b));
1323 }
1324 GetMasm()->bind(end);
1325 }
1326 }
1327
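// Shifts with a register shift count: x86-64 only accepts a variable shift count in CL,
// so the count is moved into rcx (saving and restoring it around the shift unless the
// destination itself is rcx) and the shift is performed on a temporary register.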
void Amd64Encoder::EncodeShl(Reg dst, Reg src0, Reg src1)
1329 {
1330 ASSERT(dst.IsScalar());
1331 ScopedTmpReg tmp_reg(this, dst.GetType());
1332 Reg rcx(ConvertRegNumber(asmjit::x86::rcx.id()), dst.GetType());
1333 GetMasm()->mov(ArchReg(tmp_reg), ArchReg(src0));
1334 if (dst.GetId() != rcx.GetId()) {
1335 GetMasm()->push(ArchReg(rcx, DOUBLE_WORD_SIZE));
1336 }
1337 GetMasm()->mov(ArchReg(rcx), ArchReg(src1));
1338 GetMasm()->shl(ArchReg(tmp_reg), asmjit::x86::cl);
1339 if (dst.GetId() != rcx.GetId()) {
1340 GetMasm()->pop(ArchReg(rcx, DOUBLE_WORD_SIZE));
1341 }
1342 GetMasm()->mov(ArchReg(dst), ArchReg(tmp_reg));
1343 }
1344
void Amd64Encoder::EncodeShr(Reg dst, Reg src0, Reg src1)
1346 {
1347 ASSERT(dst.IsScalar());
1348 ScopedTmpReg tmp_reg(this, dst.GetType());
1349 Reg rcx(ConvertRegNumber(asmjit::x86::rcx.id()), dst.GetType());
1350 GetMasm()->mov(ArchReg(tmp_reg), ArchReg(src0));
1351 if (dst.GetId() != rcx.GetId()) {
1352 GetMasm()->push(ArchReg(rcx, DOUBLE_WORD_SIZE));
1353 }
1354 GetMasm()->mov(ArchReg(rcx), ArchReg(src1));
1355 GetMasm()->shr(ArchReg(tmp_reg), asmjit::x86::cl);
1356 if (dst.GetId() != rcx.GetId()) {
1357 GetMasm()->pop(ArchReg(rcx, DOUBLE_WORD_SIZE));
1358 }
1359 GetMasm()->mov(ArchReg(dst), ArchReg(tmp_reg));
1360 }
1361
void Amd64Encoder::EncodeAShr(Reg dst, Reg src0, Reg src1)
1363 {
1364 ASSERT(dst.IsScalar());
1365 ScopedTmpReg tmp_reg(this, dst.GetType());
1366 Reg rcx(ConvertRegNumber(asmjit::x86::rcx.id()), dst.GetType());
1367 GetMasm()->mov(ArchReg(tmp_reg), ArchReg(src0));
1368 if (dst.GetId() != rcx.GetId()) {
1369 GetMasm()->push(ArchReg(rcx, DOUBLE_WORD_SIZE));
1370 }
1371 GetMasm()->mov(ArchReg(rcx), ArchReg(src1));
1372 GetMasm()->sar(ArchReg(tmp_reg), asmjit::x86::cl);
1373 if (dst.GetId() != rcx.GetId()) {
1374 GetMasm()->pop(ArchReg(rcx, DOUBLE_WORD_SIZE));
1375 }
1376 GetMasm()->mov(ArchReg(dst), ArchReg(tmp_reg));
1377 }
1378
void Amd64Encoder::EncodeAnd(Reg dst, Reg src0, Reg src1)
1380 {
1381 ASSERT(dst.IsScalar());
1382 if (dst.GetId() == src0.GetId()) {
1383 GetMasm()->and_(ArchReg(dst), ArchReg(src1));
1384 } else if (dst.GetId() == src1.GetId()) {
1385 GetMasm()->and_(ArchReg(dst), ArchReg(src0));
1386 } else {
1387 GetMasm()->mov(ArchReg(dst), ArchReg(src0));
1388 GetMasm()->and_(ArchReg(dst), ArchReg(src1));
1389 }
1390 }
1391
void Amd64Encoder::EncodeOr(Reg dst, Reg src0, Reg src1)
1393 {
1394 ASSERT(dst.IsScalar());
1395 if (dst.GetId() == src0.GetId()) {
1396 GetMasm()->or_(ArchReg(dst), ArchReg(src1));
1397 } else if (dst.GetId() == src1.GetId()) {
1398 GetMasm()->or_(ArchReg(dst), ArchReg(src0));
1399 } else {
1400 GetMasm()->mov(ArchReg(dst), ArchReg(src0));
1401 GetMasm()->or_(ArchReg(dst), ArchReg(src1));
1402 }
1403 }
1404
void Amd64Encoder::EncodeXor(Reg dst, Reg src0, Reg src1)
1406 {
1407 ASSERT(dst.IsScalar());
1408 if (dst.GetId() == src0.GetId()) {
1409 GetMasm()->xor_(ArchReg(dst), ArchReg(src1));
1410 } else if (dst.GetId() == src1.GetId()) {
1411 GetMasm()->xor_(ArchReg(dst), ArchReg(src0));
1412 } else {
1413 GetMasm()->mov(ArchReg(dst), ArchReg(src0));
1414 GetMasm()->xor_(ArchReg(dst), ArchReg(src1));
1415 }
1416 }
1417
void Amd64Encoder::EncodeAdd(Reg dst, Reg src, Imm imm)
1419 {
1420 if (dst.IsFloat()) {
1421 SetFalseResult();
1422 return;
1423 }
1424
1425 auto imm_val = ImmToSignedInt(imm);
1426 auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
1427 if (ImmFitsSize(imm_val, size)) {
1428 GetMasm()->lea(ArchReg(dst, size), asmjit::x86::ptr(ArchReg(src, size), imm_val));
1429 } else {
1430 if (dst.GetId() != src.GetId()) {
1431 GetMasm()->mov(ArchReg(dst), asmjit::imm(imm_val));
1432 GetMasm()->add(ArchReg(dst), ArchReg(src));
1433 } else {
1434 ScopedTmpReg tmp_reg(this, dst.GetType());
1435 GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
1436 GetMasm()->add(ArchReg(dst), ArchReg(tmp_reg));
1437 }
1438 }
1439 }
1440
void Amd64Encoder::EncodeSub(Reg dst, Reg src, Imm imm)
1442 {
1443 if (dst.IsFloat()) {
1444 SetFalseResult();
1445 return;
1446 }
1447
1448 auto imm_val = -ImmToSignedInt(imm);
1449 auto size = std::max<uint8_t>(WORD_SIZE, dst.GetSize());
1450 if (ImmFitsSize(imm_val, size)) {
1451 GetMasm()->lea(ArchReg(dst, size), asmjit::x86::ptr(ArchReg(src, size), imm_val));
1452 } else {
1453 if (dst.GetId() != src.GetId()) {
1454 GetMasm()->mov(ArchReg(dst), asmjit::imm(imm_val));
1455 GetMasm()->add(ArchReg(dst), ArchReg(src));
1456 } else {
1457 ScopedTmpReg tmp_reg(this, dst.GetType());
1458 GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
1459 GetMasm()->add(ArchReg(dst), ArchReg(tmp_reg));
1460 }
1461 }
1462 }
1463
void Amd64Encoder::EncodeShl(Reg dst, Reg src, Imm imm)
1465 {
1466 ASSERT(dst.IsScalar());
1467 EncodeMov(dst, src);
1468 GetMasm()->shl(ArchReg(dst), ArchImm(imm));
1469 }
1470
void Amd64Encoder::EncodeShr(Reg dst, Reg src, Imm imm)
1472 {
1473 ASSERT(dst.IsScalar());
1474
1475 EncodeMov(dst, src);
1476 GetMasm()->shr(ArchReg(dst), ArchImm(imm));
1477 }
1478
void Amd64Encoder::EncodeAShr(Reg dst, Reg src, Imm imm)
1480 {
1481 ASSERT(dst.IsScalar());
1482 EncodeMov(dst, src);
1483 GetMasm()->sar(ArchReg(dst), ArchImm(imm));
1484 }
1485
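// An immediate narrower than the destination is widened by filling all bits above its own
// width with ones, so the "and" only masks the low bits of the destination; the value is
// then truncated to the destination width so it can be encoded (or materialized in a
// temporary register when it still does not fit).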
void Amd64Encoder::EncodeAnd(Reg dst, Reg src, Imm imm)
1487 {
1488 ASSERT(dst.IsScalar());
1489 auto imm_val = ImmToUnsignedInt(imm);
1490
1491 switch (imm.GetSize()) {
1492 case BYTE_SIZE:
1493 imm_val |= ~uint64_t(0xFF); // NOLINT
1494 break;
1495 case HALF_SIZE:
1496 imm_val |= ~uint64_t(0xFFFF); // NOLINT
1497 break;
1498 case WORD_SIZE:
1499 imm_val |= ~uint64_t(0xFFFFFFFF); // NOLINT
1500 break;
1501 default:
1502 break;
1503 }
1504
1505 if (dst.GetSize() != DOUBLE_WORD_SIZE) {
1506 // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult)
1507 imm_val &= (uint64_t(1) << dst.GetSize()) - 1;
1508 }
1509
1510 if (ImmFitsSize(imm_val, dst.GetSize())) {
1511 EncodeMov(dst, src);
1512 GetMasm()->and_(ArchReg(dst), imm_val);
1513 } else {
1514 if (dst.GetId() != src.GetId()) {
1515 GetMasm()->mov(ArchReg(dst), asmjit::imm(imm_val));
1516 GetMasm()->and_(ArchReg(dst), ArchReg(src));
1517 } else {
1518 ScopedTmpReg tmp_reg(this, dst.GetType());
1519 GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
1520 GetMasm()->and_(ArchReg(dst), ArchReg(tmp_reg));
1521 }
1522 }
1523 }
1524
void Amd64Encoder::EncodeOr(Reg dst, Reg src, Imm imm)
1526 {
1527 ASSERT(dst.IsScalar());
1528 auto imm_val = ImmToUnsignedInt(imm);
1529
1530 if (ImmFitsSize(imm_val, dst.GetSize())) {
1531 EncodeMov(dst, src);
1532 GetMasm()->or_(ArchReg(dst), imm_val);
1533 } else {
1534 if (dst.GetId() != src.GetId()) {
1535 GetMasm()->mov(ArchReg(dst), asmjit::imm(imm_val));
1536 GetMasm()->or_(ArchReg(dst), ArchReg(src));
1537 } else {
1538 ScopedTmpReg tmp_reg(this, dst.GetType());
1539 GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
1540 GetMasm()->or_(ArchReg(dst), ArchReg(tmp_reg));
1541 }
1542 }
1543 }
1544
void Amd64Encoder::EncodeXor(Reg dst, Reg src, Imm imm)
1546 {
1547 ASSERT(dst.IsScalar());
1548 auto imm_val = ImmToUnsignedInt(imm);
1549
1550 if (ImmFitsSize(imm_val, dst.GetSize())) {
1551 EncodeMov(dst, src);
1552 GetMasm()->xor_(ArchReg(dst), imm_val);
1553 } else {
1554 if (dst.GetId() != src.GetId()) {
1555 GetMasm()->mov(ArchReg(dst), asmjit::imm(imm_val));
1556 GetMasm()->xor_(ArchReg(dst), ArchReg(src));
1557 } else {
1558 ScopedTmpReg tmp_reg(this, dst.GetType());
1559 GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
1560 GetMasm()->xor_(ArchReg(dst), ArchReg(tmp_reg));
1561 }
1562 }
1563 }
1564
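// There is no direct move of an immediate into an xmm register, so floating-point
// immediates are materialized in a scratch GPR first and transferred with movd/movq.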
void Amd64Encoder::EncodeMov(Reg dst, Imm src)
1566 {
1567 if (dst.IsScalar()) {
1568 if (dst.GetSize() < WORD_SIZE) {
1569 GetMasm()->xor_(ArchReg(dst, WORD_SIZE), ArchReg(dst, WORD_SIZE));
1570 }
1571 GetMasm()->mov(ArchReg(dst), ArchImm(src));
1572 return;
1573 }
1574
1575 if (dst.GetType() == FLOAT32_TYPE) {
1576 ScopedTmpRegU32 tmp_reg(this);
1577 auto val = bit_cast<uint32_t>(src.GetValue<float>());
1578 GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(val));
1579 GetMasm()->movd(ArchVReg(dst), ArchReg(tmp_reg));
1580 } else {
1581 ScopedTmpRegU64 tmp_reg(this);
1582 auto val = bit_cast<uint64_t>(src.GetValue<double>());
1583 GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(val));
1584 GetMasm()->movq(ArchVReg(dst), ArchReg(tmp_reg));
1585 }
1586 }
1587
void Amd64Encoder::EncodeLdr(Reg dst, bool dst_signed, MemRef mem)
1589 {
1590 auto m = ArchMem(mem).Prepare(GetMasm());
1591
1592 if (dst.GetType() == FLOAT32_TYPE) {
1593 GetMasm()->movss(ArchVReg(dst), m);
1594 return;
1595 }
1596 if (dst.GetType() == FLOAT64_TYPE) {
1597 GetMasm()->movsd(ArchVReg(dst), m);
1598 return;
1599 }
1600
1601 m.setSize(dst.GetSize() / BITS_PER_BYTE);
1602
1603 if (dst_signed && dst.GetSize() < DOUBLE_WORD_SIZE) {
1604 if (dst.GetSize() == WORD_SIZE) {
1605 GetMasm()->movsxd(ArchReg(dst, DOUBLE_WORD_SIZE), m);
1606 } else {
1607 GetMasm()->movsx(ArchReg(dst, DOUBLE_WORD_SIZE), m);
1608 }
1609 return;
1610 }
1611 if (!dst_signed && dst.GetSize() < WORD_SIZE) {
1612 GetMasm()->movzx(ArchReg(dst, WORD_SIZE), m);
1613 return;
1614 }
1615
1616 GetMasm()->mov(ArchReg(dst), m);
1617 }
1618
void Amd64Encoder::EncodeLdrAcquire(Reg dst, bool dst_signed, MemRef mem)
1620 {
1621 EncodeLdr(dst, dst_signed, mem);
    // A LoadLoad or LoadStore barrier would go here, but these are no-ops in the amd64 memory model
1623 }
1624
void Amd64Encoder::EncodeStr(Reg src, MemRef mem)
1626 {
1627 auto m = ArchMem(mem).Prepare(GetMasm());
1628
1629 if (src.GetType() == FLOAT32_TYPE) {
1630 GetMasm()->movss(m, ArchVReg(src));
1631 return;
1632 }
1633 if (src.GetType() == FLOAT64_TYPE) {
1634 GetMasm()->movsd(m, ArchVReg(src));
1635 return;
1636 }
1637
1638 m.setSize(src.GetSize() / BITS_PER_BYTE);
1639 GetMasm()->mov(m, ArchReg(src));
1640 }
1641
void Amd64Encoder::EncodeStrRelease(Reg src, MemRef mem)
1643 {
    // A StoreStore barrier would go here, but it is a no-op in the amd64 memory model
1645 EncodeStr(src, mem);
    // This is a StoreLoad barrier (which is also a full memory barrier in the amd64 memory model)
1647 GetMasm()->lock().add(asmjit::x86::dword_ptr(asmjit::x86::rsp), asmjit::imm(0));
1648 }
1649
void Amd64Encoder::EncodeStrz(Reg src, MemRef mem)
1651 {
1652 if (src.IsScalar()) {
1653 if (src.GetSize() == DOUBLE_WORD_SIZE) {
1654 GetMasm()->mov(ArchMem(mem).Prepare(GetMasm()), ArchReg(src));
1655 } else {
1656 ScopedTmpRegU64 tmp_reg(this);
1657 GetMasm()->xor_(ArchReg(tmp_reg), ArchReg(tmp_reg));
1658 GetMasm()->mov(ArchReg(tmp_reg, src.GetSize()), ArchReg(src));
1659 GetMasm()->mov(ArchMem(mem).Prepare(GetMasm()), ArchReg(tmp_reg));
1660 }
1661 } else {
1662 if (src.GetType() == FLOAT64_TYPE) {
1663 GetMasm()->movsd(ArchMem(mem).Prepare(GetMasm()), ArchVReg(src));
1664 } else {
1665 ScopedTmpRegF64 tmp_reg(this);
1666
1667 GetMasm()->xorpd(ArchVReg(tmp_reg), ArchVReg(tmp_reg));
1668 GetMasm()->movss(ArchVReg(tmp_reg), ArchVReg(src));
1669 GetMasm()->movsd(ArchMem(mem).Prepare(GetMasm()), ArchVReg(tmp_reg));
1670 }
1671 }
1672 }
1673
EncodeSti(Imm src,MemRef mem)1674 void Amd64Encoder::EncodeSti(Imm src, MemRef mem)
1675 {
1676 if (src.IsFloat()) {
1677 if (src.GetType() == FLOAT32_TYPE) {
1678 EncodeSti(Imm(bit_cast<int32_t>(src.GetValue<float>())), mem);
1679 } else {
1680 EncodeSti(Imm(bit_cast<int64_t>(src.GetValue<double>())), mem);
1681 }
1682 return;
1683 }
1684
1685 auto m = ArchMem(mem).Prepare(GetMasm());
1686 if (src.GetSize() <= HALF_SIZE) {
1687 m.setSize(src.GetSize() / BITS_PER_BYTE);
1688 GetMasm()->mov(m, ArchImm(src));
1689 } else {
1690 m.setSize(DOUBLE_WORD_SIZE_BYTE);
1691
1692 auto imm_val = ImmToSignedInt(src);
1693 if (ImmFitsSize(imm_val, DOUBLE_WORD_SIZE)) {
1694 GetMasm()->mov(m, asmjit::imm(imm_val));
1695 } else {
1696 ScopedTmpRegU64 tmp_reg(this);
1697 GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
1698 GetMasm()->mov(m, ArchReg(tmp_reg));
1699 }
1700 }
1701 }
1702
EncodeMemCopy(MemRef mem_from,MemRef mem_to,size_t size)1703 void Amd64Encoder::EncodeMemCopy(MemRef mem_from, MemRef mem_to, size_t size)
1704 {
1705 ScopedTmpRegU64 tmp_reg(this);
1706 GetMasm()->mov(ArchReg(tmp_reg, size), ArchMem(mem_from).Prepare(GetMasm()));
1707 GetMasm()->mov(ArchMem(mem_to).Prepare(GetMasm()), ArchReg(tmp_reg, size));
1708 }
1709
EncodeMemCopyz(MemRef mem_from,MemRef mem_to,size_t size)1710 void Amd64Encoder::EncodeMemCopyz(MemRef mem_from, MemRef mem_to, size_t size)
1711 {
1712 ScopedTmpRegU64 tmp_reg(this);
1713 if (size < DOUBLE_WORD_SIZE) {
1714 GetMasm()->xor_(ArchReg(tmp_reg), ArchReg(tmp_reg));
1715 }
1716 GetMasm()->mov(ArchReg(tmp_reg, size), ArchMem(mem_from).Prepare(GetMasm()));
1717 GetMasm()->mov(ArchMem(mem_to).Prepare(GetMasm()), ArchReg(tmp_reg));
1718 }
1719
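// Materialize the condition `src0 <cc> src1` as 0/1 in dst. Floating-point inputs are compared with
// ucomiss/ucomisd, where the parity flag (set on unordered operands, i.e. NaN) decides the result for
// NaN-sensitive conditions.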
void Amd64Encoder::EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc)
{
    if (src0.IsScalar()) {
        GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
    } else {
        if (src0.GetType() == FLOAT32_TYPE) {
            GetMasm()->ucomiss(ArchVReg(src0), ArchVReg(src1));
        } else {
            GetMasm()->ucomisd(ArchVReg(src0), ArchVReg(src1));
        }
    }
    GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(0));

    if (src0.IsScalar()) {
        GetMasm()->set(ArchCc(cc), ArchReg(dst, BYTE_SIZE));
        return;
    }

    auto end = GetMasm()->newLabel();

    if (CcMatchesNan(cc)) {
        GetMasm()->setp(ArchReg(dst, BYTE_SIZE));
    }
    GetMasm()->jp(end);
    GetMasm()->set(ArchCc(cc, true), ArchReg(dst, BYTE_SIZE));

    GetMasm()->bind(end);
}

void Amd64Encoder::EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc)
{
    ASSERT(src0.IsScalar());

    GetMasm()->test(ArchReg(src0), ArchReg(src1));

    GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(0));
    GetMasm()->set(ArchCcTest(cc), ArchReg(dst, BYTE_SIZE));
}

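// Three-way compare: dst becomes -1, 0 or 1 depending on how src0 relates to src1. For floating-point
// inputs the unordered (NaN) case is detected via the parity flag and yields -1 for Condition::LT and
// 1 for Condition::MI.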
void Amd64Encoder::EncodeCmp(Reg dst, Reg src0, Reg src1, Condition cc)
{
    auto end = GetMasm()->newLabel();

    if (src0.IsFloat()) {
        ASSERT(src1.IsFloat());
        ASSERT(cc == Condition::MI || cc == Condition::LT);

        if (src0.GetType() == FLOAT32_TYPE) {
            GetMasm()->ucomiss(ArchVReg(src0), ArchVReg(src1));
        } else {
            GetMasm()->ucomisd(ArchVReg(src0), ArchVReg(src1));
        }

        GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), cc == Condition::LT ? asmjit::imm(-1) : asmjit::imm(1));
        cc = Condition::LO;

        GetMasm()->jp(end);
    } else {
        ASSERT(src0.IsScalar() && src1.IsScalar());
        ASSERT(cc == Condition::LO || cc == Condition::LT);
        GetMasm()->cmp(ArchReg(src0), ArchReg(src1));
    }
    GetMasm()->mov(ArchReg(dst, DOUBLE_WORD_SIZE), asmjit::imm(0));
    GetMasm()->setne(ArchReg(dst, BYTE_SIZE));

    GetMasm()->j(asmjit::x86::Condition::negate(ArchCc(cc)), end);
    GetMasm()->neg(ArchReg(dst));

    GetMasm()->bind(end);
}

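// Select dst = (src2 <cc> src3) ? src0 : src1, implemented with cmov. A temporary destination is used
// when dst aliases src0, and for floating-point comparisons the parity flag handles the unordered (NaN)
// case.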
void Amd64Encoder::EncodeSelect(Reg dst, Reg src0, Reg src1, Reg src2, Reg src3, Condition cc)
{
    ASSERT(!src0.IsFloat() && !src1.IsFloat());
    if (src2.IsScalar()) {
        GetMasm()->cmp(ArchReg(src2), ArchReg(src3));
    } else if (src2.GetType() == FLOAT32_TYPE) {
        GetMasm()->comiss(ArchVReg(src2), ArchVReg(src3));
    } else {
        GetMasm()->comisd(ArchVReg(src2), ArchVReg(src3));
    }

    auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
    bool dst_aliased = dst.GetId() == src0.GetId();
    ScopedTmpReg tmp_reg(this, dst.GetType());
    auto dst_reg = dst_aliased ? ArchReg(tmp_reg, size) : ArchReg(dst, size);

    GetMasm()->mov(dst_reg, ArchReg(src1, size));

    if (src2.IsScalar()) {
        GetMasm()->cmov(ArchCc(cc), dst_reg, ArchReg(src0, size));
    } else if (CcMatchesNan(cc)) {
        GetMasm()->cmovp(dst_reg, ArchReg(src0, size));
        GetMasm()->cmov(ArchCc(cc, src2.IsFloat()), dst_reg, ArchReg(src0, size));
    } else {
        auto end = GetMasm()->newLabel();

        GetMasm()->jp(end);
        GetMasm()->cmov(ArchCc(cc, src2.IsFloat()), dst_reg, ArchReg(src0, size));

        GetMasm()->bind(end);
    }
    if (dst_aliased) {
        EncodeMov(dst, tmp_reg);
    }
}

void Amd64Encoder::EncodeSelect(Reg dst, Reg src0, Reg src1, Reg src2, Imm imm, Condition cc)
{
    ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());

    auto imm_val = ImmToSignedInt(imm);
    if (ImmFitsSize(imm_val, src2.GetSize())) {
        GetMasm()->cmp(ArchReg(src2), asmjit::imm(imm_val));
    } else {
        ScopedTmpReg tmp_reg(this, src2.GetType());
        GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
        GetMasm()->cmp(ArchReg(src2), ArchReg(tmp_reg));
    }

    ScopedTmpReg tmp_reg(this, dst.GetType());
    auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
    bool dst_aliased = dst.GetId() == src0.GetId();
    auto dst_reg = dst_aliased ? ArchReg(tmp_reg, size) : ArchReg(dst, size);

    GetMasm()->mov(dst_reg, ArchReg(src1, size));
    GetMasm()->cmov(ArchCc(cc), dst_reg, ArchReg(src0, size));
    if (dst_aliased) {
        EncodeMov(dst, tmp_reg);
    }
}

void Amd64Encoder::EncodeSelectTest(Reg dst, Reg src0, Reg src1, Reg src2, Reg src3, Condition cc)
{
    ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());

    GetMasm()->test(ArchReg(src2), ArchReg(src3));

    ScopedTmpReg tmp_reg(this, dst.GetType());
    auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
    bool dst_aliased = dst.GetId() == src0.GetId();
    auto dst_reg = dst_aliased ? ArchReg(tmp_reg, size) : ArchReg(dst, size);

    GetMasm()->mov(dst_reg, ArchReg(src1, size));
    GetMasm()->cmov(ArchCcTest(cc), dst_reg, ArchReg(src0, size));
    if (dst_aliased) {
        EncodeMov(dst, tmp_reg);
    }
}

void Amd64Encoder::EncodeSelectTest(Reg dst, Reg src0, Reg src1, Reg src2, Imm imm, Condition cc)
{
    ASSERT(!src0.IsFloat() && !src1.IsFloat() && !src2.IsFloat());

    auto imm_val = ImmToSignedInt(imm);
    if (ImmFitsSize(imm_val, src2.GetSize())) {
        GetMasm()->test(ArchReg(src2), asmjit::imm(imm_val));
    } else {
        ScopedTmpReg tmp_reg(this, src2.GetType());
        GetMasm()->mov(ArchReg(tmp_reg), asmjit::imm(imm_val));
        GetMasm()->test(ArchReg(src2), ArchReg(tmp_reg));
    }

    ScopedTmpReg tmp_reg(this, dst.GetType());
    auto size = std::max<uint8_t>(src0.GetSize(), WORD_SIZE);
    bool dst_aliased = dst.GetId() == src0.GetId();
    auto dst_reg = dst_aliased ? ArchReg(tmp_reg, size) : ArchReg(dst, size);

    GetMasm()->mov(dst_reg, ArchReg(src1, size));
    GetMasm()->cmov(ArchCcTest(cc), dst_reg, ArchReg(src0, size));
    if (dst_aliased) {
        EncodeMov(dst, tmp_reg);
    }
}

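// Load a pair of registers from two consecutive memory slots (the second slot immediately follows the first).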
void Amd64Encoder::EncodeLdp(Reg dst0, Reg dst1, bool dst_signed, MemRef mem)
{
    ASSERT(dst0.IsFloat() == dst1.IsFloat());
    ASSERT(dst0.GetSize() == dst1.GetSize());

    auto m = ArchMem(mem).Prepare(GetMasm());

    if (dst0.IsFloat()) {
        if (dst0.GetType() == FLOAT32_TYPE) {
            GetMasm()->movss(ArchVReg(dst0), m);

            m.addOffset(WORD_SIZE_BYTE);
            GetMasm()->movss(ArchVReg(dst1), m);
        } else {
            GetMasm()->movsd(ArchVReg(dst0), m);

            m.addOffset(DOUBLE_WORD_SIZE_BYTE);
            GetMasm()->movsd(ArchVReg(dst1), m);
        }
        return;
    }

    if (dst_signed && dst0.GetSize() == WORD_SIZE) {
        m.setSize(WORD_SIZE_BYTE);
        GetMasm()->movsxd(ArchReg(dst0, DOUBLE_WORD_SIZE), m);

        m.addOffset(WORD_SIZE_BYTE);
        GetMasm()->movsxd(ArchReg(dst1, DOUBLE_WORD_SIZE), m);
        return;
    }

    GetMasm()->mov(ArchReg(dst0), m);

    m.addOffset(dst0.GetSize() / BITS_PER_BYTE);
    GetMasm()->mov(ArchReg(dst1), m);
}

void Amd64Encoder::EncodeStp(Reg src0, Reg src1, MemRef mem)
{
    ASSERT(src0.IsFloat() == src1.IsFloat());
    ASSERT(src0.GetSize() == src1.GetSize());

    auto m = ArchMem(mem).Prepare(GetMasm());

    if (src0.IsFloat()) {
        if (src0.GetType() == FLOAT32_TYPE) {
            GetMasm()->movss(m, ArchVReg(src0));

            m.addOffset(WORD_SIZE_BYTE);
            GetMasm()->movss(m, ArchVReg(src1));
        } else {
            GetMasm()->movsd(m, ArchVReg(src0));

            m.addOffset(DOUBLE_WORD_SIZE_BYTE);
            GetMasm()->movsd(m, ArchVReg(src1));
        }
        return;
    }

    GetMasm()->mov(m, ArchReg(src0));

    m.addOffset(src0.GetSize() / BITS_PER_BYTE);
    GetMasm()->mov(m, ArchReg(src1));
}

void Amd64Encoder::EncodeReverseBytes(Reg dst, Reg src)
{
    ASSERT(src.GetSize() > BYTE_SIZE);
    ASSERT(src.GetSize() == dst.GetSize());

    if (src != dst) {
        GetMasm()->mov(ArchReg(dst), ArchReg(src));
    }

    if (src.GetSize() == HALF_SIZE) {
        GetMasm()->rol(ArchReg(dst), BYTE_SIZE);
        GetMasm()->movsx(ArchReg(dst, WORD_SIZE), ArchReg(dst));
    } else {
        GetMasm()->bswap(ArchReg(dst));
    }
}

bool Amd64Encoder::CanEncodeImmAddSubCmp(int64_t imm, uint32_t size, [[maybe_unused]] bool signed_compare)
{
    return ImmFitsSize(imm, size);
}

void Amd64Encoder::EncodeBitCount(Reg dst0, Reg src0)
{
    ASSERT(src0.GetSize() == WORD_SIZE || src0.GetSize() == DOUBLE_WORD_SIZE);
    ASSERT(dst0.GetSize() == WORD_SIZE);
    ASSERT(src0.IsScalar() && dst0.IsScalar());

    GetMasm()->popcnt(ArchReg(dst0, src0.GetSize()), ArchReg(src0));
}

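// Count leading zero bits: bsr yields the index of the highest set bit, and XOR-ing that index with
// (size - 1) converts it into the leading-zero count. A zero input is handled separately because bsr
// leaves the destination undefined in that case.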
void Amd64Encoder::EncodeCountLeadingZeroBits(Reg dst, Reg src)
{
    auto end = CreateLabel();
    auto zero = CreateLabel();
    EncodeJump(zero, src, Condition::EQ);
    GetMasm()->bsr(ArchReg(dst), ArchReg(src));
    GetMasm()->xor_(ArchReg(dst), asmjit::imm(dst.GetSize() - 1));
    EncodeJump(end);

    BindLabel(zero);
    GetMasm()->mov(ArchReg(dst), asmjit::imm(dst.GetSize()));

    BindLabel(end);
}

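// Count trailing zero bits: bsf leaves the destination undefined for a zero input, so dst is preloaded
// with the operand width and only overwritten via cmovne when the source is non-zero.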
void Amd64Encoder::EncodeCountTrailingZeroBits(Reg dst, Reg src)
{
    ScopedTmpReg tmp(this, src.GetType());
    GetMasm()->bsf(ArchReg(tmp), ArchReg(src));
    GetMasm()->mov(ArchReg(dst), asmjit::imm(dst.GetSize()));
    GetMasm()->cmovne(ArchReg(dst), ArchReg(tmp));
}

void Amd64Encoder::EncodeCeil(Reg dst, Reg src)
{
    // NOLINTNEXTLINE(readability-magic-numbers)
    GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(2));
}

void Amd64Encoder::EncodeFloor(Reg dst, Reg src)
{
    GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(1));
}

void Amd64Encoder::EncodeRint(Reg dst, Reg src)
{
    GetMasm()->roundsd(ArchVReg(dst), ArchVReg(src), asmjit::imm(0));
}

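// Round-half-up to integer: take floor(src), add 1 when the fractional part (src - floor) is >= 0.5,
// then convert to a signed integer with saturation - values at or above the integer maximum keep the
// preloaded maximum in dst, and NaN is mapped to 0 (the parity flag from the comparison detects the
// unordered case).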
void Amd64Encoder::EncodeRound(Reg dst, Reg src)
{
    ScopedTmpReg t1(this, src.GetType());
    ScopedTmpReg t2(this, src.GetType());
    ScopedTmpReg t3(this, src.GetType());
    ScopedTmpReg t4(this, dst.GetType());

    auto skip_incr_id = CreateLabel();
    auto done_id = CreateLabel();

    auto skip_incr = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(skip_incr_id);
    auto done = static_cast<Amd64LabelHolder *>(GetLabels())->GetLabel(done_id);

    if (src.GetType() == FLOAT32_TYPE) {
        GetMasm()->movss(ArchVReg(t2), ArchVReg(src));
        GetMasm()->roundss(ArchVReg(t1), ArchVReg(src), asmjit::imm(1));
        GetMasm()->subss(ArchVReg(t2), ArchVReg(t1));
        // NOLINTNEXTLINE(readability-magic-numbers)
        const auto HALF_F = 0.5F;
        GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int32_t, float>(HALF_F)));
        GetMasm()->movd(ArchVReg(t3), ArchReg(t4));
        GetMasm()->comiss(ArchVReg(t2), ArchVReg(t3));
        GetMasm()->j(asmjit::x86::Condition::Code::kB, *skip_incr);
        // NOLINTNEXTLINE(readability-magic-numbers)
        const auto ONE_F = 1.0F;
        GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int32_t, float>(ONE_F)));
        GetMasm()->movd(ArchVReg(t3), ArchReg(t4));
        GetMasm()->addss(ArchVReg(t1), ArchVReg(t3));
        BindLabel(skip_incr_id);

        // NOLINTNEXTLINE(readability-magic-numbers)
        GetMasm()->mov(ArchReg(dst), asmjit::imm(0x7FFFFFFF));
        GetMasm()->cvtsi2ss(ArchVReg(t2), ArchReg(dst));
        GetMasm()->comiss(ArchVReg(t1), ArchVReg(t2));
        GetMasm()->j(asmjit::x86::Condition::Code::kAE,
                     *done); // clipped to max (already in dst), does not jump on unordered
        GetMasm()->mov(ArchReg(dst), asmjit::imm(0)); // does not change flags
        GetMasm()->j(asmjit::x86::Condition::Code::kParityEven, *done); // NaN mapped to 0 (just moved into dst)
        GetMasm()->cvttss2si(ArchReg(dst), ArchVReg(t1));
        BindLabel(done_id);
    } else if (src.GetType() == FLOAT64_TYPE) {
        GetMasm()->movsd(ArchVReg(t2), ArchVReg(src));
        GetMasm()->roundsd(ArchVReg(t1), ArchVReg(src), asmjit::imm(1));
        GetMasm()->subsd(ArchVReg(t2), ArchVReg(t1));
        // NOLINTNEXTLINE(readability-magic-numbers)
        const auto HALF = 0.5;
        GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int64_t, double>(HALF)));
        GetMasm()->movq(ArchVReg(t3), ArchReg(t4));
        GetMasm()->comisd(ArchVReg(t2), ArchVReg(t3));
        GetMasm()->j(asmjit::x86::Condition::Code::kB, *skip_incr);
        // NOLINTNEXTLINE(readability-magic-numbers)
        const auto ONE = 1.0;
        GetMasm()->mov(ArchReg(t4), asmjit::imm(bit_cast<int64_t, double>(ONE)));
        GetMasm()->movq(ArchVReg(t3), ArchReg(t4));
        GetMasm()->addsd(ArchVReg(t1), ArchVReg(t3));
        BindLabel(skip_incr_id);

        // NOLINTNEXTLINE(readability-magic-numbers)
        GetMasm()->mov(ArchReg(dst), asmjit::imm(0x7FFFFFFFFFFFFFFFL));
        GetMasm()->cvtsi2sd(ArchVReg(t2), ArchReg(dst));
        GetMasm()->comisd(ArchVReg(t1), ArchVReg(t2));
        GetMasm()->j(asmjit::x86::Condition::Code::kAE,
                     *done); // clipped to max (already in dst), does not jump on unordered
        GetMasm()->mov(ArchReg(dst), asmjit::imm(0)); // does not change flags
        GetMasm()->j(asmjit::x86::Condition::Code::kParityEven, *done); // NaN mapped to 0 (just moved into dst)
        GetMasm()->cvttsd2si(ArchReg(dst), ArchVReg(t1));
        BindLabel(done_id);
    } else {
        UNREACHABLE();
    }
}

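// Reverse the bit order of a 32- or 64-bit value with the classic mask-and-shift technique: three rounds
// swap adjacent bits, 2-bit pairs and nibbles, and a final bswap reverses the byte order. 64-bit masks do
// not fit into an immediate operand, so they are materialized in a temporary register first.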
template <typename T>
void Amd64Encoder::EncodeReverseBitsImpl(Reg dst0, Reg src0)
{
    ASSERT(std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed);
    [[maybe_unused]] constexpr auto IMM_8 = 8;
    ASSERT(sizeof(T) * IMM_8 == dst0.GetSize());
    // NOLINTNEXTLINE(modernize-avoid-c-arrays)
    static constexpr T MASKS[] = {static_cast<T>(UINT64_C(0x5555555555555555)),
                                  static_cast<T>(UINT64_C(0x3333333333333333)),
                                  static_cast<T>(UINT64_C(0x0f0f0f0f0f0f0f0f))};

    ScopedTmpReg tmp(this, dst0.GetType());
    ScopedTmpReg imm_holder(this, dst0.GetType());
    auto imm_holder_reg = ArchReg(imm_holder);

    GetMasm()->mov(ArchReg(dst0), ArchReg(src0));
    GetMasm()->mov(ArchReg(tmp), ArchReg(src0));
    constexpr auto MAX_ROUNDS = 3;
    for (uint64_t round = 0; round < MAX_ROUNDS; round++) {
        auto shift = 1U << round;
        auto mask = asmjit::imm(MASKS[round]);
        GetMasm()->shr(ArchReg(dst0), shift);
        if (dst0.GetSize() == DOUBLE_WORD_SIZE) {
            GetMasm()->mov(imm_holder_reg, mask);
            GetMasm()->and_(ArchReg(tmp), imm_holder_reg);
            GetMasm()->and_(ArchReg(dst0), imm_holder_reg);
        } else {
            GetMasm()->and_(ArchReg(tmp), mask);
            GetMasm()->and_(ArchReg(dst0), mask);
        }
        GetMasm()->shl(ArchReg(tmp), shift);
        GetMasm()->or_(ArchReg(dst0), ArchReg(tmp));
        constexpr auto ROUND_2 = 2;
        if (round != ROUND_2) {
            GetMasm()->mov(ArchReg(tmp), ArchReg(dst0));
        }
    }

    GetMasm()->bswap(ArchReg(dst0));
}

void Amd64Encoder::EncodeReverseBits(Reg dst0, Reg src0)
{
    ASSERT(src0.GetSize() == WORD_SIZE || src0.GetSize() == DOUBLE_WORD_SIZE);
    ASSERT(src0.GetSize() == dst0.GetSize());

    if (src0.GetSize() == WORD_SIZE) {
        EncodeReverseBitsImpl<uint32_t>(dst0, src0);
        return;
    }

    EncodeReverseBitsImpl<uint64_t>(dst0, src0);
}

bool Amd64Encoder::CanEncodeScale(uint64_t imm, [[maybe_unused]] uint32_t size)
{
    return imm <= 3U;
}

bool Amd64Encoder::CanEncodeImmLogical(uint64_t imm, uint32_t size)
{
    return ImmFitsSize(imm, size);
}

bool Amd64Encoder::CanEncodeBitCount()
{
    return asmjit::CpuInfo::host().hasFeature(asmjit::x86::Features::kPOPCNT);
}

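// Check for +/-infinity: shifting the raw bit pattern left by one discards the sign bit, so a single
// comparison against the similarly shifted infinity pattern covers both signs.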
void Amd64Encoder::EncodeIsInf(Reg dst, Reg src)
{
    ASSERT(dst.IsScalar() && src.IsFloat());

    GetMasm()->xor_(ArchReg(dst, DOUBLE_WORD_SIZE), ArchReg(dst, DOUBLE_WORD_SIZE));

    if (src.GetSize() == WORD_SIZE) {
        constexpr auto INF_MASK = uint32_t(0x7f800000) << 1U;

        ScopedTmpRegU32 tmp_reg(this);
        ScopedTmpRegU32 tmp1_reg(this);
        auto tmp = ArchReg(tmp_reg);
        auto tmp1 = ArchReg(tmp1_reg);

        GetMasm()->movd(tmp1, ArchVReg(src));
        GetMasm()->shl(tmp1, 1);
        GetMasm()->mov(tmp, INF_MASK);
        GetMasm()->cmp(tmp, tmp1);
    } else {
        constexpr auto INF_MASK = uint64_t(0x7ff0000000000000) << 1U;

        ScopedTmpRegU64 tmp_reg(this);
        ScopedTmpRegU64 tmp1_reg(this);
        auto tmp = ArchReg(tmp_reg);
        auto tmp1 = ArchReg(tmp1_reg);

        GetMasm()->movq(tmp1, ArchVReg(src));
        GetMasm()->shl(tmp1, 1);

        GetMasm()->mov(tmp, INF_MASK);
        GetMasm()->cmp(tmp, tmp1);
    }

    GetMasm()->sete(ArchReg(dst, BYTE_SIZE));
}

/* Since NaNs have to be canonicalized, we compare the
 * input with itself; if it is a NaN, the comparison
 * sets the parity flag (PF) */
void Amd64Encoder::EncodeFpToBits(Reg dst, Reg src)
{
    ASSERT(dst.IsScalar() && src.IsFloat());

    if (dst.GetType() == INT32_TYPE) {
        ASSERT(src.GetSize() == WORD_SIZE);

        constexpr auto FLOAT_NAN = uint32_t(0x7fc00000);

        ScopedTmpRegU32 tmp(this);

        GetMasm()->ucomiss(ArchVReg(src), ArchVReg(src));
        GetMasm()->mov(ArchReg(tmp), FLOAT_NAN);
        GetMasm()->movd(ArchReg(dst), ArchVReg(src));
        GetMasm()->cmovpe(ArchReg(dst), ArchReg(tmp));
    } else {
        ASSERT(src.GetSize() == DOUBLE_WORD_SIZE);

        constexpr auto DOUBLE_NAN = uint64_t(0x7ff8000000000000);
        ScopedTmpRegU64 tmp(this);

        GetMasm()->ucomisd(ArchVReg(src), ArchVReg(src));
        GetMasm()->mov(ArchReg(tmp), DOUBLE_NAN);
        GetMasm()->movq(ArchReg(dst), ArchVReg(src));
        GetMasm()->cmovpe(ArchReg(dst), ArchReg(tmp));
    }
}

void Amd64Encoder::EncodeMoveBitsRaw(Reg dst, Reg src)
{
    ASSERT((dst.IsFloat() && src.IsScalar()) || (src.IsFloat() && dst.IsScalar()));
    if (src.IsScalar()) {
        ASSERT(dst.GetSize() == src.GetSize());
        if (src.GetSize() == WORD_SIZE) {
            GetMasm()->movd(ArchVReg(dst), ArchReg(src));
        } else {
            GetMasm()->movq(ArchVReg(dst), ArchReg(src));
        }
    } else {
        ASSERT(src.GetSize() == dst.GetSize());
        if (dst.GetSize() == WORD_SIZE) {
            GetMasm()->movd(ArchReg(dst), ArchVReg(src));
        } else {
            GetMasm()->movq(ArchReg(dst), ArchVReg(src));
        }
    }
}

/* Unsafe intrinsics */
void Amd64Encoder::EncodeCompareAndSwap(Reg dst, Reg obj, const Reg *offset, Reg val, Reg newval)
{
    /*
     * movl old, %eax
     * lock cmpxchgl new, addr
     * sete %al
     */
    ScopedTmpRegU64 tmp1(this);
    ScopedTmpRegU64 tmp2(this);
    ScopedTmpRegU64 tmp3(this);
    Reg newvalue = newval;
    auto addr = ArchMem(MemRef(tmp2)).Prepare(GetMasm());
    auto addr_reg = ArchReg(tmp2);
    Reg rax(ConvertRegNumber(asmjit::x86::rax.id()), INT64_TYPE);

    /* TODO(ayodkev) this is a workaround for the failure of
     * jsr166.ScheduledExecutorTest; we have to figure out whether
     * there is a less crude way to avoid it */
    if (newval.GetId() == rax.GetId()) {
        SetFalseResult();
        return;
    }

    if (offset != nullptr) {
        GetMasm()->lea(addr_reg, asmjit::x86::ptr(ArchReg(obj), ArchReg(*offset)));
    } else {
        GetMasm()->mov(addr_reg, ArchReg(obj));
    }

    /* the [er]ax register will be overwritten by the cmpxchg instruction,
     * so save it unless it is set as the destination register */
    if (dst.GetId() != rax.GetId()) {
        GetMasm()->mov(ArchReg(tmp1), asmjit::x86::rax);
    }

    /* if the new value comes in the [er]ax register, we have to use a
     * different register, as [er]ax will contain the current value */
    if (newval.GetId() == rax.GetId()) {
        GetMasm()->mov(ArchReg(tmp3, newval.GetSize()), ArchReg(newval));
        newvalue = tmp3;
    }

    if (val.GetId() != rax.GetId()) {
        GetMasm()->mov(asmjit::x86::rax, ArchReg(val).r64());
    }

    GetMasm()->lock().cmpxchg(addr, ArchReg(newvalue));
    GetMasm()->sete(ArchReg(dst));

    if (dst.GetId() != rax.GetId()) {
        GetMasm()->mov(asmjit::x86::rax, ArchReg(tmp1));
    }
}

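// Atomically exchange the value at obj + offset with val and return the previous value in dst
// (xchg with a memory operand is implicitly locked; the explicit lock prefix only makes that obvious).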
void Amd64Encoder::EncodeUnsafeGetAndSet(Reg dst, Reg obj, Reg offset, Reg val)
{
    ScopedTmpRegU64 tmp(this);
    auto addr_reg = ArchReg(tmp);
    auto addr = ArchMem(MemRef(tmp)).Prepare(GetMasm());
    GetMasm()->lea(addr_reg, asmjit::x86::ptr(ArchReg(obj), ArchReg(offset)));
    GetMasm()->mov(ArchReg(dst), ArchReg(val));
    GetMasm()->lock().xchg(addr, ArchReg(dst));
}

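// Atomically add val to the value at obj + offset and return the previous value in dst (lock xadd).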
void Amd64Encoder::EncodeUnsafeGetAndAdd(Reg dst, Reg obj, Reg offset, Reg val, [[maybe_unused]] Reg tmp)
{
    ScopedTmpRegU64 tmp1(this);
    auto addr_reg = ArchReg(tmp1);
    auto addr = ArchMem(MemRef(tmp1)).Prepare(GetMasm());
    GetMasm()->lea(addr_reg, asmjit::x86::ptr(ArchReg(obj), ArchReg(offset)));
    GetMasm()->mov(ArchReg(dst), ArchReg(val));
    GetMasm()->lock().xadd(addr, ArchReg(dst));
}

void Amd64Encoder::EncodeMemoryBarrier(MemoryOrder::Order order)
{
    if (order == MemoryOrder::Full) {
        /* does the same as mfence but is faster; not applicable to NT-writes, though */
        GetMasm()->lock().add(asmjit::x86::dword_ptr(asmjit::x86::rsp), asmjit::imm(0));
    }
}

void Amd64Encoder::EncodeStackOverflowCheck(ssize_t offset)
{
    MemRef mem(GetTarget().GetStackReg(), offset);
    auto m = ArchMem(mem).Prepare(GetMasm());
    GetMasm()->test(m, ArchReg(GetTarget().GetParamReg(0)));
}

void Amd64Encoder::MakeLibCall(Reg dst, Reg src0, Reg src1, void *entry_point)
{
    if (!dst.IsFloat()) {
        SetFalseResult();
        return;
    }

    if (dst.GetType() == FLOAT32_TYPE) {
        if (!src0.IsFloat() || !src1.IsFloat()) {
            SetFalseResult();
            return;
        }

        if (src0.GetId() != asmjit::x86::xmm0.id() || src1.GetId() != asmjit::x86::xmm1.id()) {
            ScopedTmpRegF32 tmp(this);
            GetMasm()->movss(ArchVReg(tmp), ArchVReg(src1));
            GetMasm()->movss(asmjit::x86::xmm0, ArchVReg(src0));
            GetMasm()->movss(asmjit::x86::xmm1, ArchVReg(tmp));
        }

        MakeCall(entry_point);

        if (dst.GetId() != asmjit::x86::xmm0.id()) {
            GetMasm()->movss(ArchVReg(dst), asmjit::x86::xmm0);
        }
    } else if (dst.GetType() == FLOAT64_TYPE) {
        if (!src0.IsFloat() || !src1.IsFloat()) {
            SetFalseResult();
            return;
        }

        if (src0.GetId() != asmjit::x86::xmm0.id() || src1.GetId() != asmjit::x86::xmm1.id()) {
            ScopedTmpRegF64 tmp(this);
            GetMasm()->movsd(ArchVReg(tmp), ArchVReg(src1));
            GetMasm()->movsd(asmjit::x86::xmm0, ArchVReg(src0));
            GetMasm()->movsd(asmjit::x86::xmm1, ArchVReg(tmp));
        }

        MakeCall(entry_point);

        if (dst.GetId() != asmjit::x86::xmm0.id()) {
            GetMasm()->movsd(ArchVReg(dst), asmjit::x86::xmm0);
        }
    } else {
        UNREACHABLE();
    }
}

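// Spill (is_store == true) or fill the registers selected by `registers` to/from consecutive 64-bit
// stack slots starting at `slot`; floating-point registers go through movsd, general-purpose registers
// through plain mov.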
template <bool is_store>
void Amd64Encoder::LoadStoreRegisters(RegMask registers, ssize_t slot, size_t start_reg, bool is_fp)
{
    for (size_t i {0}; i < registers.size(); ++i) {
        if (!registers.test(i)) {
            continue;
        }

        asmjit::x86::Mem mem = asmjit::x86::ptr(asmjit::x86::rsp, (slot + i - start_reg) * DOUBLE_WORD_SIZE_BYTE);

        if constexpr (is_store) { // NOLINT
            if (is_fp) {
                GetMasm()->movsd(mem, asmjit::x86::xmm(i));
            } else {
                GetMasm()->mov(mem, asmjit::x86::gpq(ConvertRegNumber(i)));
            }
        } else { // NOLINT
            if (is_fp) {
                GetMasm()->movsd(asmjit::x86::xmm(i), mem);
            } else {
                GetMasm()->mov(asmjit::x86::gpq(ConvertRegNumber(i)), mem);
            }
        }
    }
}

template <bool is_store>
void Amd64Encoder::LoadStoreRegisters(RegMask registers, bool is_fp, int32_t slot, Reg base, RegMask mask)
{
    auto base_reg = ArchReg(base);
    bool has_mask = mask.any();
    int32_t index = has_mask ? static_cast<int32_t>(mask.GetMinRegister()) : 0;
    slot -= index;
    for (size_t i = index; i < registers.size(); ++i) {
        if (has_mask) {
            if (!mask.test(i)) {
                continue;
            }
            index++;
        }
        if (!registers.test(i)) {
            continue;
        }

        if (!has_mask) {
            index++;
        }

        // `-1` because we've incremented `index` in advance
        asmjit::x86::Mem mem = asmjit::x86::ptr(base_reg, (slot + index - 1) * DOUBLE_WORD_SIZE_BYTE);

        if constexpr (is_store) { // NOLINT
            if (is_fp) {
                GetMasm()->movsd(mem, asmjit::x86::xmm(i));
            } else {
                GetMasm()->mov(mem, asmjit::x86::gpq(ConvertRegNumber(i)));
            }
        } else { // NOLINT
            if (is_fp) {
                GetMasm()->movsd(asmjit::x86::xmm(i), mem);
            } else {
                GetMasm()->mov(asmjit::x86::gpq(ConvertRegNumber(i)), mem);
            }
        }
    }
}

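// Push the selected registers onto the stack: general-purpose registers use push, floating-point
// registers are stored with sub/movsd, and an extra 8-byte slot keeps the stack 16-byte aligned when
// an odd number of registers is saved.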
void Amd64Encoder::PushRegisters(RegMask registers, bool is_fp, bool align)
{
    for (size_t i = 0; i < registers.size(); i++) {
        if (registers[i]) {
            if (is_fp) {
                GetMasm()->sub(asmjit::x86::rsp, DOUBLE_WORD_SIZE_BYTE);
                GetMasm()->movsd(asmjit::x86::ptr(asmjit::x86::rsp), ArchVReg(Reg(i, FLOAT64_TYPE)));
            } else {
                GetMasm()->push(asmjit::x86::gpq(ConvertRegNumber(i)));
            }
        }
    }
    if (align && (registers.count() & 1U) != 0) {
        GetMasm()->sub(asmjit::x86::rsp, DOUBLE_WORD_SIZE_BYTE);
    }
}

void Amd64Encoder::PopRegisters(RegMask registers, bool is_fp, bool align)
{
    if (align && (registers.count() & 1U) != 0) {
        GetMasm()->add(asmjit::x86::rsp, DOUBLE_WORD_SIZE_BYTE);
    }
    for (ssize_t i = registers.size() - 1; i >= 0; i--) {
        if (registers[i]) {
            if (is_fp) {
                GetMasm()->movsd(ArchVReg(Reg(i, FLOAT64_TYPE)), asmjit::x86::ptr(asmjit::x86::rsp));
                GetMasm()->add(asmjit::x86::rsp, DOUBLE_WORD_SIZE_BYTE);
            } else {
                GetMasm()->pop(asmjit::x86::gpq(ConvertRegNumber(i)));
            }
        }
    }
}

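// Materialize an 8- or 16-byte constant in an XMM register: the low quadword is loaded through a
// temporary GPR and movq, and for 16-byte constants the high quadword is merged in with unpcklpd.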
template <typename T, size_t n>
void Amd64Encoder::CopyArrayToXmm(Reg xmm, const std::array<T, n> &arr)
{
    static constexpr auto SIZE {n * sizeof(T)};
    static_assert((SIZE == DOUBLE_WORD_SIZE_BYTE) || (SIZE == 2U * DOUBLE_WORD_SIZE_BYTE));
    ASSERT(xmm.GetType() == FLOAT64_TYPE);

    auto data {reinterpret_cast<const uint64_t *>(arr.data())};

    ScopedTmpRegU64 tmp_gpr(this);
    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
    GetMasm()->mov(ArchReg(tmp_gpr), asmjit::imm(data[0]));
    GetMasm()->movq(ArchVReg(xmm), ArchReg(tmp_gpr));

    if constexpr (SIZE == 2U * DOUBLE_WORD_SIZE_BYTE) {
        ScopedTmpRegF64 tmp_xmm(this);
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        GetMasm()->mov(ArchReg(tmp_gpr), asmjit::imm(data[1]));
        GetMasm()->movq(ArchVReg(tmp_xmm), ArchReg(tmp_gpr));
        GetMasm()->unpcklpd(ArchVReg(xmm), ArchVReg(tmp_xmm));
    }
}

template <typename T>
void Amd64Encoder::CopyImmToXmm(Reg xmm, T imm)
{
    static_assert((sizeof(imm) == WORD_SIZE_BYTE) || (sizeof(imm) == DOUBLE_WORD_SIZE_BYTE));
    ASSERT(xmm.GetSize() == BYTE_SIZE * sizeof(imm));

    if constexpr (sizeof(imm) == WORD_SIZE_BYTE) { // NOLINT
        ScopedTmpRegU32 tmp_gpr(this);
        GetMasm()->mov(ArchReg(tmp_gpr), asmjit::imm(bit_cast<uint32_t>(imm)));
        GetMasm()->movd(ArchVReg(xmm), ArchReg(tmp_gpr));
    } else { // NOLINT
        ScopedTmpRegU64 tmp_gpr(this);
        GetMasm()->mov(ArchReg(tmp_gpr), asmjit::imm(bit_cast<uint64_t>(imm)));
        GetMasm()->movq(ArchVReg(xmm), ArchReg(tmp_gpr));
    }
}

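// Disassemble the single instruction located at `pc` in the generated code buffer with Zydis and print
// it to `stream` (prefixed with its offset when code_offset >= 0); returns the pc of the next instruction.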
size_t Amd64Encoder::DisasmInstr(std::ostream &stream, size_t pc, ssize_t code_offset) const
{
    if (code_offset < 0) {
        (const_cast<Amd64Encoder *>(this))->Finalize();
    }
    Span code(GetMasm()->bufferData(), GetMasm()->offset());

    [[maybe_unused]] size_t data_left = code.Size() - pc;
    [[maybe_unused]] constexpr size_t LENGTH = ZYDIS_MAX_INSTRUCTION_LENGTH; // 15 bytes is the maximum instruction length on amd64

    // Initialize the decoder context
    ZydisDecoder decoder;
    [[maybe_unused]] bool res =
        ZYAN_SUCCESS(ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_ADDRESS_WIDTH_64));

    // Initialize the formatter
    ZydisFormatter formatter;
    res &= ZYAN_SUCCESS(ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_ATT));
    ASSERT(res);

    ZydisDecodedInstruction instruction;

    res &= ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&decoder, &code[pc], std::min(LENGTH, data_left), &instruction));

    // Format the binary instruction structure in a human-readable form
    char buffer[256]; // NOLINT (modernize-avoid-c-arrays, readability-identifier-naming, readability-magic-numbers)
    res &= ZYAN_SUCCESS(
        ZydisFormatterFormatInstruction(&formatter, &instruction, buffer, sizeof(buffer), uintptr_t(&code[pc])));

    ASSERT(res);

    // Print the disassembly
    if (code_offset < 0) {
        stream << buffer;
    } else {
        stream << std::setw(0x8) << std::right << std::setfill('0') << std::hex << pc + code_offset << std::dec
               << std::setfill(' ') << ": " << buffer;
    }

    return pc + instruction.length;
}
}  // namespace panda::compiler::amd64