1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
18 #define ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
19
20 #include <vector>
21
22 #include "arch/x86_64/instruction_set_features_x86_64.h"
23 #include "base/arena_containers.h"
24 #include "base/array_ref.h"
25 #include "base/bit_utils.h"
26 #include "base/globals.h"
27 #include "base/macros.h"
28 #include "constants_x86_64.h"
29 #include "heap_poisoning.h"
30 #include "managed_register_x86_64.h"
31 #include "offsets.h"
32 #include "utils/assembler.h"
33
34 namespace art HIDDEN {
35 namespace x86_64 {
36
37 // Encodes an immediate value for operands.
38 //
39 // Note: Immediates can be 64b on x86-64 for certain instructions, but are often restricted
40 // to 32b.
41 //
42 // Note: As we support cross-compilation, the value type must be int64_t. Please be aware of
43 // conversion rules in expressions regarding negation, especially size_t on 32b.
44 class Immediate : public ValueObject {
45 public:
Immediate(int64_t value_in)46 explicit Immediate(int64_t value_in) : value_(value_in) {}
47
value()48 int64_t value() const { return value_; }
49
is_int8()50 bool is_int8() const { return IsInt<8>(value_); }
is_uint8()51 bool is_uint8() const { return IsUint<8>(value_); }
is_int16()52 bool is_int16() const { return IsInt<16>(value_); }
is_uint16()53 bool is_uint16() const { return IsUint<16>(value_); }
is_int32()54 bool is_int32() const { return IsInt<32>(value_); }
55
56 private:
57 const int64_t value_;
58 };
59
60
61 class Operand : public ValueObject {
62 public:
mod()63 uint8_t mod() const {
64 return (encoding_at(0) >> 6) & 3;
65 }
66
rm()67 Register rm() const {
68 return static_cast<Register>(encoding_at(0) & 7);
69 }
70
scale()71 ScaleFactor scale() const {
72 return static_cast<ScaleFactor>((encoding_at(1) >> 6) & 3);
73 }
74
index()75 Register index() const {
76 return static_cast<Register>((encoding_at(1) >> 3) & 7);
77 }
78
base()79 Register base() const {
80 return static_cast<Register>(encoding_at(1) & 7);
81 }
82
cpu_rm()83 CpuRegister cpu_rm() const {
84 int ext = (rex_ & 1) != 0 ? x86_64::R8 : x86_64::RAX;
85 return static_cast<CpuRegister>(rm() + ext);
86 }
87
cpu_index()88 CpuRegister cpu_index() const {
89 int ext = (rex_ & 2) != 0 ? x86_64::R8 : x86_64::RAX;
90 return static_cast<CpuRegister>(index() + ext);
91 }
92
cpu_base()93 CpuRegister cpu_base() const {
94 int ext = (rex_ & 1) != 0 ? x86_64::R8 : x86_64::RAX;
95 return static_cast<CpuRegister>(base() + ext);
96 }
97
rex()98 uint8_t rex() const {
99 return rex_;
100 }
101
disp8()102 int8_t disp8() const {
103 CHECK_GE(length_, 2);
104 return static_cast<int8_t>(encoding_[length_ - 1]);
105 }
106
disp32()107 int32_t disp32() const {
108 CHECK_GE(length_, 5);
109 int32_t value;
110 memcpy(&value, &encoding_[length_ - 4], sizeof(value));
111 return value;
112 }
113
disp()114 int32_t disp() const {
115 switch (mod()) {
116 case 0:
117 // With mod 00b RBP is special and means disp32 (either in r/m or in SIB base).
118 return (rm() == RBP || (rm() == RSP && base() == RBP)) ? disp32() : 0;
119 case 1:
120 return disp8();
121 case 2:
122 return disp32();
123 default:
124 // Mod 11b means reg/reg, so there is no address and consequently no displacement.
125 DCHECK(false) << "there is no displacement in x86_64 reg/reg operand";
126 UNREACHABLE();
127 }
128 }
129
IsRegister(CpuRegister reg)130 bool IsRegister(CpuRegister reg) const {
131 return ((encoding_[0] & 0xF8) == 0xC0) // Addressing mode is register only.
132 && ((encoding_[0] & 0x07) == reg.LowBits()) // Register codes match.
133 && (reg.NeedsRex() == ((rex_ & 1) != 0)); // REX.000B bits match.
134 }
135
GetFixup()136 AssemblerFixup* GetFixup() const {
137 return fixup_;
138 }
139
140 inline bool operator==(const Operand &op) const {
141 return rex_ == op.rex_ &&
142 length_ == op.length_ &&
143 memcmp(encoding_, op.encoding_, length_) == 0 &&
144 fixup_ == op.fixup_;
145 }
146
147 protected:
148 // Operand can be sub classed (e.g: Address).
Operand()149 Operand() : rex_(0), length_(0), fixup_(nullptr) { }
150
SetModRM(uint8_t mod_in,CpuRegister rm_in)151 void SetModRM(uint8_t mod_in, CpuRegister rm_in) {
152 CHECK_EQ(mod_in & ~3, 0);
153 if (rm_in.NeedsRex()) {
154 rex_ |= 0x41; // REX.000B
155 }
156 encoding_[0] = (mod_in << 6) | rm_in.LowBits();
157 length_ = 1;
158 }
159
SetSIB(ScaleFactor scale_in,CpuRegister index_in,CpuRegister base_in)160 void SetSIB(ScaleFactor scale_in, CpuRegister index_in, CpuRegister base_in) {
161 CHECK_EQ(length_, 1);
162 CHECK_EQ(scale_in & ~3, 0);
163 if (base_in.NeedsRex()) {
164 rex_ |= 0x41; // REX.000B
165 }
166 if (index_in.NeedsRex()) {
167 rex_ |= 0x42; // REX.00X0
168 }
169 encoding_[1] = (scale_in << 6) | (static_cast<uint8_t>(index_in.LowBits()) << 3) |
170 static_cast<uint8_t>(base_in.LowBits());
171 length_ = 2;
172 }
173
SetDisp8(int8_t disp)174 void SetDisp8(int8_t disp) {
175 CHECK(length_ == 1 || length_ == 2);
176 encoding_[length_++] = static_cast<uint8_t>(disp);
177 }
178
SetDisp32(int32_t disp)179 void SetDisp32(int32_t disp) {
180 CHECK(length_ == 1 || length_ == 2);
181 int disp_size = sizeof(disp);
182 memmove(&encoding_[length_], &disp, disp_size);
183 length_ += disp_size;
184 }
185
SetFixup(AssemblerFixup * fixup)186 void SetFixup(AssemblerFixup* fixup) {
187 fixup_ = fixup;
188 }
189
190 private:
191 uint8_t rex_;
192 uint8_t length_;
193 uint8_t encoding_[6];
194 AssemblerFixup* fixup_;
195
Operand(CpuRegister reg)196 explicit Operand(CpuRegister reg) : rex_(0), length_(0), fixup_(nullptr) { SetModRM(3, reg); }
197
198 // Get the operand encoding byte at the given index.
encoding_at(int index_in)199 uint8_t encoding_at(int index_in) const {
200 CHECK_GE(index_in, 0);
201 CHECK_LT(index_in, length_);
202 return encoding_[index_in];
203 }
204
205 friend class X86_64Assembler;
206 };
207
208
209 class Address : public Operand {
210 public:
Address(CpuRegister base_in,int32_t disp)211 Address(CpuRegister base_in, int32_t disp) {
212 Init(base_in, disp);
213 }
214
Address(CpuRegister base_in,Offset disp)215 Address(CpuRegister base_in, Offset disp) {
216 Init(base_in, disp.Int32Value());
217 }
218
Address(CpuRegister base_in,FrameOffset disp)219 Address(CpuRegister base_in, FrameOffset disp) {
220 CHECK_EQ(base_in.AsRegister(), RSP);
221 Init(CpuRegister(RSP), disp.Int32Value());
222 }
223
Address(CpuRegister base_in,MemberOffset disp)224 Address(CpuRegister base_in, MemberOffset disp) {
225 Init(base_in, disp.Int32Value());
226 }
227
Init(CpuRegister base_in,int32_t disp)228 void Init(CpuRegister base_in, int32_t disp) {
229 if (disp == 0 && base_in.LowBits() != RBP) {
230 SetModRM(0, base_in);
231 if (base_in.LowBits() == RSP) {
232 SetSIB(TIMES_1, CpuRegister(RSP), base_in);
233 }
234 } else if (disp >= -128 && disp <= 127) {
235 SetModRM(1, base_in);
236 if (base_in.LowBits() == RSP) {
237 SetSIB(TIMES_1, CpuRegister(RSP), base_in);
238 }
239 SetDisp8(disp);
240 } else {
241 SetModRM(2, base_in);
242 if (base_in.LowBits() == RSP) {
243 SetSIB(TIMES_1, CpuRegister(RSP), base_in);
244 }
245 SetDisp32(disp);
246 }
247 }
248
Address(CpuRegister index_in,ScaleFactor scale_in,int32_t disp)249 Address(CpuRegister index_in, ScaleFactor scale_in, int32_t disp) {
250 CHECK_NE(index_in.AsRegister(), RSP); // Illegal addressing mode.
251 SetModRM(0, CpuRegister(RSP));
252 SetSIB(scale_in, index_in, CpuRegister(RBP));
253 SetDisp32(disp);
254 }
255
Address(CpuRegister base_in,CpuRegister index_in,ScaleFactor scale_in,int32_t disp)256 Address(CpuRegister base_in, CpuRegister index_in, ScaleFactor scale_in, int32_t disp) {
257 CHECK_NE(index_in.AsRegister(), RSP); // Illegal addressing mode.
258 if (disp == 0 && base_in.LowBits() != RBP) {
259 SetModRM(0, CpuRegister(RSP));
260 SetSIB(scale_in, index_in, base_in);
261 } else if (disp >= -128 && disp <= 127) {
262 SetModRM(1, CpuRegister(RSP));
263 SetSIB(scale_in, index_in, base_in);
264 SetDisp8(disp);
265 } else {
266 SetModRM(2, CpuRegister(RSP));
267 SetSIB(scale_in, index_in, base_in);
268 SetDisp32(disp);
269 }
270 }
271
272 // If no_rip is true then the Absolute address isn't RIP relative.
273 static Address Absolute(uintptr_t addr, bool no_rip = false) {
274 Address result;
275 if (no_rip) {
276 result.SetModRM(0, CpuRegister(RSP));
277 result.SetSIB(TIMES_1, CpuRegister(RSP), CpuRegister(RBP));
278 result.SetDisp32(addr);
279 } else {
280 // RIP addressing is done using RBP as the base register.
281 // The value in RBP isn't used. Instead the offset is added to RIP.
282 result.SetModRM(0, CpuRegister(RBP));
283 result.SetDisp32(addr);
284 }
285 return result;
286 }
287
288 // An RIP relative address that will be fixed up later.
RIP(AssemblerFixup * fixup)289 static Address RIP(AssemblerFixup* fixup) {
290 Address result;
291 // RIP addressing is done using RBP as the base register.
292 // The value in RBP isn't used. Instead the offset is added to RIP.
293 result.SetModRM(0, CpuRegister(RBP));
294 result.SetDisp32(0);
295 result.SetFixup(fixup);
296 return result;
297 }
298
299 // If no_rip is true then the Absolute address isn't RIP relative.
300 static Address Absolute(ThreadOffset64 addr, bool no_rip = false) {
301 return Absolute(addr.Int32Value(), no_rip);
302 }
303
304 // Break the address into pieces and reassemble it again with a new displacement.
305 // Note that it may require a new addressing mode if displacement size is changed.
displace(const Address & addr,int32_t disp)306 static Address displace(const Address &addr, int32_t disp) {
307 const int32_t new_disp = addr.disp() + disp;
308 const bool sib = addr.rm() == RSP;
309 const bool rbp = RBP == (sib ? addr.base() : addr.rm());
310 Address new_addr;
311 if (addr.mod() == 0 && rbp) {
312 // Special case: mod 00b and RBP in r/m or SIB base => 32-bit displacement.
313 // This case includes RIP-relative addressing.
314 new_addr.SetModRM(0, addr.cpu_rm());
315 if (sib) {
316 new_addr.SetSIB(addr.scale(), addr.cpu_index(), addr.cpu_base());
317 }
318 new_addr.SetDisp32(new_disp);
319 } else if (new_disp == 0 && !rbp) {
320 // Mod 00b (excluding a special case for RBP) => no displacement.
321 new_addr.SetModRM(0, addr.cpu_rm());
322 if (sib) {
323 new_addr.SetSIB(addr.scale(), addr.cpu_index(), addr.cpu_base());
324 }
325 } else if (new_disp >= -128 && new_disp <= 127) {
326 // Mod 01b => 8-bit displacement.
327 new_addr.SetModRM(1, addr.cpu_rm());
328 if (sib) {
329 new_addr.SetSIB(addr.scale(), addr.cpu_index(), addr.cpu_base());
330 }
331 new_addr.SetDisp8(new_disp);
332 } else {
333 // Mod 10b => 32-bit displacement.
334 new_addr.SetModRM(2, addr.cpu_rm());
335 if (sib) {
336 new_addr.SetSIB(addr.scale(), addr.cpu_index(), addr.cpu_base());
337 }
338 new_addr.SetDisp32(new_disp);
339 }
340 new_addr.SetFixup(addr.GetFixup());
341 return new_addr;
342 }
343
344 inline bool operator==(const Address& addr) const {
345 return static_cast<const Operand&>(*this) == static_cast<const Operand&>(addr);
346 }
347
348 private:
Address()349 Address() {}
350 };
351
352 std::ostream& operator<<(std::ostream& os, const Address& addr);
353
354 /**
355 * Class to handle constant area values.
356 */
357 class ConstantArea {
358 public:
ConstantArea(ArenaAllocator * allocator)359 explicit ConstantArea(ArenaAllocator* allocator)
360 : buffer_(allocator->Adapter(kArenaAllocAssembler)) {}
361
362 // Add a double to the constant area, returning the offset into
363 // the constant area where the literal resides.
364 size_t AddDouble(double v);
365
366 // Add a float to the constant area, returning the offset into
367 // the constant area where the literal resides.
368 size_t AddFloat(float v);
369
370 // Add an int32_t to the constant area, returning the offset into
371 // the constant area where the literal resides.
372 size_t AddInt32(int32_t v);
373
374 // Add an int32_t to the end of the constant area, returning the offset into
375 // the constant area where the literal resides.
376 size_t AppendInt32(int32_t v);
377
378 // Add an int64_t to the constant area, returning the offset into
379 // the constant area where the literal resides.
380 size_t AddInt64(int64_t v);
381
GetSize()382 size_t GetSize() const {
383 return buffer_.size() * elem_size_;
384 }
385
GetBuffer()386 ArrayRef<const int32_t> GetBuffer() const {
387 return ArrayRef<const int32_t>(buffer_);
388 }
389
390 private:
391 static constexpr size_t elem_size_ = sizeof(int32_t);
392 ArenaVector<int32_t> buffer_;
393 };
394
395
396 // This is equivalent to the Label class, used in a slightly different context. We
397 // inherit the functionality of the Label class, but prevent unintended
398 // derived-to-base conversions by making the base class private.
399 class NearLabel : private Label {
400 public:
NearLabel()401 NearLabel() : Label() {}
402
403 // Expose the Label routines that we need.
404 using Label::Position;
405 using Label::LinkPosition;
406 using Label::IsBound;
407 using Label::IsUnused;
408 using Label::IsLinked;
409
410 private:
411 using Label::BindTo;
412 using Label::LinkTo;
413
414 friend class x86_64::X86_64Assembler;
415
416 DISALLOW_COPY_AND_ASSIGN(NearLabel);
417 };
418
419
420 class X86_64Assembler final : public Assembler {
421 public:
422 explicit X86_64Assembler(ArenaAllocator* allocator,
423 const X86_64InstructionSetFeatures* instruction_set_features = nullptr)
Assembler(allocator)424 : Assembler(allocator),
425 constant_area_(allocator),
426 has_AVX_(instruction_set_features != nullptr ? instruction_set_features->HasAVX(): false),
427 has_AVX2_(instruction_set_features != nullptr ? instruction_set_features->HasAVX2() : false) {}
~X86_64Assembler()428 virtual ~X86_64Assembler() {}
429
430 /*
431 * Emit Machine Instructions.
432 */
433 void call(CpuRegister reg);
434 void call(const Address& address);
435 void call(Label* label);
436
437 void pushq(CpuRegister reg);
438 void pushq(const Address& address);
439 void pushq(const Immediate& imm);
440
441 void popq(CpuRegister reg);
442 void popq(const Address& address);
443
444 void movq(CpuRegister dst, const Immediate& src);
445 void movl(CpuRegister dst, const Immediate& src);
446 void movq(CpuRegister dst, CpuRegister src);
447 void movl(CpuRegister dst, CpuRegister src);
448
449 void movntl(const Address& dst, CpuRegister src);
450 void movntq(const Address& dst, CpuRegister src);
451
452 void movq(CpuRegister dst, const Address& src);
453 void movl(CpuRegister dst, const Address& src);
454 void movq(const Address& dst, CpuRegister src);
455 void movq(const Address& dst, const Immediate& imm);
456 void movl(const Address& dst, CpuRegister src);
457 void movl(const Address& dst, const Immediate& imm);
458
459 void cmov(Condition c, CpuRegister dst, CpuRegister src); // This is the 64b version.
460 void cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit);
461 void cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit);
462
463 void movzxb(CpuRegister dst, CpuRegister src);
464 void movzxb(CpuRegister dst, const Address& src);
465 void movsxb(CpuRegister dst, CpuRegister src);
466 void movsxb(CpuRegister dst, const Address& src);
467 void movb(CpuRegister dst, const Address& src);
468 void movb(const Address& dst, CpuRegister src);
469 void movb(const Address& dst, const Immediate& imm);
470
471 void movzxw(CpuRegister dst, CpuRegister src);
472 void movzxw(CpuRegister dst, const Address& src);
473 void movsxw(CpuRegister dst, CpuRegister src);
474 void movsxw(CpuRegister dst, const Address& src);
475 void movw(CpuRegister dst, const Address& src);
476 void movw(const Address& dst, CpuRegister src);
477 void movw(const Address& dst, const Immediate& imm);
478
479 void leaq(CpuRegister dst, const Address& src);
480 void leal(CpuRegister dst, const Address& src);
481
482 void movaps(XmmRegister dst, XmmRegister src); // move
483 void movaps(XmmRegister dst, const Address& src); // load aligned
484 void movups(XmmRegister dst, const Address& src); // load unaligned
485 void movaps(const Address& dst, XmmRegister src); // store aligned
486 void movups(const Address& dst, XmmRegister src); // store unaligned
487
488 void vmovaps(XmmRegister dst, XmmRegister src); // move
489 void vmovaps(XmmRegister dst, const Address& src); // load aligned
490 void vmovaps(const Address& dst, XmmRegister src); // store aligned
491 void vmovups(XmmRegister dst, const Address& src); // load unaligned
492 void vmovups(const Address& dst, XmmRegister src); // store unaligned
493
494 void movss(XmmRegister dst, const Address& src);
495 void movss(const Address& dst, XmmRegister src);
496 void movss(XmmRegister dst, XmmRegister src);
497
498 void movsxd(CpuRegister dst, CpuRegister src);
499 void movsxd(CpuRegister dst, const Address& src);
500
501 void movd(XmmRegister dst, CpuRegister src); // Note: this is the r64 version, formally movq.
502 void movd(CpuRegister dst, XmmRegister src); // Note: this is the r64 version, formally movq.
503 void movd(XmmRegister dst, CpuRegister src, bool is64bit);
504 void movd(CpuRegister dst, XmmRegister src, bool is64bit);
505
506 void addss(XmmRegister dst, XmmRegister src);
507 void addss(XmmRegister dst, const Address& src);
508 void subss(XmmRegister dst, XmmRegister src);
509 void subss(XmmRegister dst, const Address& src);
510 void mulss(XmmRegister dst, XmmRegister src);
511 void mulss(XmmRegister dst, const Address& src);
512 void divss(XmmRegister dst, XmmRegister src);
513 void divss(XmmRegister dst, const Address& src);
514
515 void addps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
516 void subps(XmmRegister dst, XmmRegister src);
517 void mulps(XmmRegister dst, XmmRegister src);
518 void divps(XmmRegister dst, XmmRegister src);
519
520 void vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
521 void vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
522 void vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
523 void vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
524
525 void vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
526 void vsubps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
527 void vsubpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
528 void vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
529
530 void vfmadd213ss(XmmRegister accumulator, XmmRegister left, XmmRegister right);
531 void vfmadd213sd(XmmRegister accumulator, XmmRegister left, XmmRegister right);
532
533 void movapd(XmmRegister dst, XmmRegister src); // move
534 void movapd(XmmRegister dst, const Address& src); // load aligned
535 void movupd(XmmRegister dst, const Address& src); // load unaligned
536 void movapd(const Address& dst, XmmRegister src); // store aligned
537 void movupd(const Address& dst, XmmRegister src); // store unaligned
538
539 void vmovapd(XmmRegister dst, XmmRegister src); // move
540 void vmovapd(XmmRegister dst, const Address& src); // load aligned
541 void vmovapd(const Address& dst, XmmRegister src); // store aligned
542 void vmovupd(XmmRegister dst, const Address& src); // load unaligned
543 void vmovupd(const Address& dst, XmmRegister src); // store unaligned
544
545 void movsd(XmmRegister dst, const Address& src);
546 void movsd(const Address& dst, XmmRegister src);
547 void movsd(XmmRegister dst, XmmRegister src);
548
549 void addsd(XmmRegister dst, XmmRegister src);
550 void addsd(XmmRegister dst, const Address& src);
551 void subsd(XmmRegister dst, XmmRegister src);
552 void subsd(XmmRegister dst, const Address& src);
553 void mulsd(XmmRegister dst, XmmRegister src);
554 void mulsd(XmmRegister dst, const Address& src);
555 void divsd(XmmRegister dst, XmmRegister src);
556 void divsd(XmmRegister dst, const Address& src);
557
558 void addpd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
559 void subpd(XmmRegister dst, XmmRegister src);
560 void mulpd(XmmRegister dst, XmmRegister src);
561 void divpd(XmmRegister dst, XmmRegister src);
562
563 void movdqa(XmmRegister dst, XmmRegister src); // move
564 void movdqa(XmmRegister dst, const Address& src); // load aligned
565 void movdqu(XmmRegister dst, const Address& src); // load unaligned
566 void movdqa(const Address& dst, XmmRegister src); // store aligned
567 void movdqu(const Address& dst, XmmRegister src); // store unaligned
568
569 void vmovdqa(XmmRegister dst, XmmRegister src); // move
570 void vmovdqa(XmmRegister dst, const Address& src); // load aligned
571 void vmovdqa(const Address& dst, XmmRegister src); // store aligned
572 void vmovdqu(XmmRegister dst, const Address& src); // load unaligned
573 void vmovdqu(const Address& dst, XmmRegister src); // store unaligned
574
575 void paddb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
576 void psubb(XmmRegister dst, XmmRegister src);
577
578 void vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
579 void vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
580
581 void paddw(XmmRegister dst, XmmRegister src);
582 void psubw(XmmRegister dst, XmmRegister src);
583 void pmullw(XmmRegister dst, XmmRegister src);
584 void vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2);
585
586 void vpsubb(XmmRegister dst, XmmRegister src1, XmmRegister src2);
587 void vpsubw(XmmRegister dst, XmmRegister src1, XmmRegister src2);
588 void vpsubd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
589
590 void paddd(XmmRegister dst, XmmRegister src);
591 void psubd(XmmRegister dst, XmmRegister src);
592 void pmulld(XmmRegister dst, XmmRegister src);
593 void vpmulld(XmmRegister dst, XmmRegister src1, XmmRegister src2);
594
595 void vpaddd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
596
597 void paddq(XmmRegister dst, XmmRegister src);
598 void psubq(XmmRegister dst, XmmRegister src);
599
600 void vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
601 void vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
602
603 void paddusb(XmmRegister dst, XmmRegister src);
604 void paddsb(XmmRegister dst, XmmRegister src);
605 void paddusw(XmmRegister dst, XmmRegister src);
606 void paddsw(XmmRegister dst, XmmRegister src);
607 void psubusb(XmmRegister dst, XmmRegister src);
608 void psubsb(XmmRegister dst, XmmRegister src);
609 void psubusw(XmmRegister dst, XmmRegister src);
610 void psubsw(XmmRegister dst, XmmRegister src);
611
612 void cvtsi2ss(XmmRegister dst, CpuRegister src); // Note: this is the r/m32 version.
613 void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit);
614 void cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit);
615 void cvtsi2sd(XmmRegister dst, CpuRegister src); // Note: this is the r/m32 version.
616 void cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit);
617 void cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit);
618
619 void cvtss2si(CpuRegister dst, XmmRegister src); // Note: this is the r32 version.
620 void cvtss2sd(XmmRegister dst, XmmRegister src);
621 void cvtss2sd(XmmRegister dst, const Address& src);
622
623 void cvtsd2si(CpuRegister dst, XmmRegister src); // Note: this is the r32 version.
624 void cvtsd2ss(XmmRegister dst, XmmRegister src);
625 void cvtsd2ss(XmmRegister dst, const Address& src);
626
627 void cvttss2si(CpuRegister dst, XmmRegister src); // Note: this is the r32 version.
628 void cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit);
629 void cvttsd2si(CpuRegister dst, XmmRegister src); // Note: this is the r32 version.
630 void cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit);
631
632 void cvtdq2ps(XmmRegister dst, XmmRegister src);
633 void cvtdq2pd(XmmRegister dst, XmmRegister src);
634
635 void comiss(XmmRegister a, XmmRegister b);
636 void comiss(XmmRegister a, const Address& b);
637 void comisd(XmmRegister a, XmmRegister b);
638 void comisd(XmmRegister a, const Address& b);
639 void ucomiss(XmmRegister a, XmmRegister b);
640 void ucomiss(XmmRegister a, const Address& b);
641 void ucomisd(XmmRegister a, XmmRegister b);
642 void ucomisd(XmmRegister a, const Address& b);
643
644 void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm);
645 void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm);
646
647 void sqrtsd(XmmRegister dst, XmmRegister src);
648 void sqrtss(XmmRegister dst, XmmRegister src);
649
650 void xorpd(XmmRegister dst, const Address& src);
651 void xorpd(XmmRegister dst, XmmRegister src);
652 void xorps(XmmRegister dst, const Address& src);
653 void xorps(XmmRegister dst, XmmRegister src);
654 void pxor(XmmRegister dst, XmmRegister src); // no addr variant (for now)
655 void vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2);
656 void vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
657 void vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
658
659 void andpd(XmmRegister dst, const Address& src);
660 void andpd(XmmRegister dst, XmmRegister src);
661 void andps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
662 void pand(XmmRegister dst, XmmRegister src);
663 void vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2);
664 void vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
665 void vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
666
667 void andn(CpuRegister dst, CpuRegister src1, CpuRegister src2);
668 void andnpd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
669 void andnps(XmmRegister dst, XmmRegister src);
670 void pandn(XmmRegister dst, XmmRegister src);
671 void vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2);
672 void vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
673 void vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
674
675 void orpd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
676 void orps(XmmRegister dst, XmmRegister src);
677 void por(XmmRegister dst, XmmRegister src);
678 void vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2);
679 void vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
680 void vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
681
682 void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
683 void pavgw(XmmRegister dst, XmmRegister src);
684 void psadbw(XmmRegister dst, XmmRegister src);
685 void pmaddwd(XmmRegister dst, XmmRegister src);
686 void vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
687 void phaddw(XmmRegister dst, XmmRegister src);
688 void phaddd(XmmRegister dst, XmmRegister src);
689 void haddps(XmmRegister dst, XmmRegister src);
690 void haddpd(XmmRegister dst, XmmRegister src);
691 void phsubw(XmmRegister dst, XmmRegister src);
692 void phsubd(XmmRegister dst, XmmRegister src);
693 void hsubps(XmmRegister dst, XmmRegister src);
694 void hsubpd(XmmRegister dst, XmmRegister src);
695
696 void pminsb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
697 void pmaxsb(XmmRegister dst, XmmRegister src);
698 void pminsw(XmmRegister dst, XmmRegister src);
699 void pmaxsw(XmmRegister dst, XmmRegister src);
700 void pminsd(XmmRegister dst, XmmRegister src);
701 void pmaxsd(XmmRegister dst, XmmRegister src);
702
703 void pminub(XmmRegister dst, XmmRegister src); // no addr variant (for now)
704 void pmaxub(XmmRegister dst, XmmRegister src);
705 void pminuw(XmmRegister dst, XmmRegister src);
706 void pmaxuw(XmmRegister dst, XmmRegister src);
707 void pminud(XmmRegister dst, XmmRegister src);
708 void pmaxud(XmmRegister dst, XmmRegister src);
709
710 void minps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
711 void maxps(XmmRegister dst, XmmRegister src);
712 void minpd(XmmRegister dst, XmmRegister src);
713 void maxpd(XmmRegister dst, XmmRegister src);
714
715 void pcmpeqb(XmmRegister dst, XmmRegister src);
716 void pcmpeqw(XmmRegister dst, XmmRegister src);
717 void pcmpeqd(XmmRegister dst, XmmRegister src);
718 void pcmpeqq(XmmRegister dst, XmmRegister src);
719
720 void pcmpgtb(XmmRegister dst, XmmRegister src);
721 void pcmpgtw(XmmRegister dst, XmmRegister src);
722 void pcmpgtd(XmmRegister dst, XmmRegister src);
723 void pcmpgtq(XmmRegister dst, XmmRegister src); // SSE4.2
724
725 void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
726 void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
727 void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
728
729 void punpcklbw(XmmRegister dst, XmmRegister src);
730 void punpcklwd(XmmRegister dst, XmmRegister src);
731 void punpckldq(XmmRegister dst, XmmRegister src);
732 void punpcklqdq(XmmRegister dst, XmmRegister src);
733
734 void punpckhbw(XmmRegister dst, XmmRegister src);
735 void punpckhwd(XmmRegister dst, XmmRegister src);
736 void punpckhdq(XmmRegister dst, XmmRegister src);
737 void punpckhqdq(XmmRegister dst, XmmRegister src);
738
739 void psllw(XmmRegister reg, const Immediate& shift_count);
740 void pslld(XmmRegister reg, const Immediate& shift_count);
741 void psllq(XmmRegister reg, const Immediate& shift_count);
742
743 void psraw(XmmRegister reg, const Immediate& shift_count);
744 void psrad(XmmRegister reg, const Immediate& shift_count);
745 // no psraq
746
747 void psrlw(XmmRegister reg, const Immediate& shift_count);
748 void psrld(XmmRegister reg, const Immediate& shift_count);
749 void psrlq(XmmRegister reg, const Immediate& shift_count);
750 void psrldq(XmmRegister reg, const Immediate& shift_count);
751
752 void flds(const Address& src);
753 void fstps(const Address& dst);
754 void fsts(const Address& dst);
755
756 void fldl(const Address& src);
757 void fstpl(const Address& dst);
758 void fstl(const Address& dst);
759
760 void fstsw();
761
762 void fucompp();
763
764 void fnstcw(const Address& dst);
765 void fldcw(const Address& src);
766
767 void fistpl(const Address& dst);
768 void fistps(const Address& dst);
769 void fildl(const Address& src);
770 void filds(const Address& src);
771
772 void fincstp();
773 void ffree(const Immediate& index);
774
775 void fsin();
776 void fcos();
777 void fptan();
778 void fprem();
779
780 void xchgb(CpuRegister dst, CpuRegister src);
781 void xchgb(CpuRegister reg, const Address& address);
782
783 void xchgw(CpuRegister dst, CpuRegister src);
784 void xchgw(CpuRegister reg, const Address& address);
785
786 void xchgl(CpuRegister dst, CpuRegister src);
787 void xchgl(CpuRegister reg, const Address& address);
788
789 void xchgq(CpuRegister dst, CpuRegister src);
790 void xchgq(CpuRegister reg, const Address& address);
791
792 void xaddb(CpuRegister dst, CpuRegister src);
793 void xaddb(const Address& address, CpuRegister reg);
794
795 void xaddw(CpuRegister dst, CpuRegister src);
796 void xaddw(const Address& address, CpuRegister reg);
797
798 void xaddl(CpuRegister dst, CpuRegister src);
799 void xaddl(const Address& address, CpuRegister reg);
800
801 void xaddq(CpuRegister dst, CpuRegister src);
802 void xaddq(const Address& address, CpuRegister reg);
803
// Compare (cmp*) and test (test*) emitters.
// The set of operand combinations differs per width. Within this group, cmpb/cmpw exist only
// as (Address, Immediate); cmpl also has an (Address, CpuRegister) form that cmpq lacks; testq
// has no Immediate form.
804 void cmpb(const Address& address, const Immediate& imm);
805 void cmpw(const Address& address, const Immediate& imm);
806
807 void cmpl(CpuRegister reg, const Immediate& imm);
808 void cmpl(CpuRegister reg0, CpuRegister reg1);
809 void cmpl(CpuRegister reg, const Address& address);
810 void cmpl(const Address& address, CpuRegister reg);
811 void cmpl(const Address& address, const Immediate& imm);
812
813 void cmpq(CpuRegister reg0, CpuRegister reg1);
814 void cmpq(CpuRegister reg0, const Immediate& imm);
815 void cmpq(CpuRegister reg0, const Address& address);
816 void cmpq(const Address& address, const Immediate& imm);
817
818 void testl(CpuRegister reg1, CpuRegister reg2);
819 void testl(CpuRegister reg, const Address& address);
820 void testl(CpuRegister reg, const Immediate& imm);
821
822 void testq(CpuRegister reg1, CpuRegister reg2);
823 void testq(CpuRegister reg, const Address& address);
824
825 // Memory-operand tests against an immediate. This testl overload is declared here rather than
826 // next to the register forms of testl above.
825 void testb(const Address& address, const Immediate& imm);
826 void testl(const Address& address, const Immediate& imm);
827
// Bitwise and/or/xor emitters. The l and q variants each offer three forms with a register as
// the first operand: (register, Immediate), (register, register) and (register, Address).
// andw is the only entry here whose first operand is memory: (Address, Immediate).
828 void andl(CpuRegister dst, const Immediate& imm);
829 void andl(CpuRegister dst, CpuRegister src);
830 void andl(CpuRegister reg, const Address& address);
831 void andq(CpuRegister dst, const Immediate& imm);
832 void andq(CpuRegister dst, CpuRegister src);
833 void andq(CpuRegister reg, const Address& address);
834 void andw(const Address& address, const Immediate& imm);
835
836 void orl(CpuRegister dst, const Immediate& imm);
837 void orl(CpuRegister dst, CpuRegister src);
838 void orl(CpuRegister reg, const Address& address);
839 void orq(CpuRegister dst, CpuRegister src);
840 void orq(CpuRegister dst, const Immediate& imm);
841 void orq(CpuRegister reg, const Address& address);
842
843 void xorl(CpuRegister dst, CpuRegister src);
844 void xorl(CpuRegister dst, const Immediate& imm);
845 void xorl(CpuRegister reg, const Address& address);
846 void xorq(CpuRegister dst, const Immediate& imm);
847 void xorq(CpuRegister dst, CpuRegister src);
848 void xorq(CpuRegister reg, const Address& address);
849
// Addition and subtraction emitters.
// Within this group only addl and addw have overloads whose first operand is an Address; addq,
// subl and subq are declared with a register first operand only.
850 void addl(CpuRegister dst, CpuRegister src);
851 void addl(CpuRegister reg, const Immediate& imm);
852 void addl(CpuRegister reg, const Address& address);
853 void addl(const Address& address, CpuRegister reg);
854 void addl(const Address& address, const Immediate& imm);
855 void addw(const Address& address, const Immediate& imm);
856
857 void addq(CpuRegister reg, const Immediate& imm);
858 void addq(CpuRegister dst, CpuRegister src);
859 void addq(CpuRegister dst, const Address& address);
860
861 void subl(CpuRegister dst, CpuRegister src);
862 void subl(CpuRegister reg, const Immediate& imm);
863 void subl(CpuRegister reg, const Address& address);
864
865 void subq(CpuRegister reg, const Immediate& imm);
866 void subq(CpuRegister dst, CpuRegister src);
867 void subq(CpuRegister dst, const Address& address);
868
// cdq/cqo plus the division and multiplication emitters (declarations only).
// NOTE(review): cdq/cqo are presumably emitted ahead of idivl/idivq to set up the implicit
// high-half register -- confirm at the call sites.
869 void cdq();
870 void cqo();
871
872 // The divide emitters take a single register operand.
872 void idivl(CpuRegister reg);
873 void idivq(CpuRegister reg);
874 void divl(CpuRegister reg);
875 void divq(CpuRegister reg);
876
877 // imull: two-operand forms and a three-operand (dst, src, imm) form.
877 void imull(CpuRegister dst, CpuRegister src);
878 void imull(CpuRegister reg, const Immediate& imm);
879 void imull(CpuRegister dst, CpuRegister src, const Immediate& imm);
880 void imull(CpuRegister reg, const Address& address);
881
882 // imulq: the single-operand overload is declared together with the multi-operand ones.
882 void imulq(CpuRegister src);
883 void imulq(CpuRegister dst, CpuRegister src);
884 void imulq(CpuRegister reg, const Immediate& imm);
885 void imulq(CpuRegister reg, const Address& address);
886 void imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm);
887
888 // Single-operand imull/mull overloads, taking either a register or a memory operand.
888 void imull(CpuRegister reg);
889 void imull(const Address& address);
890
891 void mull(CpuRegister reg);
892 void mull(const Address& address);
893
// Shift emitters. Each of shl/shr/sar comes in an l and a q variant with two overloads: one
// taking the count as an Immediate and one taking it in a register (`shifter`).
// NOTE(review): the register-count form presumably requires `shifter` to be RCX/CL, as the
// hardware encoding does -- confirm in the definitions.
894 void shll(CpuRegister reg, const Immediate& imm);
895 void shll(CpuRegister operand, CpuRegister shifter);
896 void shrl(CpuRegister reg, const Immediate& imm);
897 void shrl(CpuRegister operand, CpuRegister shifter);
898 void sarl(CpuRegister reg, const Immediate& imm);
899 void sarl(CpuRegister operand, CpuRegister shifter);
900
901 void shlq(CpuRegister reg, const Immediate& imm);
902 void shlq(CpuRegister operand, CpuRegister shifter);
903 void shrq(CpuRegister reg, const Immediate& imm);
904 void shrq(CpuRegister operand, CpuRegister shifter);
905 void sarq(CpuRegister reg, const Immediate& imm);
906 void sarq(CpuRegister operand, CpuRegister shifter);
907
908 // Single-register negate / bitwise-not emitters. negl is what the heap-poisoning helpers
909 // further down in this class call.
908 void negl(CpuRegister reg);
909 void negq(CpuRegister reg);
910
911 void notl(CpuRegister reg);
912 void notq(CpuRegister reg);
913
// Frame, return, padding/trap and branch emitters (declarations only).
914 void enter(const Immediate& imm);
915 void leave();
916
917 // ret is available with and without an Immediate operand.
917 void ret();
918 void ret(const Immediate& imm);
919
920 void nop();
921 void int3();
922 void hlt();
923
924 // Conditional jumps: overloaded on the label type (Label vs. NearLabel). jrcxz is declared
925 // for NearLabel only.
924 void j(Condition condition, Label* label);
925 void j(Condition condition, NearLabel* label);
926 void jrcxz(NearLabel* label);
927
928 // Unconditional jumps to a register, a memory operand, or a label of either kind.
928 void jmp(CpuRegister reg);
929 void jmp(const Address& address);
930 void jmp(Label* label);
931 void jmp(NearLabel* label);
932
// lock() returns an X86_64Assembler* so that a following emitter can be chained onto it, as the
// Lock* helpers in this class do (e.g. `lock()->cmpxchgl(address, reg)`).
933 X86_64Assembler* lock();
934 void cmpxchgb(const Address& address, CpuRegister reg);
935 void cmpxchgw(const Address& address, CpuRegister reg);
936 void cmpxchgl(const Address& address, CpuRegister reg);
937 void cmpxchgq(const Address& address, CpuRegister reg);
938
939 void mfence();
940
941 // Like lock(), gs() returns an X86_64Assembler*.
942 // NOTE(review): presumably emits the GS segment-override prefix -- confirm in the definition.
941 X86_64Assembler* gs();
942
943 void setcc(Condition condition, CpuRegister dst);
944
945 // The byte-swap emitters take a single register operand.
945 void bswapl(CpuRegister dst);
946 void bswapq(CpuRegister dst);
947
// Bit-scan, bit-manipulation, population-count and rotate emitters (declarations only).
// bsf*, bsr* and popcnt* each accept the source as either a register or an Address.
948 void bsfl(CpuRegister dst, CpuRegister src);
949 void bsfl(CpuRegister dst, const Address& src);
950 void bsfq(CpuRegister dst, CpuRegister src);
951 void bsfq(CpuRegister dst, const Address& src);
952
953 void blsi(CpuRegister dst, CpuRegister src); // no addr variant (for now)
954 void blsmsk(CpuRegister dst, CpuRegister src); // no addr variant (for now)
955 void blsr(CpuRegister dst, CpuRegister src); // no addr variant (for now)
956
957 void bsrl(CpuRegister dst, CpuRegister src);
958 void bsrl(CpuRegister dst, const Address& src);
959 void bsrq(CpuRegister dst, CpuRegister src);
960 void bsrq(CpuRegister dst, const Address& src);
961
962 void popcntl(CpuRegister dst, CpuRegister src);
963 void popcntl(CpuRegister dst, const Address& src);
964 void popcntq(CpuRegister dst, CpuRegister src);
965 void popcntq(CpuRegister dst, const Address& src);
966
967 // Rotates mirror the shift emitters: the count is either an Immediate or a register (`shifter`).
967 void rorl(CpuRegister reg, const Immediate& imm);
968 void rorl(CpuRegister operand, CpuRegister shifter);
969 void roll(CpuRegister reg, const Immediate& imm);
970 void roll(CpuRegister operand, CpuRegister shifter);
971
972 void rorq(CpuRegister reg, const Immediate& imm);
973 void rorq(CpuRegister operand, CpuRegister shifter);
974 void rolq(CpuRegister reg, const Immediate& imm);
975 void rolq(CpuRegister operand, CpuRegister shifter);
976
// Repeat-prefixed string instruction emitters; none of them takes an explicit operand.
// NOTE(review): the underlying instructions use implicit registers (RCX/RSI/RDI/RAX), so
// callers presumably set those up beforehand -- confirm at the call sites.
977 void repne_scasb();
978 void repne_scasw();
979 void repe_cmpsw();
980 void repe_cmpsl();
981 void repe_cmpsq();
982 void rep_movsw();
983 void rep_movsb();
984 void rep_movsl();
985
986 void ud2();
987
988 //
989 // Macros for high-level operations.
990 //
991
992 // Declared here, defined out of line.
992 void AddImmediate(CpuRegister reg, const Immediate& imm);
993
994 // Declared here, defined out of line. Takes the double by value together with the XMM
995 // register `dst`.
994 void LoadDoubleConstant(XmmRegister dst, double value);
995
LockCmpxchgb(const Address & address,CpuRegister reg)996 void LockCmpxchgb(const Address& address, CpuRegister reg) {
997 lock()->cmpxchgb(address, reg);
998 }
999
LockCmpxchgw(const Address & address,CpuRegister reg)1000 void LockCmpxchgw(const Address& address, CpuRegister reg) {
1001 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1002 // We make sure that the operand size override bytecode is emited before the lock bytecode.
1003 // We test against clang which enforces this bytecode order.
1004 EmitOperandSizeOverride();
1005 EmitUint8(0xF0);
1006 EmitOptionalRex32(reg, address);
1007 EmitUint8(0x0F);
1008 EmitUint8(0xB1);
1009 EmitOperand(reg.LowBits(), address);
1010 }
1011
LockCmpxchgl(const Address & address,CpuRegister reg)1012 void LockCmpxchgl(const Address& address, CpuRegister reg) {
1013 lock()->cmpxchgl(address, reg);
1014 }
1015
LockCmpxchgq(const Address & address,CpuRegister reg)1016 void LockCmpxchgq(const Address& address, CpuRegister reg) {
1017 lock()->cmpxchgq(address, reg);
1018 }
1019
LockXaddb(const Address & address,CpuRegister reg)1020 void LockXaddb(const Address& address, CpuRegister reg) {
1021 lock()->xaddb(address, reg);
1022 }
1023
LockXaddw(const Address & address,CpuRegister reg)1024 void LockXaddw(const Address& address, CpuRegister reg) {
1025 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1026 // We make sure that the operand size override bytecode is emited before the lock bytecode.
1027 // We test against clang which enforces this bytecode order.
1028 EmitOperandSizeOverride();
1029 EmitUint8(0xF0);
1030 EmitOptionalRex32(reg, address);
1031 EmitUint8(0x0F);
1032 EmitUint8(0xC1);
1033 EmitOperand(reg.LowBits(), address);
1034 }
1035
LockXaddl(const Address & address,CpuRegister reg)1036 void LockXaddl(const Address& address, CpuRegister reg) {
1037 lock()->xaddl(address, reg);
1038 }
1039
LockXaddq(const Address & address,CpuRegister reg)1040 void LockXaddq(const Address& address, CpuRegister reg) {
1041 lock()->xaddq(address, reg);
1042 }
1043
1044 //
1045 // Misc. functionality
1046 //
// Returns the loop alignment this assembler prefers; the value is the fixed constant 16.
int PreferredLoopAlignment() {
  constexpr int kPreferredLoopAlignment = 16;
  return kPreferredLoopAlignment;
}
// Declared here, defined out of line. Takes the requested alignment and an offset, both as int.
1048 void Align(int alignment, int offset);
// Overrides the base-class Bind() for Label; defined out of line.
1049 void Bind(Label* label) override;
Jump(Label * label)1050 void Jump(Label* label) override {
1051 jmp(label);
1052 }
// Bind() overload for NearLabel. Unlike Bind(Label*), it is not declared as an override.
1053 void Bind(NearLabel* label);
1054
1055 // Add a double to the constant area, returning the offset into
1056 // the constant area where the literal resides.
AddDouble(double v)1057 size_t AddDouble(double v) { return constant_area_.AddDouble(v); }
1058
1059 // Add a float to the constant area, returning the offset into
1060 // the constant area where the literal resides.
AddFloat(float v)1061 size_t AddFloat(float v) { return constant_area_.AddFloat(v); }
1062
1063 // Add an int32_t to the constant area, returning the offset into
1064 // the constant area where the literal resides.
AddInt32(int32_t v)1065 size_t AddInt32(int32_t v) {
1066 return constant_area_.AddInt32(v);
1067 }
1068
1069 // Add an int32_t to the end of the constant area, returning the offset into
1070 // the constant area where the literal resides.
AppendInt32(int32_t v)1071 size_t AppendInt32(int32_t v) {
1072 return constant_area_.AppendInt32(v);
1073 }
1074
1075 // Add an int64_t to the constant area, returning the offset into
1076 // the constant area where the literal resides.
AddInt64(int64_t v)1077 size_t AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
1078
1079 // Add the contents of the constant area to the assembler buffer. Declared here, defined
1080 // out of line.
1080 void AddConstantArea();
1081
1082 // Is the constant area empty? Return true if there are no literals in the constant area.
IsConstantAreaEmpty()1083 bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; }
1084
1085 // Return the current size of the constant area.
ConstantAreaSize()1086 size_t ConstantAreaSize() const { return constant_area_.GetSize(); }
1087
1088 //
1089 // Heap poisoning.
1090 //
1091
1092 // Poison a heap reference contained in `reg`.
PoisonHeapReference(CpuRegister reg)1093 void PoisonHeapReference(CpuRegister reg) { negl(reg); }
1094 // Unpoison a heap reference contained in `reg`.
UnpoisonHeapReference(CpuRegister reg)1095 void UnpoisonHeapReference(CpuRegister reg) { negl(reg); }
1096 // Poison a heap reference contained in `reg` if heap poisoning is enabled.
MaybePoisonHeapReference(CpuRegister reg)1097 void MaybePoisonHeapReference(CpuRegister reg) {
1098 if (kPoisonHeapReferences) {
1099 PoisonHeapReference(reg);
1100 }
1101 }
1102 // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
MaybeUnpoisonHeapReference(CpuRegister reg)1103 void MaybeUnpoisonHeapReference(CpuRegister reg) {
1104 if (kPoisonHeapReferences) {
1105 UnpoisonHeapReference(reg);
1106 }
1107 }
1108
// Declared here, defined out of line.
// NOTE(review): presumably reports whether has_AVX_ or has_AVX2_ is set -- confirm.
1109 bool CpuHasAVXorAVX2FeatureFlag();
1110
1111 private:
// Low-level emit helpers. EmitUint8, EmitInt32, EmitInt64, EmitRegisterOperand,
// EmitXmmRegisterOperand, EmitFixup and EmitOperandSizeOverride are defined inline after the
// class; the remaining ones are declarations only.
1112 void EmitUint8(uint8_t value);
1113 void EmitInt32(int32_t value);
1114 void EmitInt64(int64_t value);
1115 void EmitRegisterOperand(uint8_t rm, uint8_t reg);
1116 void EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg);
1117 void EmitFixup(AssemblerFixup* fixup);
1118 void EmitOperandSizeOverride();
1119
1120 void EmitOperand(uint8_t rm, const Operand& operand);
1121 // `is_16_op` defaults to false in both EmitImmediate and EmitComplex.
1121 void EmitImmediate(const Immediate& imm, bool is_16_op = false);
1122 void EmitComplex(
1123 uint8_t rm, const Operand& operand, const Immediate& immediate, bool is_16_op = false);
1124 void EmitLabel(Label* label, int instruction_size);
1125 void EmitLabelLink(Label* label);
1126 void EmitLabelLink(NearLabel* label);
1127
1128 // Shared shift helper, overloaded on whether the count is an Immediate or a register.
1128 void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm);
1129 void EmitGenericShift(bool wide, int rm, CpuRegister operand, CpuRegister shifter);
1130
1131 // If any input is not false, output the necessary REX prefix.
1132 void EmitOptionalRex(bool force, bool w, bool r, bool x, bool b);
1133
1134 // Emit a REX prefix byte if necessary for reg, i.e. if reg is a register in the range R8 to R15.
1135 void EmitOptionalRex32(CpuRegister reg);
1136 void EmitOptionalRex32(CpuRegister dst, CpuRegister src);
1137 void EmitOptionalRex32(XmmRegister dst, XmmRegister src);
1138 void EmitOptionalRex32(CpuRegister dst, XmmRegister src);
1139 void EmitOptionalRex32(XmmRegister dst, CpuRegister src);
1140 void EmitOptionalRex32(const Operand& operand);
1141 void EmitOptionalRex32(CpuRegister dst, const Operand& operand);
1142 void EmitOptionalRex32(XmmRegister dst, const Operand& operand);
1143
1144 // Emit a REX.W prefix plus necessary register bit encodings.
1145 void EmitRex64();
1146 void EmitRex64(CpuRegister reg);
1147 void EmitRex64(const Operand& operand);
1148 void EmitRex64(CpuRegister dst, CpuRegister src);
1149 void EmitRex64(CpuRegister dst, const Operand& operand);
1150 void EmitRex64(XmmRegister dst, const Operand& operand);
1151 void EmitRex64(XmmRegister dst, CpuRegister src);
1152 void EmitRex64(CpuRegister dst, XmmRegister src);
1153
1154 // Emit a REX prefix to normalize byte registers plus necessary register bit encodings.
1155 // `normalize_both` parameter controls if the REX prefix is checked only for the `src` register
1156 // (which is the case for instructions like `movzxb rax, bpl`), or for both `src` and `dst`
1157 // registers (which is the case of instructions like `xchg bpl, al`). By default only `src` is
1158 // used to decide if REX is needed.
1159 void EmitOptionalByteRegNormalizingRex32(CpuRegister dst,
1160 CpuRegister src,
1161 bool normalize_both = false);
1162 void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand);
1163
// VEX prefix helpers. Each one returns a single byte (uint8_t).
// NOTE(review): despite the Emit* names these return the byte; whether they also write it to
// the buffer cannot be determined from the declarations -- confirm in the definitions.
1164 uint8_t EmitVexPrefixByteZero(bool is_twobyte_form);
1165 uint8_t EmitVexPrefixByteOne(bool R, bool X, bool B, int SET_VEX_M);
1166 uint8_t EmitVexPrefixByteOne(bool R,
1167 X86_64ManagedRegister operand,
1168 int SET_VEX_L,
1169 int SET_VEX_PP);
1170 uint8_t EmitVexPrefixByteTwo(bool W,
1171 X86_64ManagedRegister operand,
1172 int SET_VEX_L,
1173 int SET_VEX_PP);
1174 uint8_t EmitVexPrefixByteTwo(bool W,
1175 int SET_VEX_L,
1176 int SET_VEX_PP);
1177
1178 // Helper function to emit a shorter variant of XCHG if at least one operand is RAX/EAX/AX.
1179 // `prefix_fn` is a pointer to a member function taking a CpuRegister. Returns a bool.
1180 // NOTE(review): presumably true means the short form was emitted -- confirm in the definition.
1179 bool try_xchg_rax(CpuRegister dst,
1180 CpuRegister src,
1181 void (X86_64Assembler::*prefix_fn)(CpuRegister));
1182
1183 // Backing store for the Add*/Append* literal helpers and the constant-area size queries.
1183 ConstantArea constant_area_;
1184 bool has_AVX_; // x86 256bit SIMD AVX.
1185 bool has_AVX2_; // x86 256bit SIMD AVX 2.0.
1186
1187 DISALLOW_COPY_AND_ASSIGN(X86_64Assembler);
1188 };
1189
EmitUint8(uint8_t value)1190 inline void X86_64Assembler::EmitUint8(uint8_t value) {
1191 buffer_.Emit<uint8_t>(value);
1192 }
1193
EmitInt32(int32_t value)1194 inline void X86_64Assembler::EmitInt32(int32_t value) {
1195 buffer_.Emit<int32_t>(value);
1196 }
1197
EmitInt64(int64_t value)1198 inline void X86_64Assembler::EmitInt64(int64_t value) {
1199 // Write this 64-bit value as two 32-bit words for alignment reasons
1200 // (this is essentially when running on ARM, which does not allow
1201 // 64-bit unaligned accesses). We assume little-endianness here.
1202 EmitInt32(Low32Bits(value));
1203 EmitInt32(High32Bits(value));
1204 }
1205
EmitRegisterOperand(uint8_t rm,uint8_t reg)1206 inline void X86_64Assembler::EmitRegisterOperand(uint8_t rm, uint8_t reg) {
1207 CHECK_GE(rm, 0);
1208 CHECK_LT(rm, 8);
1209 buffer_.Emit<uint8_t>((0xC0 | (reg & 7)) + (rm << 3));
1210 }
1211
EmitXmmRegisterOperand(uint8_t rm,XmmRegister reg)1212 inline void X86_64Assembler::EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg) {
1213 EmitRegisterOperand(rm, static_cast<uint8_t>(reg.AsFloatRegister()));
1214 }
1215
EmitFixup(AssemblerFixup * fixup)1216 inline void X86_64Assembler::EmitFixup(AssemblerFixup* fixup) {
1217 buffer_.EmitFixup(fixup);
1218 }
1219
EmitOperandSizeOverride()1220 inline void X86_64Assembler::EmitOperandSizeOverride() {
1221 EmitUint8(0x66);
1222 }
1223
1224 } // namespace x86_64
1225 } // namespace art
1226
1227 #endif // ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
1228