/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Assembler to produce x86-64 instructions. Somewhat influenced by the V8 assembler.
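//
// A minimal usage sketch (illustrative only: it assumes berberis::MachineCode as
// the backing buffer and a Ret() among the generated instructions; consult the
// actual headers for the exact finalize/install flow):
//
//   MachineCode code;
//   x86_64::Assembler as(&code);
//   as.Movq(Assembler::rax, 42);  // Picks the shortest encoding; see Movq below.
//   as.Ret();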

#ifndef BERBERIS_ASSEMBLER_X86_64_H_
#define BERBERIS_ASSEMBLER_X86_64_H_

#include <type_traits>  // std::is_same

#include "berberis/assembler/x86_32_and_x86_64.h"
#include "berberis/base/logging.h"

namespace berberis {

class MachineCode;

namespace x86_64 {

class Assembler : public x86_32_and_x86_64::Assembler<Assembler> {
 public:
  using BaseAssembler = x86_32_and_x86_64::Assembler<Assembler>;
  using FinalAssembler = Assembler;

  explicit Assembler(MachineCode* code) : BaseAssembler(code) {}

  static constexpr Register no_register{0x80};
  static constexpr Register rax{0};
  static constexpr Register rcx{1};
  static constexpr Register rdx{2};
  static constexpr Register rbx{3};
  static constexpr Register rsp{4};
  static constexpr Register rbp{5};
  static constexpr Register rsi{6};
  static constexpr Register rdi{7};
  static constexpr Register r8{8};
  static constexpr Register r9{9};
  static constexpr Register r10{10};
  static constexpr Register r11{11};
  static constexpr Register r12{12};
  static constexpr Register r13{13};
  static constexpr Register r14{14};
  static constexpr Register r15{15};

  static constexpr XMMRegister no_xmm_register{0x80};
  static constexpr XMMRegister xmm0{0};
  static constexpr XMMRegister xmm1{1};
  static constexpr XMMRegister xmm2{2};
  static constexpr XMMRegister xmm3{3};
  static constexpr XMMRegister xmm4{4};
  static constexpr XMMRegister xmm5{5};
  static constexpr XMMRegister xmm6{6};
  static constexpr XMMRegister xmm7{7};
  static constexpr XMMRegister xmm8{8};
  static constexpr XMMRegister xmm9{9};
  static constexpr XMMRegister xmm10{10};
  static constexpr XMMRegister xmm11{11};
  static constexpr XMMRegister xmm12{12};
  static constexpr XMMRegister xmm13{13};
  static constexpr XMMRegister xmm14{14};
  static constexpr XMMRegister xmm15{15};

  static constexpr YMMRegister no_ymm_register{0x80};
  static constexpr YMMRegister ymm0{0};
  static constexpr YMMRegister ymm1{1};
  static constexpr YMMRegister ymm2{2};
  static constexpr YMMRegister ymm3{3};
  static constexpr YMMRegister ymm4{4};
  static constexpr YMMRegister ymm5{5};
  static constexpr YMMRegister ymm6{6};
  static constexpr YMMRegister ymm7{7};
  static constexpr YMMRegister ymm8{8};
  static constexpr YMMRegister ymm9{9};
  static constexpr YMMRegister ymm10{10};
  static constexpr YMMRegister ymm11{11};
  static constexpr YMMRegister ymm12{12};
  static constexpr YMMRegister ymm13{13};
  static constexpr YMMRegister ymm14{14};
  static constexpr YMMRegister ymm15{15};

  // Macroassembler uses these names to support both x86-32 and x86-64 modes.
  static constexpr Register gpr_a{0};
  static constexpr Register gpr_c{1};
  static constexpr Register gpr_d{2};
  static constexpr Register gpr_b{3};
  static constexpr Register gpr_s{4};
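  // (In this file these alias rax, rcx, rdx, rbx and rsp; the x86-32 assembler is
  // expected to map the same names to eax, ecx, edx, ebx and esp.)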

// Instructions.
#include "berberis/assembler/gen_assembler_x86_64-inl.h"  // NOLINT generated file!

  // Historical note: x86-32 mode has a Movq encoding for memory-to-xmm operations.
  // x86-64 added another one with a different opcode, but since they are functionally
  // equivalent, GNU Assembler and Clang use the old one in both 32-bit and 64-bit
  // modes, and we do the same.

  // Unhide Decl(Mem) hidden by Decl(Reg).
  using BaseAssembler::Decl;

  // Unhide Decw(Mem) hidden by Decw(Reg).
  using BaseAssembler::Decw;

  // Unhide Incl(Mem) hidden by Incl(Reg).
  using BaseAssembler::Incl;

  // Unhide Incw(Mem) hidden by Incw(Reg).
  using BaseAssembler::Incw;

  // Unhide Movq(Mem, XMMReg) and Movq(XMMReg, Mem) hidden by Movq(Reg, Imm) and many others.
  using BaseAssembler::Movq;

  // Unhide Xchgl(Mem, Reg) hidden by the modified version below.
  using BaseAssembler::Xchgl;

  // Unhide Vmov*(Mem, Reg) hidden by Vmov*(Reg, Reg).
  using BaseAssembler::Vmovapd;
  using BaseAssembler::Vmovaps;
  using BaseAssembler::Vmovdqa;
  using BaseAssembler::Vmovdqu;
  using BaseAssembler::Vmovq;
  using BaseAssembler::Vmovsd;
  using BaseAssembler::Vmovss;

  void Xchgl(Register dest, Register src) {
    // In 32-bit mode "xchgl %eax, %eax" did nothing and was often reused as "nop".
    //
    // On x86-64 "xchgl %eax, %eax" clears the top half of %rax, but having a
    // single-byte nop is too convenient. Thus, as a special exception, 0x90 is not
    // interpreted as "xchgl %eax, %eax" but is kept as "nop" - and a longer encoding
    // must be used for a real "xchgl %eax, %eax".

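    // 0x87 is "xchg r/m32, r32"; ModRM 0xC0 selects register-direct %eax, %eax.
    // Emit16 writes little-endian, so 0xc087 emits the bytes 87 C0.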
    if (IsAccumulator(src) && IsAccumulator(dest)) {
      Emit16(0xc087);
    } else {
      BaseAssembler::Xchgl(dest, src);
    }
  }

  // TODO(b/127356868): decide what to do with these functions when cross-arch assembler is used.

#ifdef __amd64__

  // Unhide Call(Reg), hidden by special version below.
  using BaseAssembler::Call;

  void Call(const void* target) {
    // There is no call instruction with the properties we need, thus we emulate it.
    // This is what the following code looks like when decoded with objdump (if
    // the target address is 0x123456789abcdef0):
    //   0: ff 15 02 00 00 00        callq  *0x2(%rip) # 0x8
    //   6: eb 08                    jmp    0x10
    //   8: f0 de bc 9a 78 56 34 12  lock fidivrs 0x12345678(%rdx,%rbx,4)
    // First we do the call with the address taken from the last 8 bytes, then we
    // jump over these 8 bytes.
    Emit64(0x08eb0000000215ff);
    Emit64(bit_cast<int64_t>(target));
  }

  // Unhide Jcc(Label), hidden by special version below.
  using BaseAssembler::Jcc;

  // Make sure only type void* can be passed to the function below, not Label* or
  // any other pointer.
  template <typename T>
  auto Jcc(Condition cc, T* target) -> void = delete;

  template <typename T>
  auto Jcc(Condition cc, T target)
      -> std::enable_if_t<std::is_integral_v<T> && sizeof(uintptr_t) < sizeof(T)> = delete;
  void Jcc(Condition cc, uintptr_t target) {
    if (cc == Condition::kAlways) {
      Jmp(target);
      return;
    } else if (cc == Condition::kNever) {
      return;
    }
    CHECK_EQ(0, static_cast<uint8_t>(cc) & 0xF0);
    // There is no Jcc instruction with the properties we need, thus we emulate it.
    // This is what the following code looks like when decoded with objdump (if
    // the target address is 0x123456789abcdef0):
    //   0: 75 0e                   jne    0x10
    //   2: ff 25 00 00 00 00       jmpq   *0x0(%rip) # 0x8
    //   8: f0 de bc 9a 78 56 34 12 lock fidivrs 0x12345678(%rdx,%rbx,4)
    // We do a relative jump over the jmpq for the inverted condition (because Jcc
    // can only jump ±2GiB in 64-bit mode, which is not enough to reach an arbitrary
    // address), then a jmpq with the address stored right after it.
    Emit64(0x0000000025ff'0e70 | static_cast<int8_t>(ToReverseCond(cc)));
    Emit64(bit_cast<int64_t>(target));
  }

  // Emit short relative jcc to an absolute address.
  //
  // This is used to shorten jcc in code installed in the lower 2G of the address
  // space. Use this if the target is also within this address space.
  void Jcc32(Condition cc, uintptr_t target) {
    if (cc == Condition::kAlways) {
      Jmp32(target);
      return;
    } else if (cc == Condition::kNever) {
      return;
    }
    CHECK_EQ(static_cast<uint8_t>(cc) & 0xf0, 0);
    Emit8(0x0f);
    Emit8(0x80 | static_cast<uint8_t>(cc));
    Emit32(0xcccc'cccc);
    // Set the last 4 bytes to the displacement from the current pc to 'target'.
    AddRelocation(pc() - 4, RelocationType::RelocAbsToDisp32, pc(), bit_cast<intptr_t>(target));
  }

  void Jcc(Condition cc, const void* target) { Jcc(cc, bit_cast<uintptr_t>(target)); }

  // Unhide Jmp(Reg), hidden by special version below.
  using BaseAssembler::Jmp;

  // Make sure only type void* can be passed to the function below, not Label* or
  // any other pointer.
  template <typename T>
  auto Jmp(T* target) -> void = delete;

  template <typename T>
  auto Jmp(T target)
      -> std::enable_if_t<std::is_integral_v<T> && sizeof(uintptr_t) < sizeof(T)> = delete;

  void Jmp(uintptr_t target) {
    // There is no jump instruction with the properties we need, thus we emulate it.
    // This is what the following code looks like when decoded with objdump (if
    // the target address is 0x123456789abcdef0):
    //   0: ff 25 00 00 00 00       jmpq   *0x0(%rip) # 0x6
    //   6: f0 de bc 9a 78 56 34 12 lock fidivrs 0x12345678(%rdx,%rbx,4)
    // We jump to the address stored right after the jmpq, using %rip-relative
    // addressing (with offset 0).
    Emit16(0x25ff);
    Emit32(0x00000000);
    Emit64(bit_cast<int64_t>(target));
  }

  // Emit short relative jump to an absolute address.
  //
  // This is used to shorten jmps in code installed in the lower 2G of the address
  // space. Use this if the target is also within this address space.
  void Jmp32(uintptr_t target) {
    Emit8(0xe9);
    Emit32(0xcccc'cccc);
    AddRelocation(pc() - 4, RelocationType::RelocAbsToDisp32, pc(), target);
  }

  void Jmp(const void* target) { Jmp(bit_cast<uintptr_t>(target)); }

#endif

 private:
  Assembler() = delete;
  Assembler(const Assembler&) = delete;
  Assembler(Assembler&&) = delete;
  void operator=(const Assembler&) = delete;
  void operator=(Assembler&&) = delete;
  using DerivedAssemblerType = Assembler;

  static Register Accumulator() { return rax; }
  static bool IsAccumulator(Register reg) { return reg == rax; }

  struct Register64Bit {
    explicit constexpr Register64Bit(Register reg) : num_(reg.num_) {}
    uint8_t num_;
  };

  struct Memory64Bit {
    explicit Memory64Bit(const Operand& op) : operand(op) {}
    Operand operand;
  };

  struct Label64Bit {
    explicit Label64Bit(const LabelOperand& l) : label(l.label) {}
    const Label& label;
  };

  // This type is only used by CmpXchg16b and acts similarly to Memory64Bit there.
  using Memory128Bit = Memory64Bit;
  using Label128Bit = Label64Bit;

  // Check if a given type is "a register with size" (for EmitInstruction).
  template <typename ArgumentType>
  struct IsRegister {
    static constexpr bool value = std::is_same_v<ArgumentType, Register8Bit> ||
                                  std::is_same_v<ArgumentType, Register32Bit> ||
                                  std::is_same_v<ArgumentType, Register64Bit>;
  };

  // Check if a given type is "a memory operand with size" (for EmitInstruction).
  template <typename ArgumentType>
  struct IsMemoryOperand {
    static constexpr bool value =
        std::is_same_v<ArgumentType, Memory32Bit> || std::is_same_v<ArgumentType, Memory64Bit>;
  };

  template <typename ArgumentType>
  struct IsLabelOperand {
    static constexpr bool value =
        std::is_same_v<ArgumentType, Label32Bit> || std::is_same_v<ArgumentType, Label64Bit>;
  };

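  // REX prefix layout: 0b0100'WRXB. W selects 64-bit operand size, R extends the
  // ModRM.reg field, X extends SIB.index, and B extends ModRM.rm (or SIB.base).
  // The base_rex parameter of Rex<>() below says which positional bit (R or B) a
  // given argument contributes.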
  template <typename... ArgumentsTypes>
  void EmitRex(ArgumentsTypes... arguments) {
    constexpr auto registers_count = kCountArguments<IsRegister, ArgumentsTypes...>;
    constexpr auto operands_count = kCountArguments<IsMemoryOperand, ArgumentsTypes...>;
    static_assert(registers_count + operands_count <= 2,
                  "Only two-argument instructions are supported, not VEX or EVEX");
    uint8_t rex = 0;
    if constexpr (registers_count == 2) {
      rex = Rex<0b0100>(ArgumentByType<0, IsRegister>(arguments...)) |
            Rex<0b0001>(ArgumentByType<1, IsRegister>(arguments...));
    } else if constexpr (registers_count == 1 && operands_count == 1) {
      rex = Rex<0b0100>(ArgumentByType<0, IsRegister>(arguments...)) |
            Rex(ArgumentByType<0, IsMemoryOperand>(arguments...));
    } else if constexpr (registers_count == 1) {
      rex = Rex<0b0001>(ArgumentByType<0, IsRegister>(arguments...));
    } else if constexpr (operands_count == 1) {
      rex = Rex(ArgumentByType<0, IsMemoryOperand>(arguments...));
    }
    if (rex) {
      Emit8(rex);
    }
  }

  template <uint8_t base_rex, typename ArgumentType>
  uint8_t Rex(ArgumentType argument) {
    if (argument.num_ & 0b1000) {
      // 64-bit argument additionally requires the REX.W bit.
      if (std::is_same_v<ArgumentType, Register64Bit>) {
        return 0b0100'1000 | base_rex;
      }
      return 0b0100'0000 | base_rex;
    }
    // 8-bit argument requires a REX prefix (even one without any extension bits).
    if (std::is_same_v<ArgumentType, Register8Bit> && argument.num_ > 3) {
      return 0b0100'0000;
    }
    if (std::is_same_v<ArgumentType, Register64Bit>) {
      return 0b0100'1000;
    }
    return 0;
  }

  uint8_t Rex(Operand operand) {
    // REX.B and REX.X always come from the operand.
    uint8_t rex = ((operand.base.num_ & 0b1000) >> 3) | ((operand.index.num_ & 0b1000) >> 2);
    if (rex) {
      // We actually need a REX byte here.
      return 0b0100'0000 | rex;
    } else {
      return 0;
    }
  }

  uint8_t Rex(Memory32Bit operand) { return Rex(operand.operand); }

  uint8_t Rex(Memory64Bit operand) {
    // 64-bit argument requires the REX.W bit - and thus REX itself.
    return 0b0100'1000 | Rex(operand.operand);
  }

  template <typename RegisterType>
  [[nodiscard]] static bool IsSwapProfitable(RegisterType rm_arg, RegisterType vex_arg) {
    // In 64-bit mode we may use a more compact encoding if the operand encoded in rm
    // is a low register. Return true if we can achieve that by swapping arguments.
    return rm_arg.num_ >= 8 && vex_arg.num_ < 8;
  }
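  // Example: for Vmovapd(xmm0, xmm8) it is profitable to use the store-direction
  // opcode so that xmm8 lands in ModRM.reg (VEX.R is available in the 2-byte VEX
  // form) rather than in ModRM.rm (VEX.B only exists in the 3-byte form).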

  template <uint8_t byte1,
            uint8_t byte2,
            uint8_t byte3,
            bool reg_is_opcode_extension,
            typename... ArgumentsTypes>
  void EmitVex(ArgumentsTypes... arguments) {
    constexpr auto registers_count = kCountArguments<IsRegister, ArgumentsTypes...>;
    constexpr auto operands_count = kCountArguments<IsMemoryOperand, ArgumentsTypes...>;
    constexpr auto labels_count = kCountArguments<IsLabelOperand, ArgumentsTypes...>;
    constexpr auto vvvv_parameter = 2 - reg_is_opcode_extension - operands_count - labels_count;
    int vvvv = 0;
    if constexpr (registers_count > vvvv_parameter) {
      vvvv = ArgumentByType<vvvv_parameter, IsRegister>(arguments...).num_;
    }
    auto vex2 = byte2 | 0b111'00000;
    if constexpr (operands_count == 1) {
      auto operand = ArgumentByType<0, IsMemoryOperand>(arguments...);
      vex2 ^= (operand.operand.base.num_ & 0b1000) << 2;
      vex2 ^= (operand.operand.index.num_ & 0b1000) << 3;
      if constexpr (!reg_is_opcode_extension) {
        vex2 ^= (ArgumentByType<0, IsRegister>(arguments...).num_ & 0b1000) << 4;
      }
    } else if constexpr (labels_count == 1) {
      if constexpr (!reg_is_opcode_extension) {
        vex2 ^= (ArgumentByType<0, IsRegister>(arguments...).num_ & 0b1000) << 4;
      }
    } else if constexpr (registers_count > 0) {
      if constexpr (reg_is_opcode_extension) {
        vex2 ^= (ArgumentByType<0, IsRegister>(arguments...).num_ & 0b1000) << 2;
      } else {
        vex2 ^= (ArgumentByType<0, IsRegister>(arguments...).num_ & 0b1000) << 4;
        vex2 ^= (ArgumentByType<1, IsRegister>(arguments...).num_ & 0b1000) << 2;
      }
    }
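    // Use the compact 2-byte VEX prefix (0xC5) when it can express the same thing
    // as the 3-byte one (0xC4): X and B must be 1 (they are inverted, so 1 means
    // "no extension"), the opcode map must be 0F (0b00001), and VEX.W must be 0.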
    if (byte1 == 0xC4 && (vex2 & 0b0'1'1'11111) == 0b0'1'1'00001 && (byte3 & 0b1'0000'0'00) == 0) {
      Emit16((0xc5 | ((vex2 & 0b1'0'0'00000) << 8) | (byte3 << 8) |
              0b0'1111'000'00000000) ^ (vvvv << 11));
    } else {
      Emit8(byte1);
      Emit16((vex2 | (byte3 << 8) | 0b0'1111'000'00000000) ^ (vvvv << 11));
    }
  }

  template <typename ArgumentType>
  void EmitRegisterInOpcode(uint8_t opcode, ArgumentType argument) {
    Emit8(opcode | (argument.num_ & 0b111));
  }

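  // ModR/M layout: mod(2) | reg(3) | rm(3). The 0xC0 below sets mod = 0b11, i.e.
  // register-direct addressing for the rm operand.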
  template <typename ArgumentType1, typename ArgumentType2>
  void EmitModRM(ArgumentType1 argument1, ArgumentType2 argument2) {
    Emit8(0xC0 | ((argument1.num_ & 0b111) << 3) | (argument2.num_ & 0b111));
  }

  template <typename ArgumentType>
  void EmitModRM(uint8_t opcode_extension, ArgumentType argument) {
    CHECK_LE(opcode_extension, 0b111);
    Emit8(0xC0 | (opcode_extension << 3) | (argument.num_ & 0b111));
  }

  template <typename ArgumentType>
  constexpr void EmitOperandOp(ArgumentType argument, Operand operand) {
    EmitOperandOp(static_cast<int>(argument.num_ & 0b111), operand);
  }

  template <size_t kImmediatesSize, typename ArgumentType>
  constexpr void EmitRipOp(ArgumentType argument, const Label& label) {
    EmitRipOp<kImmediatesSize>(static_cast<int>(argument.num_) & 0b111, label);
  }

  // Emit the ModR/M byte, and optionally the SIB byte and
  // 1- or 4-byte offset for a memory operand.  Also used to encode
  // a three-bit opcode extension into the ModR/M byte.
  constexpr void EmitOperandOp(int number, const Operand& addr);
  // Helper functions to handle various ModR/M and SIB combinations.
  // Should *only* be called from EmitOperandOp!
  constexpr void EmitIndexDispOperand(int reg, const Operand& addr);
  template <typename ArgType, void (AssemblerBase::*)(ArgType)>
  constexpr void EmitBaseIndexDispOperand(int base_modrm_and_sib, const Operand& addr);
  // Emit ModR/M for rip-addressing.
  template <size_t kImmediatesSize>
  constexpr void EmitRipOp(int num_, const Label& label);

  friend BaseAssembler;
};

// This function looks big, but when we are emitting an Operand with fixed registers
// (which is the most common case) all the "if"s below are evaluated statically, which
// makes the effective size of this function very small.
//
// But for that to happen the function has to be inline and in a header.
constexpr inline void Assembler::EmitOperandOp(int number, const Operand& addr) {
  // Additional info (register number, etc.) is limited to 3 bits.
  CHECK_LE(unsigned(number), 7);

  // The reg field must be shifted by 3 bits.
  int reg = number << 3;

  // On x86 %rsp cannot be an index, only a base.
  CHECK(addr.index != rsp);

  // If base is not %rsp/%r12 and we don't have an index, then we don't have a SIB byte.
  // All other cases have both "ModR/M" and SIB bytes.
  if (addr.base != rsp && addr.base != r12 && addr.index == no_register) {
    // If we have a base register then we can use the same logic as for other common cases.
    if (addr.base != no_register) {
      EmitBaseIndexDispOperand<uint8_t, &Assembler::Emit8>((addr.base.num_ & 7) | reg, addr);
    } else {
      Emit16(0x2504 | reg);
      Emit32(addr.disp);
    }
  } else if (addr.index == no_register) {
    // Note: when ModR/M and SIB are used, "no index" is encoded as if %rsp were used in
    // place of the index (that's why %rsp can't be used as an index - see the check above).
    EmitBaseIndexDispOperand<int16_t, &Assembler::Emit16>(
        0x2004 | ((addr.base.num_ & 7) << 8) | reg, addr);
  } else if (addr.base == no_register) {
    EmitIndexDispOperand(reg, addr);
  } else {
    EmitBaseIndexDispOperand<int16_t, &Assembler::Emit16>(0x04 | (addr.scale << 14) |
                                                              ((addr.index.num_ & 7) << 11) |
                                                              ((addr.base.num_ & 7) << 8) | reg,
                                                          addr);
  }
}

constexpr inline void Assembler::EmitIndexDispOperand(int reg, const Operand& addr) {
  // We only have an index here, no base; use SIB but put %rbp in the "base" field.
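  // (With mod = 0b00, a SIB base of 0b101 (%rbp) means "disp32, no base register".)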
  Emit16(0x0504 | (addr.scale << 14) | ((addr.index.num_ & 7) << 11) | reg);
  Emit32(addr.disp);
}

template <size_t kImmediatesSize>
constexpr inline void Assembler::EmitRipOp(int num_, const Label& label) {
  Emit8(0x05 | (num_ << 3));
  jumps_.push_back(Jump{&label, pc(), false});
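  // Placeholder displacement, fixed up later via jumps_: rip-relative operands are
  // resolved against the end of the instruction, so pre-bias by -(4 + kImmediatesSize)
  // to account for the displacement field itself plus any trailing immediates.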
  Emit32(0xfffffffc - kImmediatesSize);
}

template <typename ArgType, void (AssemblerBase::* EmitBase)(ArgType)>
constexpr inline void Assembler::EmitBaseIndexDispOperand(int base_modrm_and_sib,
                                                          const Operand& addr) {
  if (addr.disp == 0 && addr.base != rbp && addr.base != r13) {
    // We can omit a zero displacement only if the base isn't %rbp/%r13.
    (this->*EmitBase)(base_modrm_and_sib);
  } else if (IsInRange<int8_t>(addr.disp)) {
    // If disp fits into a byte then use the byte-sized disp.
    (this->*EmitBase)(base_modrm_and_sib | 0x40);
    Emit8(addr.disp);
  } else {
    // Otherwise use the full 4-byte disp.
    (this->*EmitBase)(base_modrm_and_sib | 0x80);
    Emit32(addr.disp);
  }
}

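// Encoding summary for the three cases below (standard x86-64 forms):
//   fits in uint32: B8+rd imm32        "movl $imm32, %r32"    - 5-6 bytes, zero-extends
//   fits in int32:  REX.W C7 /0 imm32  "movq $imm32, %r64"    - 7 bytes, sign-extends
//   otherwise:      REX.W B8+rd imm64  "movabsq $imm64, %r64" - 10 bytes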
constexpr inline void Assembler::Movq(Register dest, int64_t imm64) {
  if (IsInRange<uint32_t>(imm64)) {
    // Shorter encoding.
    Movl(dest, static_cast<uint32_t>(imm64));
  } else if (IsInRange<int32_t>(imm64)) {
    // Slightly longer encoding.
    EmitInstruction<0xc7, 0x00>(Register64Bit(dest), static_cast<int32_t>(imm64));
  } else {
    // Longest encoding.
    EmitInstruction<0xb8>(Register64Bit(dest), imm64);
  }
}

constexpr inline void Assembler::Vmovapd(XMMRegister arg0, XMMRegister arg1) {
  if (IsSwapProfitable(arg1, arg0)) {
    return EmitInstruction<0xc4, 0x01, 0x01, 0x29>(VectorRegister128Bit(arg1),
                                                   VectorRegister128Bit(arg0));
  }
  EmitInstruction<0xc4, 0x01, 0x01, 0x28>(VectorRegister128Bit(arg0), VectorRegister128Bit(arg1));
}

constexpr inline void Assembler::Vmovapd(YMMRegister arg0, YMMRegister arg1) {
  if (IsSwapProfitable(arg1, arg0)) {
    return EmitInstruction<0xc4, 0x01, 0x05, 0x29>(VectorRegister256Bit(arg1),
                                                   VectorRegister256Bit(arg0));
  }
  EmitInstruction<0xc4, 0x01, 0x05, 0x28>(VectorRegister256Bit(arg0), VectorRegister256Bit(arg1));
}

constexpr inline void Assembler::Vmovaps(XMMRegister arg0, XMMRegister arg1) {
  if (IsSwapProfitable(arg1, arg0)) {
    return EmitInstruction<0xc4, 0x01, 0x00, 0x29>(VectorRegister128Bit(arg1),
                                                   VectorRegister128Bit(arg0));
  }
  EmitInstruction<0xc4, 0x01, 0x00, 0x28>(VectorRegister128Bit(arg0), VectorRegister128Bit(arg1));
}

constexpr inline void Assembler::Vmovaps(YMMRegister arg0, YMMRegister arg1) {
  if (IsSwapProfitable(arg1, arg0)) {
    return EmitInstruction<0xc4, 0x01, 0x04, 0x29>(VectorRegister256Bit(arg1),
                                                   VectorRegister256Bit(arg0));
  }
  EmitInstruction<0xc4, 0x01, 0x04, 0x28>(VectorRegister256Bit(arg0), VectorRegister256Bit(arg1));
}

constexpr inline void Assembler::Vmovdqa(XMMRegister arg0, XMMRegister arg1) {
  if (IsSwapProfitable(arg1, arg0)) {
    return EmitInstruction<0xc4, 0x01, 0x01, 0x7F>(VectorRegister128Bit(arg1),
                                                   VectorRegister128Bit(arg0));
  }
  EmitInstruction<0xc4, 0x01, 0x01, 0x6F>(VectorRegister128Bit(arg0), VectorRegister128Bit(arg1));
}

constexpr inline void Assembler::Vmovdqa(YMMRegister arg0, YMMRegister arg1) {
  if (IsSwapProfitable(arg1, arg0)) {
    return EmitInstruction<0xc4, 0x01, 0x05, 0x7F>(VectorRegister256Bit(arg1),
                                                   VectorRegister256Bit(arg0));
  }
  EmitInstruction<0xc4, 0x01, 0x05, 0x6F>(VectorRegister256Bit(arg0), VectorRegister256Bit(arg1));
}

constexpr inline void Assembler::Vmovdqu(XMMRegister arg0, XMMRegister arg1) {
  if (IsSwapProfitable(arg1, arg0)) {
    return EmitInstruction<0xc4, 0x01, 0x02, 0x7F>(VectorRegister128Bit(arg1),
                                                   VectorRegister128Bit(arg0));
  }
  EmitInstruction<0xc4, 0x01, 0x02, 0x6F>(VectorRegister128Bit(arg0), VectorRegister128Bit(arg1));
}

constexpr inline void Assembler::Vmovdqu(YMMRegister arg0, YMMRegister arg1) {
  if (IsSwapProfitable(arg1, arg0)) {
    return EmitInstruction<0xc4, 0x01, 0x06, 0x7F>(VectorRegister256Bit(arg1),
                                                   VectorRegister256Bit(arg0));
  }
  EmitInstruction<0xc4, 0x01, 0x06, 0x6F>(VectorRegister256Bit(arg0), VectorRegister256Bit(arg1));
}

constexpr inline void Assembler::Vmovsd(XMMRegister arg0, XMMRegister arg1, XMMRegister arg2) {
  if (IsSwapProfitable(arg2, arg0)) {
    return EmitInstruction<0xc4, 0x01, 0x03, 0x11>(
        VectorRegister128Bit(arg2), VectorRegister128Bit(arg0), VectorRegister128Bit(arg1));
  }
  EmitInstruction<0xc4, 0x01, 0x03, 0x10>(
      VectorRegister128Bit(arg0), VectorRegister128Bit(arg2), VectorRegister128Bit(arg1));
}

constexpr inline void Assembler::Vmovss(XMMRegister arg0, XMMRegister arg1, XMMRegister arg2) {
  if (IsSwapProfitable(arg2, arg0)) {
    return EmitInstruction<0xc4, 0x01, 0x02, 0x11>(
        VectorRegister128Bit(arg2), VectorRegister128Bit(arg0), VectorRegister128Bit(arg1));
  }
  EmitInstruction<0xc4, 0x01, 0x02, 0x10>(
      VectorRegister128Bit(arg0), VectorRegister128Bit(arg2), VectorRegister128Bit(arg1));
}

constexpr inline void Assembler::Xchgq(Register dest, Register src) {
  // We compare our output to clang's and thus want to produce the same code.
  // 0x48 0x90 is a suboptimal encoding for this operation (pure 0x90 does the same
  // and is what gcc + gas produce), but it is what clang <= 8 does.
  if (IsAccumulator(src) && IsAccumulator(dest)) {
    Emit8(0x90);
  } else if (IsAccumulator(src) || IsAccumulator(dest)) {
    Register other = IsAccumulator(src) ? dest : src;
    EmitInstruction<0x90>(Register64Bit(other));
  } else {
    // Clang 8 (after r330298) puts dest before src.  We are comparing output
    // to clang in an exhaustive test and thus want to match clang behavior exactly.
    EmitInstruction<0x87>(Register64Bit(dest), Register64Bit(src));
  }
}

}  // namespace x86_64

}  // namespace berberis

#endif  // BERBERIS_ASSEMBLER_X86_64_H_