• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef BERBERIS_ASSEMBLER_COMMON_X86_H_
18 #define BERBERIS_ASSEMBLER_COMMON_X86_H_
19 
20 #include <cstddef>  // std::size_t
21 #include <initializer_list>
22 #include <iterator>     // std::begin, std::end, std::next
#include <limits>       // std::numeric_limits
24 #include <type_traits>  // std::enable_if, std::is_integral
25 
26 #include "berberis/assembler/common.h"
27 #include "berberis/base/bit_util.h"
28 #include "berberis/base/logging.h"
29 #include "berberis/base/macros.h"  // DISALLOW_IMPLICIT_CONSTRUCTORS
30 
31 namespace berberis {
32 
33 // AssemblerX86 includes implementation of most x86 assembler instructions.
34 //
35 // x86-32 and x86-64 assemblers are nearly identical, but difference lies in handling
36 // of very low-level instruction details: almost all instructions on x86-64 could include
37 // REX byte which is needed if new registers (%r8 to %r15 or %xmm8 to %xmm15) are used.
38 //
// To handle that difference efficiently AssemblerX86 is a CRTP class: it's parameterized
// by its own descendant and pulls certain functions (e.g. GetHighBit or Rex8Size) from
// its implementation.
42 //
43 // Certain functions are only implemented by its descendant (since there are instructions
44 // which only exist in x86-32 mode and instructions which only exist in x86-64 mode).
45 
46 template <typename Assembler>
47 class AssemblerX86 : public AssemblerBase {
48  public:
AssemblerX86(MachineCode * code)49   explicit AssemblerX86(MachineCode* code) : AssemblerBase(code) {}
50 
51   enum class Condition {
52     kInvalidCondition = -1,
53 
54     kOverflow = 0,
55     kNoOverflow = 1,
56     kBelow = 2,
57     kAboveEqual = 3,
58     kEqual = 4,
59     kNotEqual = 5,
60     kBelowEqual = 6,
61     kAbove = 7,
62     kNegative = 8,
63     kPositive = 9,
64     kParityEven = 10,
65     kParityOdd = 11,
66     kLess = 12,
67     kGreaterEqual = 13,
68     kLessEqual = 14,
69     kGreater = 15,
70     kAlways = 16,
71     kNever = 17,
72 
73     // aka...
74     kCarry = kBelow,
75     kNotCarry = kAboveEqual,
76     kZero = kEqual,
77     kNotZero = kNotEqual,
78     kSign = kNegative,
79     kNotSign = kPositive
80   };
81 
82   struct Register {
83     // Note: we couldn't make the following private because of peculiarities of C++ (see
84     // https://stackoverflow.com/questions/24527395/compiler-error-when-initializing-constexpr-static-class-member
85     // for explanation), but you are not supposed to access num or use GetHighBit() and GetLowBits()
86     // functions.  Treat that type as opaque cookie.
87 
88     constexpr bool operator==(const Register& reg) const { return num == reg.num; }
89 
90     constexpr bool operator!=(const Register& reg) const { return num != reg.num; }
91 
92     uint8_t num;
93   };
94 
95   struct XMMRegister {
96     // Note: we couldn't make the following private because of peculiarities of C++ (see
97     // https://stackoverflow.com/questions/24527395/compiler-error-when-initializing-constexpr-static-class-member
98     // for explanation), but you are not supposed to access num or use GetHighBit() and GetLowBits()
99     // functions.  Treat that type as opaque cookie.
100 
101     constexpr bool operator==(const XMMRegister& reg) const { return num == reg.num; }
102 
103     constexpr bool operator!=(const XMMRegister& reg) const { return num != reg.num; }
104 
105     uint8_t num;
106   };
107 
108   enum ScaleFactor { kTimesOne = 0, kTimesTwo = 1, kTimesFour = 2, kTimesEight = 3 };
109 
110   struct Operand {
rexOperand111     constexpr uint8_t rex() const {
112       return Assembler::kIsX86_64 ? ((index.num & 0x08) >> 2) | ((base.num & 0x08) >> 3) : 0;
113     }
114 
RequiresRexOperand115     constexpr bool RequiresRex() const {
116       return Assembler::kIsX86_64 ? ((index.num & 0x08) | (base.num & 0x08)) : false;
117     }
118 
119     Register base = Assembler::no_register;
120     Register index = Assembler::no_register;
121     ScaleFactor scale = kTimesOne;
122     int32_t disp = 0;
123   };
124 
  // Instruction argument which refers to a Label.  Only a reference is stored, so the Label has
  // to outlive the LabelOperand.
  struct LabelOperand {
    const Label& label;
  };
128 
129   // Macro operations.
Finalize()130   void Finalize() { ResolveJumps(); }
131 
Align(uint32_t m)132   void Align(uint32_t m) {
133     uint32_t mask = m - 1;
134     uint32_t addr = pc();
135     Nop((m - (addr & mask)) & mask);
136   }
137 
Nop(uint32_t bytes)138   void Nop(uint32_t bytes) {
139     static const uint32_t kNumNops = 15;
140     static const uint8_t nop1[] = {0x90};
141     static const uint8_t nop2[] = {0x66, 0x90};
142     static const uint8_t nop3[] = {0x0f, 0x1f, 0x00};
143     static const uint8_t nop4[] = {0x0f, 0x1f, 0x40, 0x00};
144     static const uint8_t nop5[] = {0x0f, 0x1f, 0x44, 0x00, 0x00};
145     static const uint8_t nop6[] = {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x0};
146     static const uint8_t nop7[] = {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x0, 0x00};
147     static const uint8_t nop8[] = {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
148     static const uint8_t nop9[] = {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
149     static const uint8_t nop10[] = {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
150     static const uint8_t nop11[] = {
151         0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
152     static const uint8_t nop12[] = {
153         0x66, 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
154     static const uint8_t nop13[] = {
155         0x66, 0x66, 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
156     static const uint8_t nop14[] = {
157         0x66, 0x66, 0x66, 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
158     static const uint8_t nop15[] = {
159         0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
160 
161     static const uint8_t* nops[kNumNops] = {nop1,
162                                             nop2,
163                                             nop3,
164                                             nop4,
165                                             nop5,
166                                             nop6,
167                                             nop7,
168                                             nop8,
169                                             nop9,
170                                             nop10,
171                                             nop11,
172                                             nop12,
173                                             nop13,
174                                             nop14,
175                                             nop15};
176     // Common case.
177     if (bytes == 1) {
178       Emit8(nop1[0]);
179       return;
180     }
181 
182     while (bytes > 0) {
183       uint32_t len = bytes;
184       if (len > kNumNops) {
185         len = kNumNops;
186       }
187       EmitSequence(nops[len - 1], len);
188       bytes -= len;
189     }
190   }
191 
192 // Instructions.
193 #include "berberis/assembler/gen_assembler_common_x86-inl.h"  // NOLINT generated file
194 
195   // Flow control.
Jmp(int32_t offset)196   void Jmp(int32_t offset) {
197     uint32_t start = pc();
198     if (offset > -124 && offset < 124) {
199       Emit8(0xeb);
200       Emit8((offset - 1 - (pc() - start)) & 0xFF);
201     } else {
202       Emit8(0xe9);
203       Emit32(offset - 4 - (pc() - start));
204     }
205   }
206 
Call(int32_t offset)207   void Call(int32_t offset) {
208     uint32_t start = pc();
209     Emit8(0xe8);
210     Emit32(offset - 4 - (pc() - start));
211   }
212 
Jcc(Condition cc,int32_t offset)213   void Jcc(Condition cc, int32_t offset) {
214     if (cc == Condition::kAlways) {
215       Jmp(offset);
216       return;
217     } else if (cc == Condition::kNever) {
218       return;
219     }
220     CHECK_EQ(0, static_cast<uint8_t>(cc) & 0xF0);
221     uint32_t start = pc();
222     if (offset > -124 && offset < 124) {
223       Emit8(0x70 | static_cast<uint8_t>(cc));
224       Emit8(offset - 1 - (pc() - start));
225     } else {
226       Emit8(0x0F);
227       Emit8(0x80 | static_cast<uint8_t>(cc));
228       Emit32(offset - 4 - (pc() - start));
229     }
230   }
231 
232  protected:
233   // Helper types to distinguish argument types.
234   struct Register8Bit {
Register8BitRegister8Bit235     explicit constexpr Register8Bit(Register reg) : num(reg.num) {}
236     uint8_t num;
237   };
238 
239   struct Register32Bit {
Register32BitRegister32Bit240     explicit constexpr Register32Bit(Register reg) : num(reg.num) {}
Register32BitRegister32Bit241     explicit constexpr Register32Bit(XMMRegister reg) : num(reg.num) {}
242     uint8_t num;
243   };
244 
245   // 16-bit and 128-bit vector registers follow the same rules as 32-bit registers.
246   typedef Register32Bit Register16Bit;
247   typedef Register32Bit VectorRegister128Bit;
248   // Certain instructions (Enter/Leave, Jcc/Jmp/Loop, Call/Ret, Push/Pop) always operate
249   // on registers of default size (32-bit in 32-bit mode, 64-bit in 64-bit mode (see
250   // "Instructions Not Requiring REX Prefix in 64-Bit Mode" table in 24594 AMD Manual)
251   // Map these to Register32Bit, too, since they don't need REX.W even in 64-bit mode.
252   typedef Register32Bit RegisterDefaultBit;
253 
254   struct Memory32Bit {
Memory32BitMemory32Bit255     explicit Memory32Bit(const Operand& op) : operand(op) {}
256     Operand operand;
257   };
258 
259   // 8-bit, 16-bit, 128-bit memory behave the same as 32-bit memory.
260   // Only 64-bit memory is different.
261   typedef Memory32Bit Memory8Bit;
262   typedef Memory32Bit Memory16Bit;
263   // Most vector instructions don't need to use REX.W to access 64-bit or 128-bit memory.
264   typedef Memory32Bit VectorMemory32Bit;
265   typedef Memory32Bit VectorMemory64Bit;
266   typedef Memory32Bit VectorMemory128Bit;
267   // X87 instructions always use the same encoding - even for 64-bit or 28-bytes
268   // memory operands (like in fldenv/fnstenv)
269   typedef Memory32Bit MemoryX87;
270 
271   // Labels types for memory quantities.  Note that names are similar to the ones before because
272   // they are autogenerated.  E.g. VectorLabel32Bit should be read as “VECTOR's operation LABEL
273   // for 32-BIT quantity in memory”.
274   struct Label32Bit {
Label32BitLabel32Bit275     explicit Label32Bit(const struct LabelOperand& l) : label(l.label) {}
276     const Label& label;
277   };
278 
279   // 8-bit, 16-bit, 128-bit memory behave the same as 32-bit memory.
280   // Only 64-bit memory is different.
281   typedef Label32Bit Label8Bit;
282   typedef Label32Bit Label16Bit;
283   // Most vector instructions don't need to use REX.W to access 64-bit or 128-bit memory.
284   typedef Label32Bit VectorLabel32Bit;
285   typedef Label32Bit VectorLabel64Bit;
286   typedef Label32Bit VectorLabel128Bit;
287   // X87 instructions always use the same encoding - even for 64-bit or 28-bytes
288   // memory operands (like in fldenv/fnstenv)
289   typedef Label32Bit LabelX87;
290 
IsLegacyPrefix(int code)291   static constexpr bool IsLegacyPrefix(int code) {
292     // Legacy prefixes used as opcode extensions in SSE.
293     // Lock is used by cmpxchg.
294     return (code == 0x66) || (code == 0xf2) || (code == 0xf3) || (code == 0xf0);
295   }
296 
297   // Delegate check to Assembler::template IsRegister.
298   template <typename ArgumentType>
299   struct IsCondition {
300     static constexpr bool value = std::is_same_v<ArgumentType, Condition>;
301   };
302 
  // Predicate for kCountArguments/ArgumentByType.
  // The check is delegated to Assembler::template IsRegister.
  template <typename ArgumentType>
  struct IsRegister {
    static constexpr bool value = Assembler::template IsRegister<ArgumentType>::value;
  };

  // Predicate for kCountArguments/ArgumentByType.
  // The check is delegated to Assembler::template IsMemoryOperand.
  template <typename ArgumentType>
  struct IsMemoryOperand {
    static constexpr bool value = Assembler::template IsMemoryOperand<ArgumentType>::value;
  };

  // Predicate for kCountArguments/ArgumentByType.
  // The check is delegated to Assembler::template IsLabelOperand.
  template <typename ArgumentType>
  struct IsLabelOperand {
    static constexpr bool value = Assembler::template IsLabelOperand<ArgumentType>::value;
  };
317 
318   template <typename ArgumentType>
319   struct IsImmediate {
320     static constexpr bool value =
321         std::is_integral_v<ArgumentType> &&
322         ((sizeof(ArgumentType) == sizeof(int8_t)) || (sizeof(ArgumentType) == sizeof(int16_t)) ||
323          (sizeof(ArgumentType) == sizeof(int32_t)) || (sizeof(ArgumentType) == sizeof(int64_t)));
324   };
325 
326   // Count number of arguments selected by Predicate.
327   template <template <typename> typename Predicate, typename... ArgumentTypes>
328   static constexpr std::size_t kCountArguments = ((Predicate<ArgumentTypes>::value ? 1 : 0) + ... +
329                                                   0);
330 
331   // Extract arguments selected by Predicate.
332   //
333   // Note: This interface begs for the trick used in EmitFunctionTypeHelper in make_intrinsics.cc
334   // in conjunction with structured bindings.
335   //
336   // Unfortunately returning std::tuple slows down AssemblerTest by about 30% when libc++ and clang
337   // are used together (no slowdown on GCC, no slowdown on clang+libstdc++).
338   //
339   // TODO(http://b/140721204): refactor when it would be safe to return std::tuple from function.
340   //
341   template <std::size_t index,
342             template <typename>
343             typename Predicate,
344             typename ArgumentType,
345             typename... ArgumentTypes>
ArgumentByType(ArgumentType argument,ArgumentTypes...arguments)346   static constexpr auto ArgumentByType(ArgumentType argument, ArgumentTypes... arguments) {
347     if constexpr (Predicate<std::decay_t<ArgumentType>>::value) {
348       if constexpr (index == 0) {
349         return argument;
350       } else {
351         return ArgumentByType<index - 1, Predicate>(arguments...);
352       }
353     } else {
354       return ArgumentByType<index, Predicate>(arguments...);
355     }
356   }
357 
  // Emit immediates - they always come at the end and don't affect anything except
  // rip-addressing.
  //
  // This overload takes no arguments and does nothing: it ends the recursion of the variadic
  // EmitImmediates.
  static constexpr void EmitImmediates() {}
360 
361   template <typename FirstArgumentType, typename... ArgumentTypes>
EmitImmediates(FirstArgumentType first_argument,ArgumentTypes...other_arguments)362   void EmitImmediates(FirstArgumentType first_argument, ArgumentTypes... other_arguments) {
363     if constexpr (std::is_integral_v<FirstArgumentType> &&
364                   sizeof(FirstArgumentType) == sizeof(int8_t)) {
365       Emit8(first_argument);
366     } else if constexpr (std::is_integral_v<FirstArgumentType> &&
367                          sizeof(FirstArgumentType) == sizeof(int16_t)) {
368       Emit16(first_argument);
369     } else if constexpr (std::is_integral_v<FirstArgumentType> &&
370                          sizeof(FirstArgumentType) == sizeof(int32_t)) {
371       Emit32(first_argument);
372     } else if constexpr (std::is_integral_v<FirstArgumentType> &&
373                          sizeof(FirstArgumentType) == sizeof(int64_t)) {
374       Emit64(first_argument);
375     }
376     EmitImmediates(other_arguments...);
377   }
378 
379   template <typename ArgumentType>
ImmediateSize()380   static constexpr size_t ImmediateSize() {
381     if constexpr (std::is_integral_v<ArgumentType> && sizeof(ArgumentType) == sizeof(int8_t)) {
382       return 1;
383     } else if constexpr (std::is_integral_v<ArgumentType> &&
384                          sizeof(ArgumentType) == sizeof(int16_t)) {
385       return 2;
386     } else if constexpr (std::is_integral_v<ArgumentType> &&
387                          sizeof(ArgumentType) == sizeof(int32_t)) {
388       return 4;
389     } else if constexpr (std::is_integral_v<ArgumentType> &&
390                          sizeof(ArgumentType) == sizeof(int64_t)) {
391       return 8;
392     } else {
393       static_assert(!std::is_integral_v<ArgumentType>);
394       return 0;
395     }
396   }
397 
398   template <typename... ArgumentTypes>
ImmediatesSize()399   static constexpr size_t ImmediatesSize() {
400     return (ImmediateSize<ArgumentTypes>() + ... + 0);
401   }
402 
403   // Struct type to pass information about opcodes.
404   template <uint8_t... kOpcodes>
405   struct Opcodes {};
406 
407   template <uint8_t... kOpcodes>
OpcodesCount(Opcodes<kOpcodes...>)408   static constexpr size_t OpcodesCount(Opcodes<kOpcodes...>) {
409     return sizeof...(kOpcodes);
410   }
411 
412   template <uint8_t kOpcode, uint8_t... kOpcodes>
FirstOpcode(Opcodes<kOpcode,kOpcodes...>)413   static constexpr uint8_t FirstOpcode(Opcodes<kOpcode, kOpcodes...>) {
414     return kOpcode;
415   }
416 
417   template <uint8_t kOpcode, uint8_t... kOpcodes>
SkipFirstOpcodeFromType(Opcodes<kOpcode,kOpcodes...>)418   static constexpr auto SkipFirstOpcodeFromType(Opcodes<kOpcode, kOpcodes...>) {
419     return Opcodes<kOpcodes...>{};
420   }
421 
422   template <uint8_t kOpcode, uint8_t... kOpcodes>
EmitLegacyPrefixes(Opcodes<kOpcode,kOpcodes...> opcodes)423   auto EmitLegacyPrefixes(Opcodes<kOpcode, kOpcodes...> opcodes) {
424     if constexpr (IsLegacyPrefix(kOpcode)) {
425       Emit8(kOpcode);
426       return EmitLegacyPrefixes(Opcodes<kOpcodes...>{});
427     } else {
428       return opcodes;
429     }
430   }
431 
432   // Note: We may need separate x87 EmitInstruction if we would want to support
433   // full set of x86 instructions.
434   //
  // That's because 8087 was a completely separate piece of silicon which was only
  // partially driven by 8086:
437   //     https://en.wikipedia.org/wiki/Intel_8087
438   //
439   // In particular it had the following properties:
440   //   1. It had its own separate subset of opcodes - because it did its own decoding.
441   //   2. It had separate set of registers and could *only* access these.
442   //   2a. The 8086, in turn, *couldn't* access these registers at all.
443   //   3. To access memory it was designed to take address from address bus.
444   //
445   // This means that:
446   //   1. x87 instructions are easily recognizable - all instructions with opcodes 0xd8
447   //      to 0xdf are x87 instructions, all instructions with other opcodes are not.
448   //   2. We could be sure that x87 registers would only be used with x87 instructions
449   //      and other types of registers wouldn't be used with these.
450   //   3. We still would use normal registers for memory access, but REX.W bit wouldn't
451   //      be used for 64-bit quantities, whether they are floating point numbers or integers.
452   //
453   // Right now we only use EmitInstruction to emit x87 instructions which are using memory
454   // operands - and it works well enough for that because of #3.
455 
456   // If you want to understand how this function works (and how helper function like Vex and
457   // Rex work), you need good understanding of AMD/Intel Instruction format.
458   //
459   // Intel manual includes the most precise explanation, but it's VERY hard to read.
460   //
461   // AMD manual is much easier to read, but it doesn't include description of EVEX
462   // instructions and is less precise. Diagram on page 2 of Volume 3 is especially helpful:
463   //   https://www.amd.com/system/files/TechDocs/24594.pdf#page=42
464   //
  // And the most concise (albeit unofficial) is on osdev Wiki:
466   //   https://wiki.osdev.org/X86-64_Instruction_Encoding
467 
468   // Note: if you change this function (or any of the helper functions) then remove --fast
469   // option from ExhaustiveAssemblerTest to run full blackbox comparison to clang.
470 
471   template <typename InstructionOpcodes, typename... ArgumentsTypes>
EmitInstruction(ArgumentsTypes...arguments)472   void EmitInstruction(ArgumentsTypes... arguments) {
473     auto opcodes_no_prefixes = EmitLegacyPrefixes(InstructionOpcodes{});
474     // We don't yet support any XOP-encoded instructions, but they are 100% identical to vex ones,
475     // except they are using 0x8F prefix, not 0xC4 prefix.
476     constexpr auto vex_xop = [&](auto opcodes) {
477       if constexpr (OpcodesCount(opcodes) < 3) {
478         return false;
479       // Note that JSON files use AMD approach: bytes are specified as in AMD manual (only we are
480       // replacing ¬R/¬X/¬B and vvvv bits with zeros).
481       //
482       // In particular it means that vex-encoded instructions should be specified with 0xC4 even if
483       // they are always emitted with 0xC4-to-0xC5 folding.
484       } else if constexpr (FirstOpcode(opcodes) == 0xC4 || FirstOpcode(opcodes) == 0x8F) {
485         return true;
486       }
487       return false;
488     }(opcodes_no_prefixes);
489     constexpr auto conditions_count = kCountArguments<IsCondition, ArgumentsTypes...>;
490     constexpr auto operands_count = kCountArguments<IsMemoryOperand, ArgumentsTypes...>;
491     constexpr auto labels_count = kCountArguments<IsLabelOperand, ArgumentsTypes...>;
492     constexpr auto registers_count = kCountArguments<IsRegister, ArgumentsTypes...>;
493     // We need to know if Reg field (in ModRM byte) is an opcode extension or if opcode extension
494     // goes into the immediate field.
495     constexpr auto reg_is_opcode_extension =
496         (registers_count + operands_count > 0) &&
497         (registers_count + operands_count + labels_count <
498          2 + vex_xop * (OpcodesCount(opcodes_no_prefixes) - 4));
499     static_assert((registers_count + operands_count + labels_count + conditions_count +
500                    kCountArguments<IsImmediate, ArgumentsTypes...>) == sizeof...(ArgumentsTypes),
501                   "Only registers (with specified size), Operands (with specified size), "
502                   "Conditions, and Immediates are supported.");
503     static_assert(operands_count <= 1, "Only one operand is allowed in instruction.");
504     static_assert(labels_count <= 1, "Only one label is allowed in instruction.");
505     // 0x0f is an opcode extension, if it's not there then we only have one byte opcode.
506     auto opcodes_no_prefixes_no_opcode_extension = [&](auto opcodes) {
507       if constexpr (vex_xop) {
508         static_assert(conditions_count == 0,
509                       "No conditionals are supported in vex/xop instructions.");
510         static_assert((registers_count + operands_count + labels_count) <= 4,
511                       "Up to four-arguments in vex/xop instructions are supported.");
512         constexpr auto vex_xop_byte1 = FirstOpcode(opcodes);
513         constexpr auto vex_xop_byte2 = FirstOpcode(SkipFirstOpcodeFromType(opcodes));
514         constexpr auto vex_xop_byte3 =
515             FirstOpcode(SkipFirstOpcodeFromType(SkipFirstOpcodeFromType(opcodes)));
516         static_cast<Assembler*>(this)
517             ->template EmitVex<vex_xop_byte1,
518                                vex_xop_byte2,
519                                vex_xop_byte3,
520                                reg_is_opcode_extension>(arguments...);
521         return SkipFirstOpcodeFromType(SkipFirstOpcodeFromType(SkipFirstOpcodeFromType(opcodes)));
522       } else {
523         static_assert(conditions_count <= 1, "Only one condition is allowed in instruction.");
524         static_assert((registers_count + operands_count + labels_count) <= 2,
525                       "Only two-arguments legacy instructions are supported.");
526         static_cast<Assembler*>(this)->EmitRex(arguments...);
527         if constexpr (FirstOpcode(opcodes) == 0x0F) {
528           Emit8(0x0F);
529           auto opcodes_no_prefixes_no_opcode_0x0F_extension = SkipFirstOpcodeFromType(opcodes);
530           if constexpr (FirstOpcode(opcodes_no_prefixes_no_opcode_0x0F_extension) == 0x38) {
531             Emit8(0x38);
532             return SkipFirstOpcodeFromType(opcodes_no_prefixes_no_opcode_0x0F_extension);
533           } else if constexpr (FirstOpcode(opcodes_no_prefixes_no_opcode_0x0F_extension) == 0x3A) {
534             Emit8(0x3A);
535             return SkipFirstOpcodeFromType(opcodes_no_prefixes_no_opcode_0x0F_extension);
536           } else {
537             return opcodes_no_prefixes_no_opcode_0x0F_extension;
538           }
539         } else {
540           return opcodes;
541         }
542       }
543     }(opcodes_no_prefixes);
544     // These are older 8086 instructions which encode register number in the opcode itself.
545     if constexpr (registers_count == 1 && operands_count == 0 && labels_count == 0 &&
546                   OpcodesCount(opcodes_no_prefixes_no_opcode_extension) == 1) {
547       static_cast<Assembler*>(this)->EmitRegisterInOpcode(
548           FirstOpcode(opcodes_no_prefixes_no_opcode_extension),
549           ArgumentByType<0, IsRegister>(arguments...));
550       EmitImmediates(arguments...);
551     } else {
552       // Emit "main" single-byte opcode.
553       if constexpr (conditions_count == 1) {
554         auto condition_code = static_cast<uint8_t>(ArgumentByType<0, IsCondition>(arguments...));
555         CHECK_EQ(0, condition_code & 0xF0);
556         Emit8(FirstOpcode(opcodes_no_prefixes_no_opcode_extension) | condition_code);
557       } else {
558         Emit8(FirstOpcode(opcodes_no_prefixes_no_opcode_extension));
559       }
560       auto extra_opcodes = SkipFirstOpcodeFromType(opcodes_no_prefixes_no_opcode_extension);
561       if constexpr (reg_is_opcode_extension) {
562         if constexpr (operands_count == 1) {
563           static_cast<Assembler*>(this)->EmitOperandOp(
564               static_cast<int>(FirstOpcode(extra_opcodes)),
565               ArgumentByType<0, IsMemoryOperand>(arguments...).operand);
566         } else if constexpr (labels_count == 1) {
567           static_cast<Assembler*>(this)->template EmitRipOp<ImmediatesSize<ArgumentsTypes...>()>(
568               static_cast<int>(FirstOpcode(extra_opcodes)),
569               ArgumentByType<0, IsLabelOperand>(arguments...).label);
570         } else {
571           static_cast<Assembler*>(this)->EmitModRM(this->FirstOpcode(extra_opcodes),
572                                                    ArgumentByType<0, IsRegister>(arguments...));
573         }
574       } else if constexpr (registers_count > 0) {
575         if constexpr (operands_count == 1) {
576           static_cast<Assembler*>(this)->EmitOperandOp(
577               ArgumentByType<0, IsRegister>(arguments...),
578               ArgumentByType<0, IsMemoryOperand>(arguments...).operand);
579         } else if constexpr (labels_count == 1) {
580           static_cast<Assembler*>(this)->template EmitRipOp<ImmediatesSize<ArgumentsTypes...>()>(
581               ArgumentByType<0, IsRegister>(arguments...),
582               ArgumentByType<0, IsLabelOperand>(arguments...).label);
583         } else {
584           static_cast<Assembler*>(this)->EmitModRM(ArgumentByType<0, IsRegister>(arguments...),
585                                                    ArgumentByType<1, IsRegister>(arguments...));
586         }
587       }
588       // If reg is an opcode extension then we already used that element.
589       if constexpr (reg_is_opcode_extension) {
590         static_assert(OpcodesCount(extra_opcodes) == 1);
591       } else if constexpr (OpcodesCount(extra_opcodes) > 0) {
592         // Final opcode byte(s) - they are in the place where immediate is expected.
593         // Cmpsps/Cmppd and 3DNow! instructions are using it.
594         static_assert(OpcodesCount(extra_opcodes) == 1);
595         Emit8(FirstOpcode(extra_opcodes));
596       }
597       if constexpr (registers_count + operands_count + labels_count == 4) {
598         if constexpr (kCountArguments<IsImmediate, ArgumentsTypes...> == 1) {
599           Emit8((ArgumentByType<registers_count - 1, IsRegister>(arguments...).num << 4) |
600                 ArgumentByType<0, IsImmediate>(arguments...));
601         } else {
602           static_assert(kCountArguments<IsImmediate, ArgumentsTypes...> == 0);
603           Emit8(ArgumentByType<registers_count - 1, IsRegister>(arguments...).num << 4);
604         }
605       } else {
606         EmitImmediates(arguments...);
607       }
608     }
609   }
610 
  // Processes every entry of jumps_: either patches the displacement stored in the code or
  // registers a recovery point.  Called from Finalize(); defined below the class.
  void ResolveJumps();
612 
613  private:
  // NOTE(review): the macro comes from berberis/base/macros.h; presumably it removes the
  // implicitly generated constructors and assignment - confirm against its definition.
  DISALLOW_IMPLICIT_CONSTRUCTORS(AssemblerX86);
615 };
616 
// Returns the condition which is the reverse of |cond|.
//
// |cond| must not be kInvalidCondition (checked).  Codes of a condition and of its reverse
// differ only in the least significant bit, so the result is obtained by flipping that bit.
template <typename Condition>
inline constexpr Condition ToReverseCond(Condition cond) {
  CHECK(cond != Condition::kInvalidCondition);
  const int flipped = static_cast<int>(cond) ^ 1;
  return static_cast<Condition>(flipped);
}
625 
// Returns a short, statically allocated name of |cond|.  "??" is returned for every value
// which has no name of its own (e.g. kAlways, kNever or kInvalidCondition).
template <typename Condition>
inline constexpr const char* GetCondName(Condition cond) {
  const char* name = "??";
  switch (cond) {
    case Condition::kOverflow:
      name = "O";
      break;
    case Condition::kNoOverflow:
      name = "NO";
      break;
    case Condition::kBelow:
      name = "B";
      break;
    case Condition::kAboveEqual:
      name = "AE";
      break;
    case Condition::kEqual:
      name = "Z";
      break;
    case Condition::kNotEqual:
      name = "NZ";
      break;
    case Condition::kBelowEqual:
      name = "BE";
      break;
    case Condition::kAbove:
      name = "A";
      break;
    case Condition::kNegative:
      name = "N";
      break;
    case Condition::kPositive:
      name = "PL";
      break;
    case Condition::kParityEven:
      name = "PE";
      break;
    case Condition::kParityOdd:
      name = "PO";
      break;
    case Condition::kLess:
      name = "LS";
      break;
    case Condition::kGreaterEqual:
      name = "GE";
      break;
    case Condition::kLessEqual:
      name = "LE";
      break;
    case Condition::kGreater:
      name = "GT";
      break;
    default:
      break;
  }
  return name;
}
665 
// Copies XMM register |src| to |dest|.  Currently this is always done with Movaps.
template <typename Assembler>
inline void AssemblerX86<Assembler>::Pmov(XMMRegister dest, XMMRegister src) {
  // SSE does not have operations for register-to-register integer move and
  // Intel explicitly recommends to use pshufd instead on Pentium4:
  //   See https://software.intel.com/en-us/articles/
  //               fast-simd-integer-move-for-the-intel-pentiumr-4-processor
  // (the two lines above are one URL).
  // These recommendations are CPU-dependent, though, thus we will need to
  // investigate this question further before we could decide when to use
  // movaps (or movapd) and when to use pshufd.
  //
  // TODO(khim): investigate performance problems related to integer MOVs
  Movaps(dest, src);
}
679 
680 template <typename Assembler>
Call(const Label & label)681 inline void AssemblerX86<Assembler>::Call(const Label& label) {
682   if (label.IsBound()) {
683     int32_t offset = label.position() - pc();
684     Call(offset);
685   } else {
686     Emit8(0xe8);
687     Emit32(0xfffffffc);
688     jumps_.push_back(Jump{&label, pc() - 4, false});
689   }
690 }
691 
692 template <typename Assembler>
Jcc(Condition cc,const Label & label)693 inline void AssemblerX86<Assembler>::Jcc(Condition cc, const Label& label) {
694   if (cc == Condition::kAlways) {
695     Jmp(label);
696     return;
697   } else if (cc == Condition::kNever) {
698     return;
699   }
700   CHECK_EQ(0, static_cast<uint8_t>(cc) & 0xF0);
701   // TODO(eaeltsin): may be remove IsBound case?
702   // Then jcc by label will be of fixed size (5 bytes)
703   if (label.IsBound()) {
704     int32_t offset = label.position() - pc();
705     Jcc(cc, offset);
706   } else {
707     Emit16(0x800f | (static_cast<uint8_t>(cc) << 8));
708     Emit32(0xfffffffc);
709     jumps_.push_back(Jump{&label, pc() - 4, false});
710   }
711 }
712 
713 template <typename Assembler>
Jmp(const Label & label)714 inline void AssemblerX86<Assembler>::Jmp(const Label& label) {
715   // TODO(eaeltsin): may be remove IsBound case?
716   // Then jmp by label will be of fixed size (5 bytes)
717   if (label.IsBound()) {
718     int32_t offset = label.position() - pc();
719     Jmp(offset);
720   } else {
721     Emit8(0xe9);
722     Emit32(0xfffffffc);
723     jumps_.push_back(Jump{&label, pc() - 4, false});
724   }
725 }
726 
727 template <typename Assembler>
ResolveJumps()728 inline void AssemblerX86<Assembler>::ResolveJumps() {
729   for (const auto& jump : jumps_) {
730     const Label* label = jump.label;
731     uint32_t pc = jump.pc;
732     CHECK(label->IsBound());
733     if (jump.is_recovery) {
734       // Add pc -> label correspondence to recovery map.
735       AddRelocation(0, RelocationType::RelocRecoveryPoint, pc, label->position());
736     } else {
737       int32_t offset = label->position() - pc;
738       *AddrAs<int32_t>(pc) += offset;
739     }
740   }
741 }
742 
743 // Code size optimized instructions: they have different variants depending on registers used.
744 
745 template <typename Assembler>
Xchgl(Register dest,Register src)746 inline void AssemblerX86<Assembler>::Xchgl(Register dest, Register src) {
747   if (Assembler::IsAccumulator(src) || Assembler::IsAccumulator(dest)) {
748     Register other = Assembler::IsAccumulator(src) ? dest : src;
749     EmitInstruction<Opcodes<0x90>>(Register32Bit(other));
750   } else {
751     // Clang 8 (after r330298) swaps these two arguments.  We are comparing output
752     // to clang in exhaustive test thus we want to match clang behavior exactly.
753 #if __clang_major__ >= 8
754     EmitInstruction<Opcodes<0x87>>(Register32Bit(dest), Register32Bit(src));
755 #else
756     EmitInstruction<Opcodes<0x87>>(Register32Bit(src), Register32Bit(dest));
757 #endif
758   }
759 }
760 
761 }  // namespace berberis
762 
763 #endif  // BERBERIS_ASSEMBLER_COMMON_X86_H_
764