1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef BERBERIS_ASSEMBLER_COMMON_X86_H_
18 #define BERBERIS_ASSEMBLER_COMMON_X86_H_
19
20 #include <cstddef> // std::size_t
21 #include <initializer_list>
22 #include <iterator> // std::begin, std::end, std::next
23 #include <limits> // std::is_integral
24 #include <type_traits> // std::enable_if, std::is_integral
25
26 #include "berberis/assembler/common.h"
27 #include "berberis/base/bit_util.h"
28 #include "berberis/base/logging.h"
29 #include "berberis/base/macros.h" // DISALLOW_IMPLICIT_CONSTRUCTORS
30
31 namespace berberis {
32
33 // AssemblerX86 includes implementation of most x86 assembler instructions.
34 //
35 // x86-32 and x86-64 assemblers are nearly identical, but difference lies in handling
36 // of very low-level instruction details: almost all instructions on x86-64 could include
37 // REX byte which is needed if new registers (%r8 to %r15 or %xmm8 to %xmm15) are used.
38 //
39 // To handle that difference efficiently AssemblerX86 is CRTP class: it's parameterized
40 // by its own descendant and pull certain functions (e.g. GetHighBit or Rex8Size) from
41 // its implementation.
42 //
43 // Certain functions are only implemented by its descendant (since there are instructions
44 // which only exist in x86-32 mode and instructions which only exist in x86-64 mode).
45
46 template <typename Assembler>
47 class AssemblerX86 : public AssemblerBase {
48 public:
AssemblerX86(MachineCode * code)49 explicit AssemblerX86(MachineCode* code) : AssemblerBase(code) {}
50
// Condition codes accepted by conditional instructions.
//
// Values 0x0..0xF are OR-ed directly into the opcode byte by Jcc. kAlways and kNever are
// pseudo-conditions that Jcc handles before any encoding takes place.
enum class Condition {
  kInvalidCondition = -1,

  // Encodable conditions. Each even value and the odd value after it are opposites.
  kOverflow = 0x0,
  kNoOverflow = 0x1,
  kBelow = 0x2,
  kAboveEqual = 0x3,
  kEqual = 0x4,
  kNotEqual = 0x5,
  kBelowEqual = 0x6,
  kAbove = 0x7,
  kNegative = 0x8,
  kPositive = 0x9,
  kParityEven = 0xA,
  kParityOdd = 0xB,
  kLess = 0xC,
  kGreaterEqual = 0xD,
  kLessEqual = 0xE,
  kGreater = 0xF,

  // Pseudo-conditions (do not fit in the four-bit condition field).
  kAlways = 0x10,
  kNever = 0x11,

  // Alternative names for some of the conditions above.
  kCarry = kBelow,
  kNotCarry = kAboveEqual,
  kZero = kEqual,
  kNotZero = kNotEqual,
  kSign = kNegative,
  kNotSign = kPositive
};
81
// Handle for a general-purpose register.
//
// |num| is public only because of C++ restrictions on initializing constexpr static class
// members (see
// https://stackoverflow.com/questions/24527395/compiler-error-when-initializing-constexpr-static-class-member
// for explanation). You are not supposed to access num or use GetHighBit() and GetLowBits()
// functions: treat this type as an opaque cookie.
struct Register {
  // Two handles are equal exactly when their register numbers are equal.
  constexpr bool operator==(const Register& other) const { return num == other.num; }

  constexpr bool operator!=(const Register& other) const { return !(*this == other); }

  uint8_t num;
};
94
// Handle for an XMM register.
//
// |num| is public only because of C++ restrictions on initializing constexpr static class
// members (see
// https://stackoverflow.com/questions/24527395/compiler-error-when-initializing-constexpr-static-class-member
// for explanation). You are not supposed to access num or use GetHighBit() and GetLowBits()
// functions: treat this type as an opaque cookie.
struct XMMRegister {
  // Two handles are equal exactly when their register numbers are equal.
  constexpr bool operator==(const XMMRegister& other) const { return num == other.num; }

  constexpr bool operator!=(const XMMRegister& other) const { return !(*this == other); }

  uint8_t num;
};
107
// Scale selector stored in Operand::scale. The numeric value is log2 of the multiplier in the
// enumerator name.
enum ScaleFactor {
  kTimesOne = 0,
  kTimesTwo = 1,
  kTimesFour = 2,
  kTimesEight = 3
};
109
110 struct Operand {
rexOperand111 constexpr uint8_t rex() const {
112 return Assembler::kIsX86_64 ? ((index.num & 0x08) >> 2) | ((base.num & 0x08) >> 3) : 0;
113 }
114
RequiresRexOperand115 constexpr bool RequiresRex() const {
116 return Assembler::kIsX86_64 ? ((index.num & 0x08) | (base.num & 0x08)) : false;
117 }
118
119 Register base = Assembler::no_register;
120 Register index = Assembler::no_register;
121 ScaleFactor scale = kTimesOne;
122 int32_t disp = 0;
123 };
124
125 struct LabelOperand {
126 const Label& label;
127 };
128
129 // Macro operations.
Finalize()130 void Finalize() { ResolveJumps(); }
131
Align(uint32_t m)132 void Align(uint32_t m) {
133 uint32_t mask = m - 1;
134 uint32_t addr = pc();
135 Nop((m - (addr & mask)) & mask);
136 }
137
Nop(uint32_t bytes)138 void Nop(uint32_t bytes) {
139 static const uint32_t kNumNops = 15;
140 static const uint8_t nop1[] = {0x90};
141 static const uint8_t nop2[] = {0x66, 0x90};
142 static const uint8_t nop3[] = {0x0f, 0x1f, 0x00};
143 static const uint8_t nop4[] = {0x0f, 0x1f, 0x40, 0x00};
144 static const uint8_t nop5[] = {0x0f, 0x1f, 0x44, 0x00, 0x00};
145 static const uint8_t nop6[] = {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x0};
146 static const uint8_t nop7[] = {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x0, 0x00};
147 static const uint8_t nop8[] = {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
148 static const uint8_t nop9[] = {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
149 static const uint8_t nop10[] = {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
150 static const uint8_t nop11[] = {
151 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
152 static const uint8_t nop12[] = {
153 0x66, 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
154 static const uint8_t nop13[] = {
155 0x66, 0x66, 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
156 static const uint8_t nop14[] = {
157 0x66, 0x66, 0x66, 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
158 static const uint8_t nop15[] = {
159 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
160
161 static const uint8_t* nops[kNumNops] = {nop1,
162 nop2,
163 nop3,
164 nop4,
165 nop5,
166 nop6,
167 nop7,
168 nop8,
169 nop9,
170 nop10,
171 nop11,
172 nop12,
173 nop13,
174 nop14,
175 nop15};
176 // Common case.
177 if (bytes == 1) {
178 Emit8(nop1[0]);
179 return;
180 }
181
182 while (bytes > 0) {
183 uint32_t len = bytes;
184 if (len > kNumNops) {
185 len = kNumNops;
186 }
187 EmitSequence(nops[len - 1], len);
188 bytes -= len;
189 }
190 }
191
192 // Instructions.
193 #include "berberis/assembler/gen_assembler_common_x86-inl.h" // NOLINT generated file
194
195 // Flow control.
Jmp(int32_t offset)196 void Jmp(int32_t offset) {
197 uint32_t start = pc();
198 if (offset > -124 && offset < 124) {
199 Emit8(0xeb);
200 Emit8((offset - 1 - (pc() - start)) & 0xFF);
201 } else {
202 Emit8(0xe9);
203 Emit32(offset - 4 - (pc() - start));
204 }
205 }
206
Call(int32_t offset)207 void Call(int32_t offset) {
208 uint32_t start = pc();
209 Emit8(0xe8);
210 Emit32(offset - 4 - (pc() - start));
211 }
212
Jcc(Condition cc,int32_t offset)213 void Jcc(Condition cc, int32_t offset) {
214 if (cc == Condition::kAlways) {
215 Jmp(offset);
216 return;
217 } else if (cc == Condition::kNever) {
218 return;
219 }
220 CHECK_EQ(0, static_cast<uint8_t>(cc) & 0xF0);
221 uint32_t start = pc();
222 if (offset > -124 && offset < 124) {
223 Emit8(0x70 | static_cast<uint8_t>(cc));
224 Emit8(offset - 1 - (pc() - start));
225 } else {
226 Emit8(0x0F);
227 Emit8(0x80 | static_cast<uint8_t>(cc));
228 Emit32(offset - 4 - (pc() - start));
229 }
230 }
231
232 protected:
233 // Helper types to distinguish argument types.
234 struct Register8Bit {
Register8BitRegister8Bit235 explicit constexpr Register8Bit(Register reg) : num(reg.num) {}
236 uint8_t num;
237 };
238
239 struct Register32Bit {
Register32BitRegister32Bit240 explicit constexpr Register32Bit(Register reg) : num(reg.num) {}
Register32BitRegister32Bit241 explicit constexpr Register32Bit(XMMRegister reg) : num(reg.num) {}
242 uint8_t num;
243 };
244
245 // 16-bit and 128-bit vector registers follow the same rules as 32-bit registers.
246 typedef Register32Bit Register16Bit;
247 typedef Register32Bit VectorRegister128Bit;
248 // Certain instructions (Enter/Leave, Jcc/Jmp/Loop, Call/Ret, Push/Pop) always operate
249 // on registers of default size (32-bit in 32-bit mode, 64-bit in 64-bit mode (see
250 // "Instructions Not Requiring REX Prefix in 64-Bit Mode" table in 24594 AMD Manual)
251 // Map these to Register32Bit, too, since they don't need REX.W even in 64-bit mode.
252 typedef Register32Bit RegisterDefaultBit;
253
254 struct Memory32Bit {
Memory32BitMemory32Bit255 explicit Memory32Bit(const Operand& op) : operand(op) {}
256 Operand operand;
257 };
258
259 // 8-bit, 16-bit, 128-bit memory behave the same as 32-bit memory.
260 // Only 64-bit memory is different.
261 typedef Memory32Bit Memory8Bit;
262 typedef Memory32Bit Memory16Bit;
263 // Most vector instructions don't need to use REX.W to access 64-bit or 128-bit memory.
264 typedef Memory32Bit VectorMemory32Bit;
265 typedef Memory32Bit VectorMemory64Bit;
266 typedef Memory32Bit VectorMemory128Bit;
267 // X87 instructions always use the same encoding - even for 64-bit or 28-bytes
268 // memory operands (like in fldenv/fnstenv)
269 typedef Memory32Bit MemoryX87;
270
271 // Labels types for memory quantities. Note that names are similar to the ones before because
272 // they are autogenerated. E.g. VectorLabel32Bit should be read as “VECTOR's operation LABEL
273 // for 32-BIT quantity in memory”.
274 struct Label32Bit {
Label32BitLabel32Bit275 explicit Label32Bit(const struct LabelOperand& l) : label(l.label) {}
276 const Label& label;
277 };
278
279 // 8-bit, 16-bit, 128-bit memory behave the same as 32-bit memory.
280 // Only 64-bit memory is different.
281 typedef Label32Bit Label8Bit;
282 typedef Label32Bit Label16Bit;
283 // Most vector instructions don't need to use REX.W to access 64-bit or 128-bit memory.
284 typedef Label32Bit VectorLabel32Bit;
285 typedef Label32Bit VectorLabel64Bit;
286 typedef Label32Bit VectorLabel128Bit;
287 // X87 instructions always use the same encoding - even for 64-bit or 28-bytes
288 // memory operands (like in fldenv/fnstenv)
289 typedef Label32Bit LabelX87;
290
// Returns true if |code| is one of the legacy prefix bytes handled here:
//   0x66, 0xf2, 0xf3 - used as opcode extensions in SSE;
//   0xf0             - lock, used by cmpxchg.
static constexpr bool IsLegacyPrefix(int code) {
  switch (code) {
    case 0x66:
    case 0xf0:
    case 0xf2:
    case 0xf3:
      return true;
    default:
      return false;
  }
}
296
297 // Delegate check to Assembler::template IsRegister.
298 template <typename ArgumentType>
299 struct IsCondition {
300 static constexpr bool value = std::is_same_v<ArgumentType, Condition>;
301 };
302
303 template <typename ArgumentType>
304 struct IsRegister {
305 static constexpr bool value = Assembler::template IsRegister<ArgumentType>::value;
306 };
307
308 template <typename ArgumentType>
309 struct IsMemoryOperand {
310 static constexpr bool value = Assembler::template IsMemoryOperand<ArgumentType>::value;
311 };
312
313 template <typename ArgumentType>
314 struct IsLabelOperand {
315 static constexpr bool value = Assembler::template IsLabelOperand<ArgumentType>::value;
316 };
317
// Predicate for kCountArguments / ArgumentByType: true for integral types that are 1, 2, 4 or
// 8 bytes wide (the sizes of int8_t, int16_t, int32_t and int64_t).
template <typename ArgumentType>
struct IsImmediate {
  static constexpr bool value =
      std::is_integral<ArgumentType>::value &&
      (sizeof(ArgumentType) == 1 || sizeof(ArgumentType) == 2 || sizeof(ArgumentType) == 4 ||
       sizeof(ArgumentType) == 8);
};
325
// Number of types in ArgumentTypes for which Predicate<Type>::value is true.
// Evaluates to 0 for an empty list.
template <template <typename> typename Predicate, typename... ArgumentTypes>
static constexpr std::size_t kCountArguments =
    (0 + ... + (Predicate<ArgumentTypes>::value ? 1 : 0));
330
331 // Extract arguments selected by Predicate.
332 //
333 // Note: This interface begs for the trick used in EmitFunctionTypeHelper in make_intrinsics.cc
334 // in conjunction with structured bindings.
335 //
336 // Unfortunately returning std::tuple slows down AssemblerTest by about 30% when libc++ and clang
337 // are used together (no slowdown on GCC, no slowdown on clang+libstdc++).
338 //
339 // TODO(http://b/140721204): refactor when it would be safe to return std::tuple from function.
340 //
// Returns, by value, the index-th argument (counting from zero) among those arguments whose
// decayed type satisfies Predicate. Arguments that do not satisfy Predicate are skipped.
// Requesting more matches than the argument list contains does not compile, because the
// recursion runs out of arguments.
template <std::size_t index,
          template <typename>
          typename Predicate,
          typename ArgumentType,
          typename... ArgumentTypes>
static constexpr auto ArgumentByType(ArgumentType argument, ArgumentTypes... arguments) {
  constexpr bool kMatches = Predicate<std::decay_t<ArgumentType>>::value;
  if constexpr (kMatches && index == 0) {
    // This is the match that was asked for.
    return argument;
  } else if constexpr (kMatches) {
    // A match, but an earlier one than requested: consume it and keep looking.
    return ArgumentByType<index - 1, Predicate>(arguments...);
  } else {
    // Not a match: skip it without consuming an index.
    return ArgumentByType<index, Predicate>(arguments...);
  }
}
357
// Emit immediates - they always come at the end and don't affect anything except
// rip-addressing.
//
// This overload ends the recursion: with no arguments left there is nothing to emit.
static constexpr void EmitImmediates() {
}
360
// Walks the argument list from left to right and emits every integral argument with the
// Emit8/Emit16/Emit32/Emit64 call that matches its size. Non-integral arguments are skipped
// here, and so are integral arguments of any other size.
template <typename FirstArgumentType, typename... ArgumentTypes>
void EmitImmediates(FirstArgumentType first_argument, ArgumentTypes... other_arguments) {
  if constexpr (std::is_integral_v<FirstArgumentType>) {
    constexpr std::size_t kWidth = sizeof(FirstArgumentType);
    if constexpr (kWidth == sizeof(int8_t)) {
      Emit8(first_argument);
    } else if constexpr (kWidth == sizeof(int16_t)) {
      Emit16(first_argument);
    } else if constexpr (kWidth == sizeof(int32_t)) {
      Emit32(first_argument);
    } else if constexpr (kWidth == sizeof(int64_t)) {
      Emit64(first_argument);
    }
  }
  // Continue with the remaining arguments; the zero-argument overload ends the recursion.
  EmitImmediates(other_arguments...);
}
378
// Size in bytes that ArgumentType occupies when emitted as an immediate: 1, 2, 4 or 8 for
// integral types of that width, 0 for non-integral types. Integral types of any other width
// are rejected at compile time.
template <typename ArgumentType>
static constexpr size_t ImmediateSize() {
  if constexpr (std::is_integral_v<ArgumentType>) {
    constexpr size_t kBytes = sizeof(ArgumentType);
    static_assert(kBytes == sizeof(int8_t) || kBytes == sizeof(int16_t) ||
                  kBytes == sizeof(int32_t) || kBytes == sizeof(int64_t));
    return kBytes;
  } else {
    return 0;
  }
}
397
398 template <typename... ArgumentTypes>
ImmediatesSize()399 static constexpr size_t ImmediatesSize() {
400 return (ImmediateSize<ArgumentTypes>() + ... + 0);
401 }
402
// Compile-time list of opcode bytes. The struct is empty: all information lives in the
// template arguments, and values of this type are passed around only to carry that type.
template <uint8_t... kOpcodes>
struct Opcodes {
};
406
407 template <uint8_t... kOpcodes>
OpcodesCount(Opcodes<kOpcodes...>)408 static constexpr size_t OpcodesCount(Opcodes<kOpcodes...>) {
409 return sizeof...(kOpcodes);
410 }
411
412 template <uint8_t kOpcode, uint8_t... kOpcodes>
FirstOpcode(Opcodes<kOpcode,kOpcodes...>)413 static constexpr uint8_t FirstOpcode(Opcodes<kOpcode, kOpcodes...>) {
414 return kOpcode;
415 }
416
417 template <uint8_t kOpcode, uint8_t... kOpcodes>
SkipFirstOpcodeFromType(Opcodes<kOpcode,kOpcodes...>)418 static constexpr auto SkipFirstOpcodeFromType(Opcodes<kOpcode, kOpcodes...>) {
419 return Opcodes<kOpcodes...>{};
420 }
421
422 template <uint8_t kOpcode, uint8_t... kOpcodes>
EmitLegacyPrefixes(Opcodes<kOpcode,kOpcodes...> opcodes)423 auto EmitLegacyPrefixes(Opcodes<kOpcode, kOpcodes...> opcodes) {
424 if constexpr (IsLegacyPrefix(kOpcode)) {
425 Emit8(kOpcode);
426 return EmitLegacyPrefixes(Opcodes<kOpcodes...>{});
427 } else {
428 return opcodes;
429 }
430 }
431
432 // Note: We may need separate x87 EmitInstruction if we would want to support
433 // full set of x86 instructions.
434 //
435 // That's because 8087 was completely separate piece of silicon which was only
436 // partially driven by 8086:
437 // https://en.wikipedia.org/wiki/Intel_8087
438 //
439 // In particular it had the following properties:
440 // 1. It had its own separate subset of opcodes - because it did its own decoding.
441 // 2. It had separate set of registers and could *only* access these.
442 // 2a. The 8086, in turn, *couldn't* access these registers at all.
443 // 3. To access memory it was designed to take address from address bus.
444 //
445 // This means that:
446 // 1. x87 instructions are easily recognizable - all instructions with opcodes 0xd8
447 // to 0xdf are x87 instructions, all instructions with other opcodes are not.
448 // 2. We could be sure that x87 registers would only be used with x87 instructions
449 // and other types of registers wouldn't be used with these.
450 // 3. We still would use normal registers for memory access, but REX.W bit wouldn't
451 // be used for 64-bit quantities, whether they are floating point numbers or integers.
452 //
453 // Right now we only use EmitInstruction to emit x87 instructions which are using memory
454 // operands - and it works well enough for that because of #3.
455
456 // If you want to understand how this function works (and how helper function like Vex and
457 // Rex work), you need good understanding of AMD/Intel Instruction format.
458 //
459 // Intel manual includes the most precise explanation, but it's VERY hard to read.
460 //
461 // AMD manual is much easier to read, but it doesn't include description of EVEX
462 // instructions and is less precise. Diagram on page 2 of Volume 3 is especially helpful:
463 // https://www.amd.com/system/files/TechDocs/24594.pdf#page=42
464 //
465 // And the most concise (albeit unofficial) is on the OSDev Wiki:
466 // https://wiki.osdev.org/X86-64_Instruction_Encoding
467
468 // Note: if you change this function (or any of the helper functions) then remove --fast
469 // option from ExhaustiveAssemblerTest to run full blackbox comparison to clang.
470
// Emits one instruction described by InstructionOpcodes (an Opcodes<...> byte list) and by
// the given arguments.
//
// Emission order as implemented below:
//   1. legacy prefixes found at the front of the opcode list (EmitLegacyPrefixes);
//   2. either the vex/xop prefix (Assembler::EmitVex) or the REX prefix (Assembler::EmitRex)
//      followed by the 0x0F, 0x0F 0x38 or 0x0F 0x3A escape bytes when present;
//   3. the main opcode byte, with the condition code OR-ed in when a Condition is passed;
//   4. the ModRM/operand bytes (Assembler::EmitModRM, EmitOperandOp or EmitRipOp);
//   5. a trailing opcode byte, if one is left and reg is not an opcode extension;
//   6. immediates, or the register-in-immediate byte for four-argument instructions.
// Instructions with exactly one register, no memory/label argument and a single remaining
// opcode byte instead go through Assembler::EmitRegisterInOpcode followed by immediates.
//
// Arguments are classified at compile time with IsCondition, IsRegister, IsMemoryOperand,
// IsLabelOperand and IsImmediate; an argument matching none of them fails a static_assert.
// The byte-level encoding of prefixes and ModRM is done by the Assembler descendant and is
// not visible in this file.
471 template <typename InstructionOpcodes, typename... ArgumentsTypes>
EmitInstruction(ArgumentsTypes...arguments)472 void EmitInstruction(ArgumentsTypes... arguments) {
// Legacy prefixes are emitted right away; the returned list no longer contains them.
473 auto opcodes_no_prefixes = EmitLegacyPrefixes(InstructionOpcodes{});
474 // We don't yet support any XOP-encoded instructions, but they are 100% identical to vex ones,
475 // except they are using 0x8F prefix, not 0xC4 prefix.
476 constexpr auto vex_xop = [&](auto opcodes) {
477 if constexpr (OpcodesCount(opcodes) < 3) {
478 return false;
479 // Note that JSON files use AMD approach: bytes are specified as in AMD manual (only we are
480 // replacing ¬R/¬X/¬B and vvvv bits with zeros).
481 //
482 // In particular it means that vex-encoded instructions should be specified with 0xC4 even if
483 // they are always emitted with 0xC4-to-0xC5 folding.
484 } else if constexpr (FirstOpcode(opcodes) == 0xC4 || FirstOpcode(opcodes) == 0x8F) {
485 return true;
486 }
487 return false;
488 }(opcodes_no_prefixes);
// Compile-time classification of the arguments by kind.
489 constexpr auto conditions_count = kCountArguments<IsCondition, ArgumentsTypes...>;
490 constexpr auto operands_count = kCountArguments<IsMemoryOperand, ArgumentsTypes...>;
491 constexpr auto labels_count = kCountArguments<IsLabelOperand, ArgumentsTypes...>;
492 constexpr auto registers_count = kCountArguments<IsRegister, ArgumentsTypes...>;
493 // We need to know if Reg field (in ModRM byte) is an opcode extension or if opcode extension
494 // goes into the immediate field.
// The threshold is 2 for legacy instructions and 2 + (opcode count - 4) for vex/xop ones.
495 constexpr auto reg_is_opcode_extension =
496 (registers_count + operands_count > 0) &&
497 (registers_count + operands_count + labels_count <
498 2 + vex_xop * (OpcodesCount(opcodes_no_prefixes) - 4));
499 static_assert((registers_count + operands_count + labels_count + conditions_count +
500 kCountArguments<IsImmediate, ArgumentsTypes...>) == sizeof...(ArgumentsTypes),
501 "Only registers (with specified size), Operands (with specified size), "
502 "Conditions, and Immediates are supported.");
503 static_assert(operands_count <= 1, "Only one operand is allowed in instruction.");
504 static_assert(labels_count <= 1, "Only one label is allowed in instruction.");
505 // 0x0f is an opcode extension, if it's not there then we only have one byte opcode.
// This lambda emits the vex/xop or REX prefix plus escape bytes and returns the opcode bytes
// that are still left to emit.
506 auto opcodes_no_prefixes_no_opcode_extension = [&](auto opcodes) {
507 if constexpr (vex_xop) {
508 static_assert(conditions_count == 0,
509 "No conditionals are supported in vex/xop instructions.");
510 static_assert((registers_count + operands_count + labels_count) <= 4,
511 "Up to four-arguments in vex/xop instructions are supported.");
512 constexpr auto vex_xop_byte1 = FirstOpcode(opcodes);
513 constexpr auto vex_xop_byte2 = FirstOpcode(SkipFirstOpcodeFromType(opcodes));
514 constexpr auto vex_xop_byte3 =
515 FirstOpcode(SkipFirstOpcodeFromType(SkipFirstOpcodeFromType(opcodes)));
516 static_cast<Assembler*>(this)
517 ->template EmitVex<vex_xop_byte1,
518 vex_xop_byte2,
519 vex_xop_byte3,
520 reg_is_opcode_extension>(arguments...);
// The three prefix bytes were consumed by EmitVex; drop them from the list.
521 return SkipFirstOpcodeFromType(SkipFirstOpcodeFromType(SkipFirstOpcodeFromType(opcodes)));
522 } else {
523 static_assert(conditions_count <= 1, "Only one condition is allowed in instruction.");
524 static_assert((registers_count + operands_count + labels_count) <= 2,
525 "Only two-arguments legacy instructions are supported.");
526 static_cast<Assembler*>(this)->EmitRex(arguments...);
527 if constexpr (FirstOpcode(opcodes) == 0x0F) {
528 Emit8(0x0F);
529 auto opcodes_no_prefixes_no_opcode_0x0F_extension = SkipFirstOpcodeFromType(opcodes);
530 if constexpr (FirstOpcode(opcodes_no_prefixes_no_opcode_0x0F_extension) == 0x38) {
531 Emit8(0x38);
532 return SkipFirstOpcodeFromType(opcodes_no_prefixes_no_opcode_0x0F_extension);
533 } else if constexpr (FirstOpcode(opcodes_no_prefixes_no_opcode_0x0F_extension) == 0x3A) {
534 Emit8(0x3A);
535 return SkipFirstOpcodeFromType(opcodes_no_prefixes_no_opcode_0x0F_extension);
536 } else {
537 return opcodes_no_prefixes_no_opcode_0x0F_extension;
538 }
539 } else {
540 return opcodes;
541 }
542 }
543 }(opcodes_no_prefixes);
544 // These are older 8086 instructions which encode register number in the opcode itself.
545 if constexpr (registers_count == 1 && operands_count == 0 && labels_count == 0 &&
546 OpcodesCount(opcodes_no_prefixes_no_opcode_extension) == 1) {
547 static_cast<Assembler*>(this)->EmitRegisterInOpcode(
548 FirstOpcode(opcodes_no_prefixes_no_opcode_extension),
549 ArgumentByType<0, IsRegister>(arguments...));
550 EmitImmediates(arguments...);
551 } else {
552 // Emit "main" single-byte opcode.
553 if constexpr (conditions_count == 1) {
554 auto condition_code = static_cast<uint8_t>(ArgumentByType<0, IsCondition>(arguments...));
555 CHECK_EQ(0, condition_code & 0xF0);
556 Emit8(FirstOpcode(opcodes_no_prefixes_no_opcode_extension) | condition_code);
557 } else {
558 Emit8(FirstOpcode(opcodes_no_prefixes_no_opcode_extension));
559 }
560 auto extra_opcodes = SkipFirstOpcodeFromType(opcodes_no_prefixes_no_opcode_extension);
// When reg is an opcode extension, the next opcode byte is passed in the place where a
// register argument would otherwise go.
561 if constexpr (reg_is_opcode_extension) {
562 if constexpr (operands_count == 1) {
563 static_cast<Assembler*>(this)->EmitOperandOp(
564 static_cast<int>(FirstOpcode(extra_opcodes)),
565 ArgumentByType<0, IsMemoryOperand>(arguments...).operand);
566 } else if constexpr (labels_count == 1) {
567 static_cast<Assembler*>(this)->template EmitRipOp<ImmediatesSize<ArgumentsTypes...>()>(
568 static_cast<int>(FirstOpcode(extra_opcodes)),
569 ArgumentByType<0, IsLabelOperand>(arguments...).label);
570 } else {
571 static_cast<Assembler*>(this)->EmitModRM(this->FirstOpcode(extra_opcodes),
572 ArgumentByType<0, IsRegister>(arguments...));
573 }
// Otherwise the first register argument is paired with the memory operand, the label, or
// the second register argument.
574 } else if constexpr (registers_count > 0) {
575 if constexpr (operands_count == 1) {
576 static_cast<Assembler*>(this)->EmitOperandOp(
577 ArgumentByType<0, IsRegister>(arguments...),
578 ArgumentByType<0, IsMemoryOperand>(arguments...).operand);
579 } else if constexpr (labels_count == 1) {
580 static_cast<Assembler*>(this)->template EmitRipOp<ImmediatesSize<ArgumentsTypes...>()>(
581 ArgumentByType<0, IsRegister>(arguments...),
582 ArgumentByType<0, IsLabelOperand>(arguments...).label);
583 } else {
584 static_cast<Assembler*>(this)->EmitModRM(ArgumentByType<0, IsRegister>(arguments...),
585 ArgumentByType<1, IsRegister>(arguments...));
586 }
587 }
588 // If reg is an opcode extension then we already used that element.
589 if constexpr (reg_is_opcode_extension) {
590 static_assert(OpcodesCount(extra_opcodes) == 1);
591 } else if constexpr (OpcodesCount(extra_opcodes) > 0) {
592 // Final opcode byte(s) - they are in the place where immediate is expected.
593 // Cmpsps/Cmppd and 3DNow! instructions are using it.
594 static_assert(OpcodesCount(extra_opcodes) == 1);
595 Emit8(FirstOpcode(extra_opcodes));
596 }
// With four register/memory/label arguments the number of the last register argument is
// placed in the upper four bits of one trailing byte; a single immediate, if present, is
// OR-ed into that same byte instead of being emitted separately.
597 if constexpr (registers_count + operands_count + labels_count == 4) {
598 if constexpr (kCountArguments<IsImmediate, ArgumentsTypes...> == 1) {
599 Emit8((ArgumentByType<registers_count - 1, IsRegister>(arguments...).num << 4) |
600 ArgumentByType<0, IsImmediate>(arguments...));
601 } else {
602 static_assert(kCountArguments<IsImmediate, ArgumentsTypes...> == 0);
603 Emit8(ArgumentByType<registers_count - 1, IsRegister>(arguments...).num << 4);
604 }
605 } else {
606 EmitImmediates(arguments...);
607 }
608 }
609 }
610
611 void ResolveJumps();
612
613 private:
614 DISALLOW_IMPLICIT_CONSTRUCTORS(AssemblerX86);
615 };
616
// Returns the condition that is true exactly when |cond| is false.
// kInvalidCondition is rejected with CHECK.
template <typename Condition>
inline constexpr Condition ToReverseCond(Condition cond) {
  CHECK(cond != Condition::kInvalidCondition);
  // Conditions are numbered so that a condition and its reverse differ only in the least
  // significant bit, so flipping that bit is all that is needed.
  const int flipped = static_cast<int>(cond) ^ 1;
  return static_cast<Condition>(flipped);
}
625
// Returns a short textual name for |cond|. Values without an entry in the switch below
// (for example kAlways, kNever or kInvalidCondition) yield "??".
template <typename Condition>
inline constexpr const char* GetCondName(Condition cond) {
  // clang-format off
  switch (cond) {
    case Condition::kOverflow:     return "O";
    case Condition::kNoOverflow:   return "NO";
    case Condition::kBelow:        return "B";
    case Condition::kAboveEqual:   return "AE";
    case Condition::kEqual:        return "Z";
    case Condition::kNotEqual:     return "NZ";
    case Condition::kBelowEqual:   return "BE";
    case Condition::kAbove:        return "A";
    case Condition::kNegative:     return "N";
    case Condition::kPositive:     return "PL";
    case Condition::kParityEven:   return "PE";
    case Condition::kParityOdd:    return "PO";
    case Condition::kLess:         return "LS";
    case Condition::kGreaterEqual: return "GE";
    case Condition::kLessEqual:    return "LE";
    case Condition::kGreater:      return "GT";
    default:                       return "??";
  }
  // clang-format on
}
665
666 template <typename Assembler>
Pmov(XMMRegister dest,XMMRegister src)667 inline void AssemblerX86<Assembler>::Pmov(XMMRegister dest, XMMRegister src) {
668 // SSE does not have operations for register-to-register integer move and
669 // Intel explicitly recommends to use pshufd instead on Pentium4:
670 // See https://software.intel.com/en-us/articles/
671 // fast-simd-integer-move-for-the-intel-pentiumr-4-processor
672 // These recommendations are CPU-dependent, though, thus we will need to
673 // investigate this question further before we could decide when to use
674 // movaps (or movapd) and when to use pshufd.
675 //
676 // TODO(khim): investigate performance problems related to integer MOVs
677 Movaps(dest, src);
678 }
679
680 template <typename Assembler>
Call(const Label & label)681 inline void AssemblerX86<Assembler>::Call(const Label& label) {
682 if (label.IsBound()) {
683 int32_t offset = label.position() - pc();
684 Call(offset);
685 } else {
686 Emit8(0xe8);
687 Emit32(0xfffffffc);
688 jumps_.push_back(Jump{&label, pc() - 4, false});
689 }
690 }
691
692 template <typename Assembler>
Jcc(Condition cc,const Label & label)693 inline void AssemblerX86<Assembler>::Jcc(Condition cc, const Label& label) {
694 if (cc == Condition::kAlways) {
695 Jmp(label);
696 return;
697 } else if (cc == Condition::kNever) {
698 return;
699 }
700 CHECK_EQ(0, static_cast<uint8_t>(cc) & 0xF0);
701 // TODO(eaeltsin): may be remove IsBound case?
702 // Then jcc by label will be of fixed size (5 bytes)
703 if (label.IsBound()) {
704 int32_t offset = label.position() - pc();
705 Jcc(cc, offset);
706 } else {
707 Emit16(0x800f | (static_cast<uint8_t>(cc) << 8));
708 Emit32(0xfffffffc);
709 jumps_.push_back(Jump{&label, pc() - 4, false});
710 }
711 }
712
713 template <typename Assembler>
Jmp(const Label & label)714 inline void AssemblerX86<Assembler>::Jmp(const Label& label) {
715 // TODO(eaeltsin): may be remove IsBound case?
716 // Then jmp by label will be of fixed size (5 bytes)
717 if (label.IsBound()) {
718 int32_t offset = label.position() - pc();
719 Jmp(offset);
720 } else {
721 Emit8(0xe9);
722 Emit32(0xfffffffc);
723 jumps_.push_back(Jump{&label, pc() - 4, false});
724 }
725 }
726
727 template <typename Assembler>
ResolveJumps()728 inline void AssemblerX86<Assembler>::ResolveJumps() {
729 for (const auto& jump : jumps_) {
730 const Label* label = jump.label;
731 uint32_t pc = jump.pc;
732 CHECK(label->IsBound());
733 if (jump.is_recovery) {
734 // Add pc -> label correspondence to recovery map.
735 AddRelocation(0, RelocationType::RelocRecoveryPoint, pc, label->position());
736 } else {
737 int32_t offset = label->position() - pc;
738 *AddrAs<int32_t>(pc) += offset;
739 }
740 }
741 }
742
743 // Code size optimized instructions: they have different variants depending on registers used.
744
745 template <typename Assembler>
Xchgl(Register dest,Register src)746 inline void AssemblerX86<Assembler>::Xchgl(Register dest, Register src) {
747 if (Assembler::IsAccumulator(src) || Assembler::IsAccumulator(dest)) {
748 Register other = Assembler::IsAccumulator(src) ? dest : src;
749 EmitInstruction<Opcodes<0x90>>(Register32Bit(other));
750 } else {
751 // Clang 8 (after r330298) swaps these two arguments. We are comparing output
752 // to clang in exhaustive test thus we want to match clang behavior exactly.
753 #if __clang_major__ >= 8
754 EmitInstruction<Opcodes<0x87>>(Register32Bit(dest), Register32Bit(src));
755 #else
756 EmitInstruction<Opcodes<0x87>>(Register32Bit(src), Register32Bit(dest));
757 #endif
758 }
759 }
760
761 } // namespace berberis
762
763 #endif // BERBERIS_ASSEMBLER_COMMON_X86_H_
764