1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef BERBERIS_ASSEMBLER_COMMON_X86_H_
18 #define BERBERIS_ASSEMBLER_COMMON_X86_H_
19
20 #include <cstddef> // std::size_t
21 #include <cstdint>
22 #include <type_traits> // std::enable_if, std::is_integral
23
24 #include "berberis/assembler/common.h"
25 #include "berberis/base/bit_util.h"
26 #include "berberis/base/checks.h"
27 #include "berberis/base/macros.h" // DISALLOW_IMPLICIT_CONSTRUCTORS
28
29 namespace berberis {
30
31 // AssemblerX86 includes implementation of most x86 assembler instructions.
32 //
33 // x86-32 and x86-64 assemblers are nearly identical, but difference lies in handling
34 // of very low-level instruction details: almost all instructions on x86-64 could include
35 // REX byte which is needed if new registers (%r8 to %r15 or %xmm8 to %xmm15) are used.
36 //
37 // To handle that difference efficiently AssemblerX86 is a CRTP class: it's parameterized
38 // by its own descendant and pulls certain functions (e.g. GetHighBit or Rex8Size) from
39 // its implementation.
40 //
41 // Certain functions are only implemented by its descendant (since there are instructions
42 // which only exist in x86-32 mode and instructions which only exist in x86-64 mode).
43
44 template <typename Assembler>
45 class AssemblerX86 : public AssemblerBase {
46 public:
AssemblerX86(MachineCode * code)47 explicit AssemblerX86(MachineCode* code) : AssemblerBase(code) {}
48
// Condition codes accepted by conditional instructions.
//
// Codes 0x0-0xF fit in the low four bits of an opcode byte: Jcc ORs them into 0x70 (short
// form) or 0x80 (long form). Complementary conditions differ only in the least significant
// bit, which is what ToReverseCond relies on.
//
// kAlways and kNever are pseudo-conditions: Jcc turns them into an unconditional Jmp and into
// no code at all, respectively.
enum class Condition {
  kInvalidCondition = -1,

  kOverflow = 0x0,
  kNoOverflow = 0x1,
  kBelow = 0x2,
  kAboveEqual = 0x3,
  kEqual = 0x4,
  kNotEqual = 0x5,
  kBelowEqual = 0x6,
  kAbove = 0x7,
  kNegative = 0x8,
  kPositive = 0x9,
  kParityEven = 0xA,
  kParityOdd = 0xB,
  kLess = 0xC,
  kGreaterEqual = 0xD,
  kLessEqual = 0xE,
  kGreater = 0xF,
  kAlways = 0x10,
  kNever = 0x11,

  // Alternative names for some of the codes above.
  kCarry = kBelow,
  kNotCarry = kAboveEqual,
  kZero = kEqual,
  kNotZero = kNotEqual,
  kSign = kNegative,
  kNotSign = kPositive
};
79
// General-purpose register, identified by its number.
struct Register {
  // Note: the member below could not be made private because of peculiarities of C++ (see
  // https://stackoverflow.com/questions/24527395/compiler-error-when-initializing-constexpr-static-class-member
  // for explanation), but you are not supposed to access num or use GetHighBit() and
  // GetLowBits() functions. Treat this type as an opaque cookie.

  // Two registers are equal exactly when their numbers are equal.
  constexpr bool operator==(const Register& other) const { return num == other.num; }

  constexpr bool operator!=(const Register& other) const { return !(*this == other); }

  uint8_t num;
};
92
// Register of the x87 stack, identified by its number.
struct X87Register {
  // Note: the member below could not be made private because of peculiarities of C++ (see
  // https://stackoverflow.com/questions/24527395/compiler-error-when-initializing-constexpr-static-class-member
  // for explanation), but you are not supposed to access num or use GetHighBit() and
  // GetLowBits() functions. Treat this type as an opaque cookie.

  // Comparison is defined between x87 registers only. The operand type must be X87Register
  // (not the general-purpose Register): otherwise two x87 registers cannot be compared at all,
  // while a meaningless comparison against a general-purpose register is accepted.
  constexpr bool operator==(const X87Register& reg) const { return num == reg.num; }

  constexpr bool operator!=(const X87Register& reg) const { return num != reg.num; }

  uint8_t num;
};
105
106 static constexpr X87Register st{0};
107 static constexpr X87Register st0{0};
108 static constexpr X87Register st1{1};
109 static constexpr X87Register st2{2};
110 static constexpr X87Register st3{3};
111 static constexpr X87Register st4{4};
112 static constexpr X87Register st5{5};
113 static constexpr X87Register st6{6};
114 static constexpr X87Register st7{7};
115
// XMM (vector) register, identified by its number.
struct XMMRegister {
  // Note: the member below could not be made private because of peculiarities of C++ (see
  // https://stackoverflow.com/questions/24527395/compiler-error-when-initializing-constexpr-static-class-member
  // for explanation), but you are not supposed to access num or use GetHighBit() and
  // GetLowBits() functions. Treat this type as an opaque cookie.

  // Two registers are equal exactly when their numbers are equal.
  constexpr bool operator==(const XMMRegister& other) const { return other.num == num; }

  constexpr bool operator!=(const XMMRegister& other) const { return !operator==(other); }

  uint8_t num;
};
128
// Multiplier applied to the index register of an Operand. Each enumerator's value is the
// base-2 logarithm of the multiplier it names.
enum ScaleFactor {
  kTimesOne = 0,
  kTimesTwo = 1,
  kTimesFour = 2,
  kTimesEight = 3
};
130
131 struct Operand {
rexOperand132 constexpr uint8_t rex() const {
133 return Assembler::kIsX86_64 ? ((index.num & 0x08) >> 2) | ((base.num & 0x08) >> 3) : 0;
134 }
135
RequiresRexOperand136 constexpr bool RequiresRex() const {
137 return Assembler::kIsX86_64 ? ((index.num & 0x08) | (base.num & 0x08)) : false;
138 }
139
140 Register base = Assembler::no_register;
141 Register index = Assembler::no_register;
142 ScaleFactor scale = kTimesOne;
143 int32_t disp = 0;
144 };
145
146 struct LabelOperand {
147 const Label& label;
148 };
149
150 // Macro operations.
Finalize()151 void Finalize() { ResolveJumps(); }
152
P2Align(uint32_t m)153 void P2Align(uint32_t m) {
154 uint32_t mask = m - 1;
155 uint32_t addr = pc();
156 Nop((m - (addr & mask)) & mask);
157 }
158
Nop(uint32_t bytes)159 void Nop(uint32_t bytes) {
160 static const uint32_t kNumNops = 15;
161 static const uint8_t nop1[] = {0x90};
162 static const uint8_t nop2[] = {0x66, 0x90};
163 static const uint8_t nop3[] = {0x0f, 0x1f, 0x00};
164 static const uint8_t nop4[] = {0x0f, 0x1f, 0x40, 0x00};
165 static const uint8_t nop5[] = {0x0f, 0x1f, 0x44, 0x00, 0x00};
166 static const uint8_t nop6[] = {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x0};
167 static const uint8_t nop7[] = {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x0, 0x00};
168 static const uint8_t nop8[] = {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
169 static const uint8_t nop9[] = {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
170 static const uint8_t nop10[] = {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
171 static const uint8_t nop11[] = {
172 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
173 static const uint8_t nop12[] = {
174 0x66, 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
175 static const uint8_t nop13[] = {
176 0x66, 0x66, 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
177 static const uint8_t nop14[] = {
178 0x66, 0x66, 0x66, 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
179 static const uint8_t nop15[] = {
180 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
181
182 static const uint8_t* nops[kNumNops] = {nop1,
183 nop2,
184 nop3,
185 nop4,
186 nop5,
187 nop6,
188 nop7,
189 nop8,
190 nop9,
191 nop10,
192 nop11,
193 nop12,
194 nop13,
195 nop14,
196 nop15};
197 // Common case.
198 if (bytes == 1) {
199 Emit8(nop1[0]);
200 return;
201 }
202
203 while (bytes > 0) {
204 uint32_t len = bytes;
205 if (len > kNumNops) {
206 len = kNumNops;
207 }
208 EmitSequence(nops[len - 1], len);
209 bytes -= len;
210 }
211 }
212
213 // Instructions.
214 #include "berberis/assembler/gen_assembler_common_x86-inl.h" // NOLINT generated file
215
216 // Flow control.
Jmp(int32_t offset)217 void Jmp(int32_t offset) {
218 CHECK_GE(offset, INT32_MIN + 2);
219 int32_t short_offset = offset - 2;
220 if (IsInRange<int8_t>(short_offset)) {
221 Emit8(0xeb);
222 Emit8(static_cast<int8_t>(short_offset));
223 } else {
224 CHECK_GE(offset, INT32_MIN + 5);
225 Emit8(0xe9);
226 Emit32(offset - 5);
227 }
228 }
229
Call(int32_t offset)230 void Call(int32_t offset) {
231 CHECK_GE(offset, INT32_MIN + 5);
232 Emit8(0xe8);
233 Emit32(offset - 5);
234 }
235
Jcc(Condition cc,int32_t offset)236 void Jcc(Condition cc, int32_t offset) {
237 if (cc == Condition::kAlways) {
238 Jmp(offset);
239 return;
240 }
241 if (cc == Condition::kNever) {
242 return;
243 }
244 CHECK_EQ(0, static_cast<uint8_t>(cc) & 0xf0);
245 CHECK_GE(offset, INT32_MIN + 2);
246 int32_t short_offset = offset - 2;
247 if (IsInRange<int8_t>(short_offset)) {
248 Emit8(0x70 | static_cast<uint8_t>(cc));
249 Emit8(static_cast<int8_t>(short_offset));
250 } else {
251 CHECK_GE(offset, INT32_MIN + 6);
252 Emit8(0x0f);
253 Emit8(0x80 | static_cast<uint8_t>(cc));
254 Emit32(offset - 6);
255 }
256 }
257
258 protected:
259 // Helper types to distinguish argument types.
260 struct Register8Bit {
Register8BitRegister8Bit261 explicit constexpr Register8Bit(Register reg) : num(reg.num) {}
262 uint8_t num;
263 };
264
265 struct Register32Bit {
Register32BitRegister32Bit266 explicit constexpr Register32Bit(Register reg) : num(reg.num) {}
Register32BitRegister32Bit267 explicit constexpr Register32Bit(XMMRegister reg) : num(reg.num) {}
268 uint8_t num;
269 };
270
271 // 16-bit and 128-bit vector registers follow the same rules as 32-bit registers.
272 using Register16Bit = Register32Bit;
273 using VectorRegister128Bit = Register32Bit;
274 // Certain instructions (Enter/Leave, Jcc/Jmp/Loop, Call/Ret, Push/Pop) always operate
275 // on registers of default size (32-bit in 32-bit mode, 64-bit in 64-bit mode (see
276 // "Instructions Not Requiring REX Prefix in 64-Bit Mode" table in 24594 AMD Manual)
277 // Map these to Register32Bit, too, since they don't need REX.W even in 64-bit mode.
278 //
279 // x87 instructions fall into that category, too, since they were not expanded in x86-64 mode.
280 using RegisterDefaultBit = Register32Bit;
281
282 struct Memory32Bit {
Memory32BitMemory32Bit283 explicit Memory32Bit(const Operand& op) : operand(op) {}
284 Operand operand;
285 };
286
287 // 8-bit, 16-bit, 128-bit memory behave the same as 32-bit memory.
288 // Only 64-bit memory is different.
289 using Memory8Bit = Memory32Bit;
290 using Memory16Bit = Memory32Bit;
291 // X87 instructions always use the same encoding - even for 64-bit or 28-bytes
292 // memory operands (like in fldenv/fnstenv)
293 using MemoryX87 = Memory32Bit;
294 using MemoryX8716Bit = Memory32Bit;
295 using MemoryX8732Bit = Memory32Bit;
296 using MemoryX8764Bit = Memory32Bit;
297 using MemoryX8780Bit = Memory32Bit;
298 // Most vector instructions don't need to use REX.W to access 64-bit or 128-bit memory.
299 using VectorMemory32Bit = Memory32Bit;
300 using VectorMemory64Bit = Memory32Bit;
301 using VectorMemory128Bit = Memory32Bit;
302
303 // Labels types for memory quantities. Note that names are similar to the ones before because
304 // they are autogenerated. E.g. VectorLabel32Bit should be read as “VECTOR's operation LABEL
305 // for 32-BIT quantity in memory”.
306 struct Label32Bit {
Label32BitLabel32Bit307 explicit Label32Bit(const struct LabelOperand& l) : label(l.label) {}
308 const Label& label;
309 };
310
311 // 8-bit, 16-bit, 128-bit memory behave the same as 32-bit memory.
312 // Only 64-bit memory is different.
313 using Label8Bit = Label32Bit;
314 using Label16Bit = Label32Bit;
315 // X87 instructions always use the same encoding - even for 64-bit or 28-bytes
316 // memory operands (like in fldenv/fnstenv)
317 using LabelX87 = Label32Bit;
318 using LabelX8716Bit = Label32Bit;
319 using LabelX8732Bit = Label32Bit;
320 using LabelX8764Bit = Label32Bit;
321 using LabelX8780Bit = Label32Bit;
322 // Most vector instructions don't need to use REX.W to access 64-bit or 128-bit memory.
323 using VectorLabel32Bit = Label32Bit;
324 using VectorLabel64Bit = Label32Bit;
325 using VectorLabel128Bit = Label32Bit;
326
// Tells whether |code| is one of the legacy prefix bytes recognized by EmitLegacyPrefixes.
static constexpr bool IsLegacyPrefix(int code) {
  switch (code) {
    // Legacy prefixes used as opcode extensions in SSE.
    case 0x66:
    case 0xf2:
    case 0xf3:
    // Lock is used by cmpxchg.
    case 0xf0:
      return true;
    default:
      return false;
  }
}
332
333 // Delegate check to Assembler::template IsRegister.
334 template <typename ArgumentType>
335 struct IsCondition {
336 static constexpr bool value = std::is_same_v<ArgumentType, Condition>;
337 };
338
339 template <typename ArgumentType>
340 struct IsRegister {
341 static constexpr bool value = Assembler::template IsRegister<ArgumentType>::value ||
342 std::is_same_v<ArgumentType, X87Register>;
343 };
344
345 template <typename ArgumentType>
346 struct IsMemoryOperand {
347 static constexpr bool value = Assembler::template IsMemoryOperand<ArgumentType>::value;
348 };
349
350 template <typename ArgumentType>
351 struct IsLabelOperand {
352 static constexpr bool value = Assembler::template IsLabelOperand<ArgumentType>::value;
353 };
354
// An argument is an immediate if it has an integral type that is 1, 2, 4 or 8 bytes wide.
template <typename ArgumentType>
struct IsImmediate {
  static constexpr bool value =
      std::is_integral_v<ArgumentType> &&
      (sizeof(ArgumentType) == 1 || sizeof(ArgumentType) == 2 || sizeof(ArgumentType) == 4 ||
       sizeof(ArgumentType) == 8);
};
362
363 // Count number of arguments selected by Predicate.
template <template <typename> typename Predicate, typename... ArgumentTypes>
static constexpr std::size_t kCountArguments =
    (std::size_t{0} + ... + (Predicate<ArgumentTypes>::value ? 1 : 0));
367
368 // Extract arguments selected by Predicate.
369 //
370 // Note: This interface begs for the trick used in EmitFunctionTypeHelper in make_intrinsics.cc
371 // in conjunction with structured bindings.
372 //
373 // Unfortunately returning std::tuple slows down AssemblerTest by about 30% when libc++ and clang
374 // are used together (no slowdown on GCC, no slowdown on clang+libstdc++).
375 //
376 // TODO(http://b/140721204): refactor when it would be safe to return std::tuple from function.
377 //
template <std::size_t index,
          template <typename>
          typename Predicate,
          typename ArgumentType,
          typename... ArgumentTypes>
static constexpr auto ArgumentByType(ArgumentType argument, ArgumentTypes... arguments) {
  // Does the first argument belong to the category we are looking for?
  constexpr bool kSelected = Predicate<std::decay_t<ArgumentType>>::value;
  if constexpr (kSelected && index == 0) {
    // This is the index-th selected argument.
    return argument;
  } else {
    // Keep scanning; a selected argument that we skip over uses up one position.
    return ArgumentByType<index - (kSelected ? 1 : 0), Predicate>(arguments...);
  }
}
394
// Emit immediates - they always come at the end and don't affect anything except
// rip-addressing.
//
// This overload ends the recursion once every argument has been looked at.
static constexpr void EmitImmediates() {}

// Walks the arguments from left to right and emits each integral one with the Emit function
// that matches its size (1, 2, 4 or 8 bytes). Arguments of any other type are skipped.
template <typename FirstArgumentType, typename... ArgumentTypes>
void EmitImmediates(FirstArgumentType first_argument, ArgumentTypes... other_arguments) {
  if constexpr (std::is_integral_v<FirstArgumentType>) {
    constexpr auto kSize = sizeof(FirstArgumentType);
    if constexpr (kSize == sizeof(int8_t)) {
      Emit8(first_argument);
    } else if constexpr (kSize == sizeof(int16_t)) {
      Emit16(first_argument);
    } else if constexpr (kSize == sizeof(int32_t)) {
      Emit32(first_argument);
    } else if constexpr (kSize == sizeof(int64_t)) {
      Emit64(first_argument);
    }
  }
  EmitImmediates(other_arguments...);
}
415
// Returns the number of bytes EmitImmediates emits for an argument of the given type: the
// size of the type for 1-, 2-, 4- and 8-byte integral types, 0 for non-integral types.
// Integral types of any other size are rejected at compile time.
template <typename ArgumentType>
static constexpr size_t ImmediateSize() {
  if constexpr (std::is_integral_v<ArgumentType>) {
    static_assert(sizeof(ArgumentType) == sizeof(int8_t) ||
                  sizeof(ArgumentType) == sizeof(int16_t) ||
                  sizeof(ArgumentType) == sizeof(int32_t) ||
                  sizeof(ArgumentType) == sizeof(int64_t));
    return sizeof(ArgumentType);
  } else {
    return 0;
  }
}
434
435 template <typename... ArgumentTypes>
ImmediatesSize()436 static constexpr size_t ImmediatesSize() {
437 return (ImmediateSize<ArgumentTypes>() + ... + 0);
438 }
439
// Empty tag type that carries the bytes of an instruction encoding as template arguments.
template <uint8_t... kOpcodes>
struct Opcodes {};

// Returns the number of bytes in the list.
template <uint8_t... kBytes>
static constexpr size_t OpcodesCount(Opcodes<kBytes...> /* opcodes */) {
  return sizeof...(kBytes);
}

// Returns the first byte of a non-empty list.
template <uint8_t kFirst, uint8_t... kRest>
static constexpr uint8_t FirstOpcode(Opcodes<kFirst, kRest...> /* opcodes */) {
  return kFirst;
}

// Returns the list with its first byte removed. The argument list must not be empty.
template <uint8_t kFirst, uint8_t... kRest>
static constexpr auto SkipFirstOpcodeFromType(Opcodes<kFirst, kRest...> /* opcodes */) {
  using Remaining = Opcodes<kRest...>;
  return Remaining{};
}
458
459 template <uint8_t kOpcode, uint8_t... kOpcodes>
EmitLegacyPrefixes(Opcodes<kOpcode,kOpcodes...> opcodes)460 auto EmitLegacyPrefixes(Opcodes<kOpcode, kOpcodes...> opcodes) {
461 if constexpr (IsLegacyPrefix(kOpcode)) {
462 Emit8(kOpcode);
463 return EmitLegacyPrefixes(Opcodes<kOpcodes...>{});
464 } else {
465 return opcodes;
466 }
467 }
468
469 // Note: We may need separate x87 EmitInstruction if we would want to support
470 // full set of x86 instructions.
471 //
472 // That's because the 8087 was a completely separate piece of silicon which was only
473 // partially driven by 8086:
474 // https://en.wikipedia.org/wiki/Intel_8087
475 //
476 // In particular it had the following properties:
477 // 1. It had its own separate subset of opcodes - because it did its own decoding.
478 // 2. It had separate set of registers and could *only* access these.
479 // 2a. The 8086, in turn, *couldn't* access these registers at all.
480 // 3. To access memory it was designed to take address from address bus.
481 //
482 // This means that:
483 // 1. x87 instructions are easily recognizable - all instructions with opcodes 0xd8
484 // to 0xdf are x87 instructions, all instructions with other opcodes are not.
485 // 2. We could be sure that x87 registers would only be used with x87 instructions
486 // and other types of registers wouldn't be used with these.
487 // 3. We still would use normal registers for memory access, but REX.W bit wouldn't
488 // be used for 64-bit quantities, whether they are floating point numbers or integers.
489 //
490 // Right now we only use EmitInstruction to emit x87 instructions which are using memory
491 // operands - and it works well enough for that because of #3.
492
493 // If you want to understand how this function works (and how helper function like Vex and
494 // Rex work), you need good understanding of AMD/Intel Instruction format.
495 //
496 // Intel manual includes the most precise explanation, but it's VERY hard to read.
497 //
498 // AMD manual is much easier to read, but it doesn't include description of EVEX
499 // instructions and is less precise. Diagram on page 2 of Volume 3 is especially helpful:
500 // https://www.amd.com/system/files/TechDocs/24594.pdf#page=42
501 //
502 // And the most concise (albeit unofficial) is on the osdev Wiki:
503 // https://wiki.osdev.org/X86-64_Instruction_Encoding
504
505 // Note: if you change this function (or any of the helper functions) then remove --fast
506 // option from ExhaustiveAssemblerTest to run full blackbox comparison to clang.
507
508 template <typename InstructionOpcodes, typename... ArgumentsTypes>
EmitInstruction(ArgumentsTypes...arguments)509 void EmitInstruction(ArgumentsTypes... arguments) {
510 auto opcodes_no_prefixes = EmitLegacyPrefixes(InstructionOpcodes{});
511 // We don't yet support any XOP-encoded instructions, but they are 100% identical to vex ones,
512 // except they are using 0x8F prefix, not 0xC4 prefix.
513 constexpr auto vex_xop = [&](auto opcodes) {
514 if constexpr (OpcodesCount(opcodes) < 3) {
515 return false;
516 // Note that JSON files use AMD approach: bytes are specified as in AMD manual (only we are
517 // replacing ¬R/¬X/¬B and vvvv bits with zeros).
518 //
519 // In particular it means that vex-encoded instructions should be specified with 0xC4 even if
520 // they are always emitted with 0xC4-to-0xC5 folding.
521 } else if constexpr (FirstOpcode(opcodes) == 0xC4 || FirstOpcode(opcodes) == 0x8F) {
522 return true;
523 }
524 return false;
525 }(opcodes_no_prefixes);
526 constexpr auto conditions_count = kCountArguments<IsCondition, ArgumentsTypes...>;
527 constexpr auto operands_count = kCountArguments<IsMemoryOperand, ArgumentsTypes...>;
528 constexpr auto labels_count = kCountArguments<IsLabelOperand, ArgumentsTypes...>;
529 constexpr auto registers_count = kCountArguments<IsRegister, ArgumentsTypes...>;
530 // We need to know if Reg field (in ModRM byte) is an opcode extension or if opcode extension
531 // goes into the immediate field.
532 constexpr auto reg_is_opcode_extension =
533 (registers_count + operands_count > 0) &&
534 (registers_count + operands_count + labels_count <
535 2 + vex_xop * (OpcodesCount(opcodes_no_prefixes) - 4));
536 static_assert((registers_count + operands_count + labels_count + conditions_count +
537 kCountArguments<IsImmediate, ArgumentsTypes...>) == sizeof...(ArgumentsTypes),
538 "Only registers (with specified size), Operands (with specified size), "
539 "Conditions, and Immediates are supported.");
540 static_assert(operands_count <= 1, "Only one operand is allowed in instruction.");
541 static_assert(labels_count <= 1, "Only one label is allowed in instruction.");
542 // 0x0f is an opcode extension, if it's not there then we only have one byte opcode.
543 auto opcodes_no_prefixes_no_opcode_extension = [&](auto opcodes) {
544 if constexpr (vex_xop) {
545 static_assert(conditions_count == 0,
546 "No conditionals are supported in vex/xop instructions.");
547 static_assert((registers_count + operands_count + labels_count) <= 4,
548 "Up to four-arguments in vex/xop instructions are supported.");
549 constexpr auto vex_xop_byte1 = FirstOpcode(opcodes);
550 constexpr auto vex_xop_byte2 = FirstOpcode(SkipFirstOpcodeFromType(opcodes));
551 constexpr auto vex_xop_byte3 =
552 FirstOpcode(SkipFirstOpcodeFromType(SkipFirstOpcodeFromType(opcodes)));
553 static_cast<Assembler*>(this)
554 ->template EmitVex<vex_xop_byte1,
555 vex_xop_byte2,
556 vex_xop_byte3,
557 reg_is_opcode_extension>(arguments...);
558 return SkipFirstOpcodeFromType(SkipFirstOpcodeFromType(SkipFirstOpcodeFromType(opcodes)));
559 } else {
560 static_assert(conditions_count <= 1, "Only one condition is allowed in instruction.");
561 static_assert((registers_count + operands_count + labels_count) <= 2,
562 "Only two-arguments legacy instructions are supported.");
563 static_cast<Assembler*>(this)->EmitRex(arguments...);
564 if constexpr (FirstOpcode(opcodes) == 0x0F) {
565 Emit8(0x0F);
566 auto opcodes_no_prefixes_no_opcode_0x0F_extension = SkipFirstOpcodeFromType(opcodes);
567 if constexpr (FirstOpcode(opcodes_no_prefixes_no_opcode_0x0F_extension) == 0x38) {
568 Emit8(0x38);
569 return SkipFirstOpcodeFromType(opcodes_no_prefixes_no_opcode_0x0F_extension);
570 } else if constexpr (FirstOpcode(opcodes_no_prefixes_no_opcode_0x0F_extension) == 0x3A) {
571 Emit8(0x3A);
572 return SkipFirstOpcodeFromType(opcodes_no_prefixes_no_opcode_0x0F_extension);
573 } else {
574 return opcodes_no_prefixes_no_opcode_0x0F_extension;
575 }
576 } else {
577 return opcodes;
578 }
579 }
580 }(opcodes_no_prefixes);
581 // These are older 8086 instructions which encode register number in the opcode itself.
582 if constexpr (registers_count == 1 && operands_count == 0 && labels_count == 0 &&
583 OpcodesCount(opcodes_no_prefixes_no_opcode_extension) == 1) {
584 static_cast<Assembler*>(this)->EmitRegisterInOpcode(
585 FirstOpcode(opcodes_no_prefixes_no_opcode_extension),
586 ArgumentByType<0, IsRegister>(arguments...));
587 EmitImmediates(arguments...);
588 } else {
589 // Emit "main" single-byte opcode.
590 if constexpr (conditions_count == 1) {
591 auto condition_code = static_cast<uint8_t>(ArgumentByType<0, IsCondition>(arguments...));
592 CHECK_EQ(0, condition_code & 0xF0);
593 Emit8(FirstOpcode(opcodes_no_prefixes_no_opcode_extension) | condition_code);
594 } else {
595 Emit8(FirstOpcode(opcodes_no_prefixes_no_opcode_extension));
596 }
597 auto extra_opcodes = SkipFirstOpcodeFromType(opcodes_no_prefixes_no_opcode_extension);
598 if constexpr (reg_is_opcode_extension) {
599 if constexpr (operands_count == 1) {
600 static_cast<Assembler*>(this)->EmitOperandOp(
601 static_cast<int>(FirstOpcode(extra_opcodes)),
602 ArgumentByType<0, IsMemoryOperand>(arguments...).operand);
603 } else if constexpr (labels_count == 1) {
604 static_cast<Assembler*>(this)->template EmitRipOp<ImmediatesSize<ArgumentsTypes...>()>(
605 static_cast<int>(FirstOpcode(extra_opcodes)),
606 ArgumentByType<0, IsLabelOperand>(arguments...).label);
607 } else {
608 static_cast<Assembler*>(this)->EmitModRM(this->FirstOpcode(extra_opcodes),
609 ArgumentByType<0, IsRegister>(arguments...));
610 }
611 } else if constexpr (registers_count > 0) {
612 if constexpr (operands_count == 1) {
613 static_cast<Assembler*>(this)->EmitOperandOp(
614 ArgumentByType<0, IsRegister>(arguments...),
615 ArgumentByType<0, IsMemoryOperand>(arguments...).operand);
616 } else if constexpr (labels_count == 1) {
617 static_cast<Assembler*>(this)->template EmitRipOp<ImmediatesSize<ArgumentsTypes...>()>(
618 ArgumentByType<0, IsRegister>(arguments...),
619 ArgumentByType<0, IsLabelOperand>(arguments...).label);
620 } else {
621 static_cast<Assembler*>(this)->EmitModRM(ArgumentByType<0, IsRegister>(arguments...),
622 ArgumentByType<1, IsRegister>(arguments...));
623 }
624 }
625 // If reg is an opcode extension then we already used that element.
626 if constexpr (reg_is_opcode_extension) {
627 static_assert(OpcodesCount(extra_opcodes) == 1);
628 } else if constexpr (OpcodesCount(extra_opcodes) > 0) {
629 // Final opcode byte(s) - they are in the place where immediate is expected.
630 // Cmpsps/Cmppd and 3DNow! instructions are using it.
631 static_assert(OpcodesCount(extra_opcodes) == 1);
632 Emit8(FirstOpcode(extra_opcodes));
633 }
634 if constexpr (registers_count + operands_count + labels_count == 4) {
635 if constexpr (kCountArguments<IsImmediate, ArgumentsTypes...> == 1) {
636 Emit8((ArgumentByType<registers_count - 1, IsRegister>(arguments...).num << 4) |
637 ArgumentByType<0, IsImmediate>(arguments...));
638 } else {
639 static_assert(kCountArguments<IsImmediate, ArgumentsTypes...> == 0);
640 Emit8(ArgumentByType<registers_count - 1, IsRegister>(arguments...).num << 4);
641 }
642 } else {
643 EmitImmediates(arguments...);
644 }
645 }
646 }
647
648 void ResolveJumps();
649
650 private:
651 DISALLOW_IMPLICIT_CONSTRUCTORS(AssemblerX86);
652 };
653
654 // Return the reverse condition.
template <typename Condition>
inline constexpr Condition ToReverseCond(Condition cond) {
  CHECK(cond != Condition::kInvalidCondition);
  // Condition codes are laid out so that a condition and its reverse differ only in the least
  // significant bit, so toggling that bit is all it takes.
  const int reversed = static_cast<int>(cond) ^ 1;
  return static_cast<Condition>(reversed);
}
662
// Returns a short name for |cond|, or "??" for any value that is not one of the sixteen
// conditions listed below (for example kAlways, kNever or kInvalidCondition).
template <typename Condition>
inline constexpr const char* GetCondName(Condition cond) {
  struct Entry {
    Condition value;
    const char* name;
  };
  constexpr Entry kEntries[] = {
      {Condition::kOverflow, "O"},
      {Condition::kNoOverflow, "NO"},
      {Condition::kBelow, "B"},
      {Condition::kAboveEqual, "AE"},
      {Condition::kEqual, "Z"},
      {Condition::kNotEqual, "NZ"},
      {Condition::kBelowEqual, "BE"},
      {Condition::kAbove, "A"},
      {Condition::kNegative, "N"},
      {Condition::kPositive, "PL"},
      {Condition::kParityEven, "PE"},
      {Condition::kParityOdd, "PO"},
      {Condition::kLess, "LS"},
      {Condition::kGreaterEqual, "GE"},
      {Condition::kLessEqual, "LE"},
      {Condition::kGreater, "GT"},
  };
  for (const Entry& entry : kEntries) {
    if (entry.value == cond) {
      return entry.name;
    }
  }
  return "??";
}
702
703 template <typename Assembler>
Pmov(XMMRegister dest,XMMRegister src)704 inline void AssemblerX86<Assembler>::Pmov(XMMRegister dest, XMMRegister src) {
705 // SSE does not have operations for register-to-register integer move and
706 // Intel explicitly recommends to use pshufd instead on Pentium4:
707 // See https://software.intel.com/en-us/articles/
708 // fast-simd-integer-move-for-the-intel-pentiumr-4-processor
709 // These recommendations are CPU-dependent, though, thus we will need to
710 // investigate this question further before we could decide when to use
711 // movaps (or movapd) and when to use pshufd.
712 //
713 // TODO(khim): investigate performance problems related to integer MOVs
714 Movaps(dest, src);
715 }
716
717 template <typename Assembler>
Call(const Label & label)718 inline void AssemblerX86<Assembler>::Call(const Label& label) {
719 if (label.IsBound()) {
720 int32_t offset = label.position() - pc();
721 Call(offset);
722 } else {
723 Emit8(0xe8);
724 Emit32(0xfffffffc);
725 jumps_.push_back(Jump{&label, pc() - 4, false});
726 }
727 }
728
729 template <typename Assembler>
Jcc(Condition cc,const Label & label)730 inline void AssemblerX86<Assembler>::Jcc(Condition cc, const Label& label) {
731 if (cc == Condition::kAlways) {
732 Jmp(label);
733 return;
734 } else if (cc == Condition::kNever) {
735 return;
736 }
737 CHECK_EQ(0, static_cast<uint8_t>(cc) & 0xF0);
738 // TODO(eaeltsin): may be remove IsBound case?
739 // Then jcc by label will be of fixed size (5 bytes)
740 if (label.IsBound()) {
741 int32_t offset = label.position() - pc();
742 Jcc(cc, offset);
743 } else {
744 Emit16(0x800f | (static_cast<uint8_t>(cc) << 8));
745 Emit32(0xfffffffc);
746 jumps_.push_back(Jump{&label, pc() - 4, false});
747 }
748 }
749
750 template <typename Assembler>
Jmp(const Label & label)751 inline void AssemblerX86<Assembler>::Jmp(const Label& label) {
752 // TODO(eaeltsin): may be remove IsBound case?
753 // Then jmp by label will be of fixed size (5 bytes)
754 if (label.IsBound()) {
755 int32_t offset = label.position() - pc();
756 Jmp(offset);
757 } else {
758 Emit8(0xe9);
759 Emit32(0xfffffffc);
760 jumps_.push_back(Jump{&label, pc() - 4, false});
761 }
762 }
763
764 template <typename Assembler>
ResolveJumps()765 inline void AssemblerX86<Assembler>::ResolveJumps() {
766 for (const auto& jump : jumps_) {
767 const Label* label = jump.label;
768 uint32_t pc = jump.pc;
769 CHECK(label->IsBound());
770 if (jump.is_recovery) {
771 // Add pc -> label correspondence to recovery map.
772 AddRelocation(0, RelocationType::RelocRecoveryPoint, pc, label->position());
773 } else {
774 int32_t offset = label->position() - pc;
775 *AddrAs<int32_t>(pc) += offset;
776 }
777 }
778 }
779
780 // Code size optimized instructions: they have different variants depending on registers used.
781
782 template <typename Assembler>
Xchgl(Register dest,Register src)783 inline void AssemblerX86<Assembler>::Xchgl(Register dest, Register src) {
784 if (Assembler::IsAccumulator(src) || Assembler::IsAccumulator(dest)) {
785 Register other = Assembler::IsAccumulator(src) ? dest : src;
786 EmitInstruction<Opcodes<0x90>>(Register32Bit(other));
787 } else {
788 // Clang 8 (after r330298) puts dest before src. We are comparing output
789 // to clang in exhaustive test thus we want to match clang behavior exactly.
790 EmitInstruction<Opcodes<0x87>>(Register32Bit(dest), Register32Bit(src));
791 }
792 }
793
794 } // namespace berberis
795
796 #endif // BERBERIS_ASSEMBLER_COMMON_X86_H_
797