1 /*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "tcg-be-ldst.h"
26
27 #ifdef _WIN32
28 // For some reason, the Mingw32 headers define the 'small' macro which
29 // prevents this source from compiling.
30 #undef small
31 #endif
32
33 #ifndef NDEBUG
34 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
35 #if TCG_TARGET_REG_BITS == 64
36 "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
37 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
38 #else
39 "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
40 #endif
41 };
42 #endif
43
44 static const int tcg_target_reg_alloc_order[] = {
45 #if TCG_TARGET_REG_BITS == 64
46 TCG_REG_RBP,
47 TCG_REG_RBX,
48 TCG_REG_R12,
49 TCG_REG_R13,
50 TCG_REG_R14,
51 TCG_REG_R15,
52 TCG_REG_R10,
53 TCG_REG_R11,
54 TCG_REG_R9,
55 TCG_REG_R8,
56 TCG_REG_RCX,
57 TCG_REG_RDX,
58 TCG_REG_RSI,
59 TCG_REG_RDI,
60 TCG_REG_RAX,
61 #else
62 TCG_REG_EBX,
63 TCG_REG_ESI,
64 TCG_REG_EDI,
65 TCG_REG_EBP,
66 TCG_REG_ECX,
67 TCG_REG_EDX,
68 TCG_REG_EAX,
69 #endif
70 };
71
72 static const int tcg_target_call_iarg_regs[] = {
73 #if TCG_TARGET_REG_BITS == 64
74 #if defined(_WIN64)
75 TCG_REG_RCX,
76 TCG_REG_RDX,
77 #else
78 TCG_REG_RDI,
79 TCG_REG_RSI,
80 TCG_REG_RDX,
81 TCG_REG_RCX,
82 #endif
83 TCG_REG_R8,
84 TCG_REG_R9,
85 #else
86     /* 32-bit mode uses a stack-based calling convention (GCC default). */
87 #endif
88 };
89
90 static const int tcg_target_call_oarg_regs[] = {
91 TCG_REG_EAX,
92 #if TCG_TARGET_REG_BITS == 32
93 TCG_REG_EDX
94 #endif
95 };
96
97 /* Registers used with L constraint, which are the first argument
98    registers on x86_64, and two arbitrary call-clobbered registers on
99    i386. */
100 #if TCG_TARGET_REG_BITS == 64
101 # define TCG_REG_L0 tcg_target_call_iarg_regs[0]
102 # define TCG_REG_L1 tcg_target_call_iarg_regs[1]
103 #else
104 # define TCG_REG_L0 TCG_REG_EAX
105 # define TCG_REG_L1 TCG_REG_EDX
106 #endif
107
108 /* For 32-bit, we are going to attempt to determine at runtime whether cmov
109 is available. However, the host compiler must supply <cpuid.h>, as we're
110 not going to go so far as our own inline assembly. */
111 #if TCG_TARGET_REG_BITS == 64
112 # define have_cmov 1
113 #elif defined(CONFIG_CPUID_H)
114 #include <cpuid.h>
115 static bool have_cmov;
116 #else
117 # define have_cmov 0
118 #endif
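/* A minimal sketch of how have_cmov can be initialised elsewhere (e.g. in
   tcg_target_init), assuming <cpuid.h> provides __get_cpuid and bit_CMOV:

       unsigned a, b, c, d;
       if (__get_cpuid(1, &a, &b, &c, &d)) {
           have_cmov = (d & bit_CMOV) != 0;
       }

   i.e. test the CMOV feature bit in the EDX word of CPUID leaf 1. */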
119
120 static uint8_t *tb_ret_addr;
121
122 static void patch_reloc(uint8_t *code_ptr, int type,
123 intptr_t value, intptr_t addend)
124 {
125 value += addend;
126 switch(type) {
127 case R_386_PC32:
128 value -= (uintptr_t)code_ptr;
129 if (value != (int32_t)value) {
130 tcg_abort();
131 }
132 *(uint32_t *)code_ptr = value;
133 break;
134 case R_386_PC8:
135 value -= (uintptr_t)code_ptr;
136 if (value != (int8_t)value) {
137 tcg_abort();
138 }
139 *(uint8_t *)code_ptr = value;
140 break;
141 default:
142 tcg_abort();
143 }
144 }
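/* For example, a forward jump emitted by tcg_out_jxx below reserves 4 bytes
   and records an R_386_PC32 relocation with addend -4; when the label is
   resolved, patch_reloc() stores (target + addend) - code_ptr, i.e. the
   displacement measured from the end of the 4-byte field, which is what the
   jcc/jmp rel32 encoding expects. */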
145
146 /* parse target specific constraints */
147 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
148 {
149 const char *ct_str;
150
151 ct_str = *pct_str;
152 switch(ct_str[0]) {
153 case 'a':
154 ct->ct |= TCG_CT_REG;
155 tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
156 break;
157 case 'b':
158 ct->ct |= TCG_CT_REG;
159 tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
160 break;
161 case 'c':
162 ct->ct |= TCG_CT_REG;
163 tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
164 break;
165 case 'd':
166 ct->ct |= TCG_CT_REG;
167 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
168 break;
169 case 'S':
170 ct->ct |= TCG_CT_REG;
171 tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
172 break;
173 case 'D':
174 ct->ct |= TCG_CT_REG;
175 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
176 break;
177 case 'q':
178 ct->ct |= TCG_CT_REG;
179 if (TCG_TARGET_REG_BITS == 64) {
180 tcg_regset_set32(ct->u.regs, 0, 0xffff);
181 } else {
182 tcg_regset_set32(ct->u.regs, 0, 0xf);
183 }
184 break;
185 case 'Q':
186 ct->ct |= TCG_CT_REG;
187 tcg_regset_set32(ct->u.regs, 0, 0xf);
188 break;
189 case 'r':
190 ct->ct |= TCG_CT_REG;
191 if (TCG_TARGET_REG_BITS == 64) {
192 tcg_regset_set32(ct->u.regs, 0, 0xffff);
193 } else {
194 tcg_regset_set32(ct->u.regs, 0, 0xff);
195 }
196 break;
197
198 /* qemu_ld/st address constraint */
199 case 'L':
200 ct->ct |= TCG_CT_REG;
201 if (TCG_TARGET_REG_BITS == 64) {
202 tcg_regset_set32(ct->u.regs, 0, 0xffff);
203 } else {
204 tcg_regset_set32(ct->u.regs, 0, 0xff);
205 }
206 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
207 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
208 break;
209
210 case 'e':
211 ct->ct |= TCG_CT_CONST_S32;
212 break;
213 case 'Z':
214 ct->ct |= TCG_CT_CONST_U32;
215 break;
216
217 default:
218 return -1;
219 }
220 ct_str++;
221 *pct_str = ct_str;
222 return 0;
223 }
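/* For instance, the 'L' constraint above gives qemu_ld/st address operands
   any general register except TCG_REG_L0/TCG_REG_L1, since those two are
   clobbered by the softmmu TLB lookup and helper-call setup below. */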
224
225 /* test if a constant matches the constraint */
226 static inline int tcg_target_const_match(tcg_target_long val,
227 const TCGArgConstraint *arg_ct)
228 {
229 int ct = arg_ct->ct;
230 if (ct & TCG_CT_CONST) {
231 return 1;
232 }
233 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
234 return 1;
235 }
236 if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
237 return 1;
238 }
239 return 0;
240 }
241
242 #if TCG_TARGET_REG_BITS == 64
243 # define LOWREGMASK(x) ((x) & 7)
244 #else
245 # define LOWREGMASK(x) (x)
246 #endif
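/* E.g. LOWREGMASK(TCG_REG_R13) == 5; the high register bit is carried
   instead by the REX.B/REX.R/REX.X bits assembled in tcg_out_opc(). */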
247
248 #define P_EXT 0x100 /* 0x0f opcode prefix */
249 #define P_DATA16 0x200 /* 0x66 opcode prefix */
250 #if TCG_TARGET_REG_BITS == 64
251 # define P_ADDR32 0x400 /* 0x67 opcode prefix */
252 # define P_REXW 0x800 /* Set REX.W = 1 */
253 # define P_REXB_R 0x1000 /* REG field as byte register */
254 # define P_REXB_RM 0x2000 /* R/M field as byte register */
255 # define P_GS 0x4000 /* gs segment override */
256 #else
257 # define P_ADDR32 0
258 # define P_REXW 0
259 # define P_REXB_R 0
260 # define P_REXB_RM 0
261 # define P_GS 0
262 #endif
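/* These flags live above the low opcode byte and are stripped by tcg_out_opc.
   E.g. OPC_BSWAP below is (0xc8 | P_EXT), so "bswap %eax" is emitted as the
   two bytes 0f c8; P_REXW merely requests a REX.W prefix on x86_64. */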
263
264 #define OPC_ARITH_EvIz (0x81)
265 #define OPC_ARITH_EvIb (0x83)
266 #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
267 #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
268 #define OPC_BSWAP (0xc8 | P_EXT)
269 #define OPC_CALL_Jz (0xe8)
270 #define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
271 #define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
272 #define OPC_DEC_r32 (0x48)
273 #define OPC_IMUL_GvEv (0xaf | P_EXT)
274 #define OPC_IMUL_GvEvIb (0x6b)
275 #define OPC_IMUL_GvEvIz (0x69)
276 #define OPC_INC_r32 (0x40)
277 #define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
278 #define OPC_JCC_short (0x70) /* ... plus condition code */
279 #define OPC_JMP_long (0xe9)
280 #define OPC_JMP_short (0xeb)
281 #define OPC_LEA (0x8d)
282 #define OPC_MOVB_EvGv (0x88) /* stores, more or less */
283 #define OPC_MOVL_EvGv (0x89) /* stores, more or less */
284 #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
285 #define OPC_MOVB_EvIz (0xc6)
286 #define OPC_MOVL_EvIz (0xc7)
287 #define OPC_MOVL_Iv (0xb8)
288 #define OPC_MOVSBL (0xbe | P_EXT)
289 #define OPC_MOVSWL (0xbf | P_EXT)
290 #define OPC_MOVSLQ (0x63 | P_REXW)
291 #define OPC_MOVZBL (0xb6 | P_EXT)
292 #define OPC_MOVZWL (0xb7 | P_EXT)
293 #define OPC_POP_r32 (0x58)
294 #define OPC_PUSH_r32 (0x50)
295 #define OPC_PUSH_Iv (0x68)
296 #define OPC_PUSH_Ib (0x6a)
297 #define OPC_RET (0xc3)
298 #define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
299 #define OPC_SHIFT_1 (0xd1)
300 #define OPC_SHIFT_Ib (0xc1)
301 #define OPC_SHIFT_cl (0xd3)
302 #define OPC_TESTL (0x85)
303 #define OPC_XCHG_ax_r32 (0x90)
304
305 #define OPC_GRP3_Ev (0xf7)
306 #define OPC_GRP5 (0xff)
307
308 /* Group 1 opcode extensions for 0x80-0x83.
309 These are also used as modifiers for OPC_ARITH. */
310 #define ARITH_ADD 0
311 #define ARITH_OR 1
312 #define ARITH_ADC 2
313 #define ARITH_SBB 3
314 #define ARITH_AND 4
315 #define ARITH_SUB 5
316 #define ARITH_XOR 6
317 #define ARITH_CMP 7
318
319 /* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
320 #define SHIFT_ROL 0
321 #define SHIFT_ROR 1
322 #define SHIFT_SHL 4
323 #define SHIFT_SHR 5
324 #define SHIFT_SAR 7
325
326 /* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */
327 #define EXT3_NOT 2
328 #define EXT3_NEG 3
329 #define EXT3_MUL 4
330 #define EXT3_IMUL 5
331 #define EXT3_DIV 6
332 #define EXT3_IDIV 7
333
334 /* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
335 #define EXT5_INC_Ev 0
336 #define EXT5_DEC_Ev 1
337 #define EXT5_CALLN_Ev 2
338 #define EXT5_JMPN_Ev 4
339
340 /* Condition codes to be added to OPC_JCC_{long,short}. */
341 #define JCC_JMP (-1)
342 #define JCC_JO 0x0
343 #define JCC_JNO 0x1
344 #define JCC_JB 0x2
345 #define JCC_JAE 0x3
346 #define JCC_JE 0x4
347 #define JCC_JNE 0x5
348 #define JCC_JBE 0x6
349 #define JCC_JA 0x7
350 #define JCC_JS 0x8
351 #define JCC_JNS 0x9
352 #define JCC_JP 0xa
353 #define JCC_JNP 0xb
354 #define JCC_JL 0xc
355 #define JCC_JGE 0xd
356 #define JCC_JLE 0xe
357 #define JCC_JG 0xf
358
359 static const uint8_t tcg_cond_to_jcc[] = {
360 [TCG_COND_EQ] = JCC_JE,
361 [TCG_COND_NE] = JCC_JNE,
362 [TCG_COND_LT] = JCC_JL,
363 [TCG_COND_GE] = JCC_JGE,
364 [TCG_COND_LE] = JCC_JLE,
365 [TCG_COND_GT] = JCC_JG,
366 [TCG_COND_LTU] = JCC_JB,
367 [TCG_COND_GEU] = JCC_JAE,
368 [TCG_COND_LEU] = JCC_JBE,
369 [TCG_COND_GTU] = JCC_JA,
370 };
371
372 #if TCG_TARGET_REG_BITS == 64
373 static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
374 {
375 int rex;
376
377 if (opc & P_GS) {
378 tcg_out8(s, 0x65);
379 }
380 if (opc & P_DATA16) {
381 /* We should never be asking for both 16 and 64-bit operation. */
382 assert((opc & P_REXW) == 0);
383 tcg_out8(s, 0x66);
384 }
385 if (opc & P_ADDR32) {
386 tcg_out8(s, 0x67);
387 }
388
389 rex = 0;
390 rex |= (opc & P_REXW) >> 8; /* REX.W */
391 rex |= (r & 8) >> 1; /* REX.R */
392 rex |= (x & 8) >> 2; /* REX.X */
393 rex |= (rm & 8) >> 3; /* REX.B */
394
395 /* P_REXB_{R,RM} indicates that the given register is the low byte.
396 For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
397 as otherwise the encoding indicates %[abcd]h. Note that the values
398 that are ORed in merely indicate that the REX byte must be present;
399 those bits get discarded in output. */
400 rex |= opc & (r >= 4 ? P_REXB_R : 0);
401 rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
402
403 if (rex) {
404 tcg_out8(s, (uint8_t)(rex | 0x40));
405 }
406
407 if (opc & P_EXT) {
408 tcg_out8(s, 0x0f);
409 }
410 tcg_out8(s, opc);
411 }
412 #else
413 static void tcg_out_opc(TCGContext *s, int opc)
414 {
415 if (opc & P_DATA16) {
416 tcg_out8(s, 0x66);
417 }
418 if (opc & P_EXT) {
419 tcg_out8(s, 0x0f);
420 }
421 tcg_out8(s, opc);
422 }
423 /* Discard the register arguments to tcg_out_opc early, so as not to penalize
424 the 32-bit compilation paths. This method works with all versions of gcc,
425 whereas relying on optimization may not be able to exclude them. */
426 #define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
427 #endif
428
429 static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
430 {
431 tcg_out_opc(s, opc, r, rm, 0);
432 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
433 }
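/* Worked examples (informal): tcg_out_modrm(s, OPC_MOVL_GvEv, TCG_REG_EAX,
   TCG_REG_EBX) emits 8b c3, i.e. "movl %ebx, %eax"; with P_REXW and %r12 as
   the source, tcg_out_modrm(s, OPC_MOVL_GvEv | P_REXW, TCG_REG_RAX,
   TCG_REG_R12) emits 49 8b c4, i.e. "movq %r12, %rax" (REX.W plus REX.B). */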
434
435 /* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
436    Either RM or INDEX may be omitted by passing a negative value. In 64-bit
437    mode for absolute addresses, ~RM is the size of the immediate operand
438    that will follow the instruction. */
439
440 static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
441 int index, int shift, intptr_t offset)
442 {
443 int mod, len;
444
445 if (index < 0 && rm < 0) {
446 if (TCG_TARGET_REG_BITS == 64) {
447 /* Try for a rip-relative addressing mode. This has replaced
448 the 32-bit-mode absolute addressing encoding. */
449 intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
450 intptr_t disp = offset - pc;
451 if (disp == (int32_t)disp) {
452 tcg_out_opc(s, opc, r, 0, 0);
453 tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
454 tcg_out32(s, disp);
455 return;
456 }
457
458 /* Try for an absolute address encoding. This requires the
459 use of the MODRM+SIB encoding and is therefore larger than
460 rip-relative addressing. */
461 if (offset == (int32_t)offset) {
462 tcg_out_opc(s, opc, r, 0, 0);
463 tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
464 tcg_out8(s, (4 << 3) | 5);
465 tcg_out32(s, offset);
466 return;
467 }
468
469 /* ??? The memory isn't directly addressable. */
470 tcg_abort();
471 } else {
472 /* Absolute address. */
473 tcg_out_opc(s, opc, r, 0, 0);
474 tcg_out8(s, (r << 3) | 5);
475 tcg_out32(s, offset);
476 return;
477 }
478 }
479
480 /* Find the length of the immediate addend. Note that the encoding
481 that would be used for (%ebp) indicates absolute addressing. */
482 if (rm < 0) {
483 mod = 0, len = 4, rm = 5;
484 } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
485 mod = 0, len = 0;
486 } else if (offset == (int8_t)offset) {
487 mod = 0x40, len = 1;
488 } else {
489 mod = 0x80, len = 4;
490 }
491
492 /* Use a single byte MODRM format if possible. Note that the encoding
493 that would be used for %esp is the escape to the two byte form. */
494 if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
495 /* Single byte MODRM format. */
496 tcg_out_opc(s, opc, r, rm, 0);
497 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
498 } else {
499 /* Two byte MODRM+SIB format. */
500
501 /* Note that the encoding that would place %esp into the index
502 field indicates no index register. In 64-bit mode, the REX.X
503 bit counts, so %r12 can be used as the index. */
504 if (index < 0) {
505 index = 4;
506 } else {
507 assert(index != TCG_REG_ESP);
508 }
509
510 tcg_out_opc(s, opc, r, rm, index);
511 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
512 tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
513 }
514
515 if (len == 1) {
516 tcg_out8(s, offset);
517 } else if (len == 4) {
518 tcg_out32(s, offset);
519 }
520 }
521
522 /* A simplification of the above with no index or shift. */
523 static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
524 int rm, intptr_t offset)
525 {
526 tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
527 }
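/* E.g. tcg_out_modrm_offset(s, OPC_MOVL_GvEv, TCG_REG_EAX, TCG_REG_EBP, 16)
   takes the single-byte ModRM path above and emits 8b 45 10, i.e.
   "movl 16(%ebp), %eax"; a base of %esp (or a real index register) instead
   forces the two-byte ModRM+SIB form. */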
528
529 /* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
530 static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
531 {
532 /* Propagate an opcode prefix, such as P_REXW. */
533 int ext = subop & ~0x7;
534 subop &= 0x7;
535
536 tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
537 }
538
539 static inline void tcg_out_mov(TCGContext *s, TCGType type,
540 TCGReg ret, TCGReg arg)
541 {
542 if (arg != ret) {
543 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
544 tcg_out_modrm(s, opc, ret, arg);
545 }
546 }
547
548 static void tcg_out_movi(TCGContext *s, TCGType type,
549 TCGReg ret, tcg_target_long arg)
550 {
551 tcg_target_long diff;
552
553 if (arg == 0) {
554 tgen_arithr(s, ARITH_XOR, ret, ret);
555 return;
556 }
557 if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
558 tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
559 tcg_out32(s, arg);
560 return;
561 }
562 if (arg == (int32_t)arg) {
563 tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
564 tcg_out32(s, arg);
565 return;
566 }
567
568 /* Try a 7 byte pc-relative lea before the 10 byte movq. */
569 diff = arg - ((uintptr_t)s->code_ptr + 7);
570 if (diff == (int32_t)diff) {
571 tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
572 tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
573 tcg_out32(s, diff);
574 return;
575 }
576
577 tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
578 tcg_out64(s, arg);
579 }
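/* Rough sizes of the alternatives tried above, smallest first: 2-3 bytes for
   xor-zeroing, 5-6 bytes for a movl with a 32-bit immediate, 7 bytes for the
   sign-extended movq imm32 or the pc-relative lea, and only then the full
   10-byte movabs with a 64-bit immediate. */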
580
581 static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
582 {
583 if (val == (int8_t)val) {
584 tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
585 tcg_out8(s, val);
586 } else if (val == (int32_t)val) {
587 tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
588 tcg_out32(s, val);
589 } else {
590 tcg_abort();
591 }
592 }
593
594 static inline void tcg_out_push(TCGContext *s, int reg)
595 {
596 tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
597 }
598
599 static inline void tcg_out_pop(TCGContext *s, int reg)
600 {
601 tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
602 }
603
604 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
605 TCGReg arg1, intptr_t arg2)
606 {
607 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
608 tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
609 }
610
611 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
612 TCGReg arg1, intptr_t arg2)
613 {
614 int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
615 tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
616 }
617
618 static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base,
619 tcg_target_long ofs, tcg_target_long val)
620 {
621 int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0);
622 tcg_out_modrm_offset(s, opc, 0, base, ofs);
623 tcg_out32(s, val);
624 }
625
626 static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
627 {
628 /* Propagate an opcode prefix, such as P_DATA16. */
629 int ext = subopc & ~0x7;
630 subopc &= 0x7;
631
632 if (count == 1) {
633 tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
634 } else {
635 tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
636 tcg_out8(s, count);
637 }
638 }
639
640 static inline void tcg_out_bswap32(TCGContext *s, int reg)
641 {
642 tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
643 }
644
645 static inline void tcg_out_rolw_8(TCGContext *s, int reg)
646 {
647 tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
648 }
649
650 static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
651 {
652 /* movzbl */
653 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
654 tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
655 }
656
657 static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
658 {
659 /* movsbl */
660 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
661 tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
662 }
663
664 static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
665 {
666 /* movzwl */
667 tcg_out_modrm(s, OPC_MOVZWL, dest, src);
668 }
669
670 static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
671 {
672 /* movsw[lq] */
673 tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
674 }
675
676 static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
677 {
678 /* 32-bit mov zero extends. */
679 tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
680 }
681
682 static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
683 {
684 tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
685 }
686
687 static inline void tcg_out_bswap64(TCGContext *s, int reg)
688 {
689 tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
690 }
691
692 static void tgen_arithi(TCGContext *s, int c, int r0,
693 tcg_target_long val, int cf)
694 {
695 int rexw = 0;
696
697 if (TCG_TARGET_REG_BITS == 64) {
698 rexw = c & -8;
699 c &= 7;
700 }
701
702 /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
703 partial flags update stalls on Pentium4 and are not recommended
704 by current Intel optimization manuals. */
705 if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
706 int is_inc = (c == ARITH_ADD) ^ (val < 0);
707 if (TCG_TARGET_REG_BITS == 64) {
708 /* The single-byte increment encodings are re-tasked as the
709 REX prefixes. Use the MODRM encoding. */
710 tcg_out_modrm(s, OPC_GRP5 + rexw,
711 (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
712 } else {
713 tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
714 }
715 return;
716 }
717
718 if (c == ARITH_AND) {
719 if (TCG_TARGET_REG_BITS == 64) {
720 if (val == 0xffffffffu) {
721 tcg_out_ext32u(s, r0, r0);
722 return;
723 }
724 if (val == (uint32_t)val) {
725 /* AND with no high bits set can use a 32-bit operation. */
726 rexw = 0;
727 }
728 }
729 if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
730 tcg_out_ext8u(s, r0, r0);
731 return;
732 }
733 if (val == 0xffffu) {
734 tcg_out_ext16u(s, r0, r0);
735 return;
736 }
737 }
738
739 if (val == (int8_t)val) {
740 tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
741 tcg_out8(s, val);
742 return;
743 }
744 if (rexw == 0 || val == (int32_t)val) {
745 tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
746 tcg_out32(s, val);
747 return;
748 }
749
750 tcg_abort();
751 }
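/* For example, tgen_arithi(s, ARITH_ADD + P_REXW, TCG_REG_RSP, -16, 0) falls
   through to the imm8 case and emits 48 83 c4 f0, i.e. "addq $-16, %rsp";
   AND with masks like 0xff/0xffff/0xffffffff is instead rewritten into the
   shorter zero-extending moves above. */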
752
753 static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
754 {
755 if (val != 0) {
756 tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
757 }
758 }
759
760 /* Use SMALL != 0 to force a short forward branch. */
761 static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
762 {
763 int32_t val, val1;
764 TCGLabel *l = &s->labels[label_index];
765
766 if (l->has_value) {
767 val = l->u.value - (intptr_t)s->code_ptr;
768 val1 = val - 2;
769 if ((int8_t)val1 == val1) {
770 if (opc == -1) {
771 tcg_out8(s, OPC_JMP_short);
772 } else {
773 tcg_out8(s, OPC_JCC_short + opc);
774 }
775 tcg_out8(s, val1);
776 } else {
777 if (small) {
778 tcg_abort();
779 }
780 if (opc == -1) {
781 tcg_out8(s, OPC_JMP_long);
782 tcg_out32(s, val - 5);
783 } else {
784 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
785 tcg_out32(s, val - 6);
786 }
787 }
788 } else if (small) {
789 if (opc == -1) {
790 tcg_out8(s, OPC_JMP_short);
791 } else {
792 tcg_out8(s, OPC_JCC_short + opc);
793 }
794 tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
795 s->code_ptr += 1;
796 } else {
797 if (opc == -1) {
798 tcg_out8(s, OPC_JMP_long);
799 } else {
800 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
801 }
802 tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
803 s->code_ptr += 4;
804 }
805 }
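/* The -2/-5/-6 adjustments above account for the instruction lengths: a short
   jcc/jmp is 2 bytes (opcode + rel8), "jmp rel32" is 5 bytes, and a long
   "jcc rel32" is 6 bytes (0f 8x + rel32), so the stored displacement is
   measured from the end of each instruction. */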
806
807 static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
808 int const_arg2, int rexw)
809 {
810 if (const_arg2) {
811 if (arg2 == 0) {
812 /* test r, r */
813 tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
814 } else {
815 tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
816 }
817 } else {
818 tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
819 }
820 }
821
822 static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
823 TCGArg arg1, TCGArg arg2, int const_arg2,
824 int label_index, int small)
825 {
826 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
827 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
828 }
829
830 #if TCG_TARGET_REG_BITS == 64
831 static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
832 TCGArg arg1, TCGArg arg2, int const_arg2,
833 int label_index, int small)
834 {
835 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
836 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
837 }
838 #else
839 /* XXX: we implement it at the target level to avoid having to
840    handle temporaries that live across basic blocks */
841 static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
842 const int *const_args, int small)
843 {
844 int label_next;
845 label_next = gen_new_label();
846 switch(args[4]) {
847 case TCG_COND_EQ:
848 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
849 label_next, 1);
850 tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
851 args[5], small);
852 break;
853 case TCG_COND_NE:
854 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
855 args[5], small);
856 tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
857 args[5], small);
858 break;
859 case TCG_COND_LT:
860 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
861 args[5], small);
862 tcg_out_jxx(s, JCC_JNE, label_next, 1);
863 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
864 args[5], small);
865 break;
866 case TCG_COND_LE:
867 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
868 args[5], small);
869 tcg_out_jxx(s, JCC_JNE, label_next, 1);
870 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
871 args[5], small);
872 break;
873 case TCG_COND_GT:
874 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
875 args[5], small);
876 tcg_out_jxx(s, JCC_JNE, label_next, 1);
877 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
878 args[5], small);
879 break;
880 case TCG_COND_GE:
881 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
882 args[5], small);
883 tcg_out_jxx(s, JCC_JNE, label_next, 1);
884 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
885 args[5], small);
886 break;
887 case TCG_COND_LTU:
888 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
889 args[5], small);
890 tcg_out_jxx(s, JCC_JNE, label_next, 1);
891 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
892 args[5], small);
893 break;
894 case TCG_COND_LEU:
895 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
896 args[5], small);
897 tcg_out_jxx(s, JCC_JNE, label_next, 1);
898 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
899 args[5], small);
900 break;
901 case TCG_COND_GTU:
902 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
903 args[5], small);
904 tcg_out_jxx(s, JCC_JNE, label_next, 1);
905 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
906 args[5], small);
907 break;
908 case TCG_COND_GEU:
909 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
910 args[5], small);
911 tcg_out_jxx(s, JCC_JNE, label_next, 1);
912 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
913 args[5], small);
914 break;
915 default:
916 tcg_abort();
917 }
918 tcg_out_label(s, label_next, s->code_ptr);
919 }
920 #endif
921
922 static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
923 TCGArg arg1, TCGArg arg2, int const_arg2)
924 {
925 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
926 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
927 tcg_out_ext8u(s, dest, dest);
928 }
929
930 #if TCG_TARGET_REG_BITS == 64
931 static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
932 TCGArg arg1, TCGArg arg2, int const_arg2)
933 {
934 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
935 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
936 tcg_out_ext8u(s, dest, dest);
937 }
938 #else
939 static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
940 const int *const_args)
941 {
942 TCGArg new_args[6];
943 int label_true, label_over;
944
945 memcpy(new_args, args+1, 5*sizeof(TCGArg));
946
947 if (args[0] == args[1] || args[0] == args[2]
948 || (!const_args[3] && args[0] == args[3])
949 || (!const_args[4] && args[0] == args[4])) {
950 /* When the destination overlaps with one of the argument
951 registers, don't do anything tricky. */
952 label_true = gen_new_label();
953 label_over = gen_new_label();
954
955 new_args[5] = label_true;
956 tcg_out_brcond2(s, new_args, const_args+1, 1);
957
958 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
959 tcg_out_jxx(s, JCC_JMP, label_over, 1);
960 tcg_out_label(s, label_true, s->code_ptr);
961
962 tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
963 tcg_out_label(s, label_over, s->code_ptr);
964 } else {
965 /* When the destination does not overlap one of the arguments,
966 clear the destination first, jump if cond false, and emit an
967 increment in the true case. This results in smaller code. */
968
969 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
970
971 label_over = gen_new_label();
972 new_args[4] = tcg_invert_cond(new_args[4]);
973 new_args[5] = label_over;
974 tcg_out_brcond2(s, new_args, const_args+1, 1);
975
976 tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
977 tcg_out_label(s, label_over, s->code_ptr);
978 }
979 }
980 #endif
981
982 static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
983 TCGArg c1, TCGArg c2, int const_c2,
984 TCGArg v1)
985 {
986 tcg_out_cmp(s, c1, c2, const_c2, 0);
987 if (have_cmov) {
988 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
989 } else {
990 int over = gen_new_label();
991 tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
992 tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
993 tcg_out_label(s, over, s->code_ptr);
994 }
995 }
996
997 #if TCG_TARGET_REG_BITS == 64
998 static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
999 TCGArg c1, TCGArg c2, int const_c2,
1000 TCGArg v1)
1001 {
1002 tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
1003 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
1004 }
1005 #endif
1006
1007 static void tcg_out_branch(TCGContext *s, int call, uintptr_t dest)
1008 {
1009 intptr_t disp = dest - (intptr_t)s->code_ptr - 5;
1010
1011 if (disp == (int32_t)disp) {
1012 tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
1013 tcg_out32(s, disp);
1014 } else {
1015 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
1016 tcg_out_modrm(s, OPC_GRP5,
1017 call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
1018 }
1019 }
1020
1021 static inline void tcg_out_calli(TCGContext *s, uintptr_t dest)
1022 {
1023 tcg_out_branch(s, 1, dest);
1024 }
1025
1026 static void tcg_out_jmp(TCGContext *s, uintptr_t dest)
1027 {
1028 tcg_out_branch(s, 0, dest);
1029 }
1030
1031 #if defined(CONFIG_SOFTMMU)
1032 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1033 * int mmu_idx, uintptr_t ra)
1034 */
1035 static const void * const qemu_ld_helpers[16] = {
1036 [MO_UB] = helper_ret_ldub_mmu,
1037 [MO_LEUW] = helper_le_lduw_mmu,
1038 [MO_LEUL] = helper_le_ldul_mmu,
1039 [MO_LEQ] = helper_le_ldq_mmu,
1040 [MO_BEUW] = helper_be_lduw_mmu,
1041 [MO_BEUL] = helper_be_ldul_mmu,
1042 [MO_BEQ] = helper_be_ldq_mmu,
1043 };
1044
1045 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1046 * uintxx_t val, int mmu_idx, uintptr_t ra)
1047 */
1048 static const void * const qemu_st_helpers[16] = {
1049 [MO_UB] = helper_ret_stb_mmu,
1050 [MO_LEUW] = helper_le_stw_mmu,
1051 [MO_LEUL] = helper_le_stl_mmu,
1052 [MO_LEQ] = helper_le_stq_mmu,
1053 [MO_BEUW] = helper_be_stw_mmu,
1054 [MO_BEUL] = helper_be_stl_mmu,
1055 [MO_BEQ] = helper_be_stq_mmu,
1056 };
1057
1058 /* Perform the TLB load and compare.
1059
1060 Inputs:
1061 ADDRLO and ADDRHI contain the low and high part of the address.
1062
1063 MEM_INDEX and S_BITS are the memory context and log2 size of the load.
1064
1065 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
1066 This should be offsetof addr_read or addr_write.
1067
1068 Outputs:
1069 LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
1070 positions of the displacements of forward jumps to the TLB miss case.
1071
1072 Second argument register is loaded with the low part of the address.
1073 In the TLB hit case, it has been adjusted as indicated by the TLB
1074 and so is a host address. In the TLB miss case, it continues to
1075 hold a guest address.
1076
1077 First argument register is clobbered. */
1078
1079 static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
1080 int mem_index, TCGMemOp s_bits,
1081 uint8_t **label_ptr, int which)
1082 {
1083 const TCGReg r0 = TCG_REG_L0;
1084 const TCGReg r1 = TCG_REG_L1;
1085 TCGType ttype = TCG_TYPE_I32;
1086 TCGType htype = TCG_TYPE_I32;
1087 int trexw = 0, hrexw = 0;
1088
1089 if (TCG_TARGET_REG_BITS == 64) {
1090 if (TARGET_LONG_BITS == 64) {
1091 ttype = TCG_TYPE_I64;
1092 trexw = P_REXW;
1093 }
1094 if (TCG_TYPE_PTR == TCG_TYPE_I64) {
1095 htype = TCG_TYPE_I64;
1096 hrexw = P_REXW;
1097 }
1098 }
1099
1100 tcg_out_mov(s, htype, r0, addrlo);
1101 tcg_out_mov(s, ttype, r1, addrlo);
1102
1103 tcg_out_shifti(s, SHIFT_SHR + hrexw, r0,
1104 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1105
1106 tgen_arithi(s, ARITH_AND + trexw, r1,
1107 TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
1108 tgen_arithi(s, ARITH_AND + hrexw, r0,
1109 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
1110
1111 tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
1112 offsetof(CPUArchState, tlb_table[mem_index][0])
1113 + which);
1114
1115 /* cmp 0(r0), r1 */
1116 tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);
1117
1118 /* Prepare for both the fast path add of the tlb addend, and the slow
1119    path function argument setup. There are two cases worth noting:
1120 For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
1121 before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ
1122 copies the entire guest address for the slow path, while truncation
1123 for the 32-bit host happens with the fastpath ADDL below. */
1124 tcg_out_mov(s, ttype, r1, addrlo);
1125
1126 /* jne slow_path */
1127 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1128 label_ptr[0] = s->code_ptr;
1129 s->code_ptr += 4;
1130
1131 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1132 /* cmp 4(r0), addrhi */
1133 tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);
1134
1135 /* jne slow_path */
1136 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1137 label_ptr[1] = s->code_ptr;
1138 s->code_ptr += 4;
1139 }
1140
1141 /* TLB Hit. */
1142
1143 /* add addend(r0), r1 */
1144 tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
1145 offsetof(CPUTLBEntry, addend) - which);
1146 }
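/* Informally, for a 64-bit guest on a 64-bit host the fast path generated
   above looks roughly like (r0 = TCG_REG_L0, r1 = TCG_REG_L1):

       movq  %addrlo, %r0
       movq  %addrlo, %r1
       shrq  $(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), %r0
       andq  $(TARGET_PAGE_MASK | ((1 << s_bits) - 1)), %r1
       andq  $((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), %r0
       leaq  tlb_table_offset+which(%areg0, %r0), %r0
       cmpq  (%r0), %r1
       movq  %addrlo, %r1
       jne   slow_path
       addq  addend_offset(%r0), %r1      # r1 now holds the host address
*/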
1147
1148 /*
1149  * Record the context of a call to the out-of-line helper code for the slow
1150  * path of a load or store, so that we can later generate the correct helper call
1151  */
1152 static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc,
1153 TCGReg datalo, TCGReg datahi,
1154 TCGReg addrlo, TCGReg addrhi,
1155 int mem_index, uint8_t *raddr,
1156 uint8_t **label_ptr)
1157 {
1158 TCGLabelQemuLdst *label = new_ldst_label(s);
1159
1160 label->is_ld = is_ld;
1161 label->opc = opc;
1162 label->datalo_reg = datalo;
1163 label->datahi_reg = datahi;
1164 label->addrlo_reg = addrlo;
1165 label->addrhi_reg = addrhi;
1166 label->mem_index = mem_index;
1167 label->raddr = raddr;
1168 label->label_ptr[0] = label_ptr[0];
1169 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1170 label->label_ptr[1] = label_ptr[1];
1171 }
1172 }
1173
1174 /*
1175 * Generate code for the slow path for a load at the end of block
1176 */
1177 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1178 {
1179 TCGMemOp opc = l->opc;
1180 TCGReg data_reg;
1181 uint8_t **label_ptr = &l->label_ptr[0];
1182
1183 /* resolve label address */
1184 *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
1185 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1186 *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
1187 }
1188
1189 if (TCG_TARGET_REG_BITS == 32) {
1190 int ofs = 0;
1191
1192 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1193 ofs += 4;
1194
1195 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1196 ofs += 4;
1197
1198 if (TARGET_LONG_BITS == 64) {
1199 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1200 ofs += 4;
1201 }
1202
1203 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
1204 ofs += 4;
1205
1206 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
1207 } else {
1208 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1209 /* The second argument is already loaded with addrlo. */
1210 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
1211 l->mem_index);
1212 tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
1213 (uintptr_t)l->raddr);
1214 }
1215
1216 tcg_out_calli(s, (uintptr_t)qemu_ld_helpers[opc & ~MO_SIGN]);
1217
1218 data_reg = l->datalo_reg;
1219 switch (opc & MO_SSIZE) {
1220 case MO_SB:
1221 tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
1222 break;
1223 case MO_SW:
1224 tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
1225 break;
1226 #if TCG_TARGET_REG_BITS == 64
1227 case MO_SL:
1228 tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
1229 break;
1230 #endif
1231 case MO_UB:
1232 case MO_UW:
1233 /* Note that the helpers have zero-extended to tcg_target_long. */
1234 case MO_UL:
1235 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1236 break;
1237 case MO_Q:
1238 if (TCG_TARGET_REG_BITS == 64) {
1239 tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
1240 } else if (data_reg == TCG_REG_EDX) {
1241 /* xchg %edx, %eax */
1242 tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
1243 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
1244 } else {
1245 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1246 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
1247 }
1248 break;
1249 default:
1250 tcg_abort();
1251 }
1252
1253     /* Jump back to the fast-path code following this qemu_ld */
1254 tcg_out_jmp(s, (uintptr_t)l->raddr);
1255 }
1256
1257 /*
1258 * Generate code for the slow path for a store at the end of block
1259 */
1260 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1261 {
1262 TCGMemOp opc = l->opc;
1263 TCGMemOp s_bits = opc & MO_SIZE;
1264 uint8_t **label_ptr = &l->label_ptr[0];
1265 TCGReg retaddr;
1266
1267 /* resolve label address */
1268 *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
1269 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1270 *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
1271 }
1272
1273 if (TCG_TARGET_REG_BITS == 32) {
1274 int ofs = 0;
1275
1276 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1277 ofs += 4;
1278
1279 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1280 ofs += 4;
1281
1282 if (TARGET_LONG_BITS == 64) {
1283 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1284 ofs += 4;
1285 }
1286
1287 tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
1288 ofs += 4;
1289
1290 if (s_bits == MO_64) {
1291 tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
1292 ofs += 4;
1293 }
1294
1295 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
1296 ofs += 4;
1297
1298 retaddr = TCG_REG_EAX;
1299 tcg_out_movi(s, TCG_TYPE_I32, retaddr, (uintptr_t)l->raddr);
1300 tcg_out_st(s, TCG_TYPE_I32, retaddr, TCG_REG_ESP, ofs);
1301 } else {
1302 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1303 /* The second argument is already loaded with addrlo. */
1304 tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
1305 tcg_target_call_iarg_regs[2], l->datalo_reg);
1306 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
1307 l->mem_index);
1308
1309 if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
1310 retaddr = tcg_target_call_iarg_regs[4];
1311 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1312 } else {
1313 retaddr = TCG_REG_RAX;
1314 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1315 tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, 0);
1316 }
1317 }
1318
1319 /* "Tail call" to the helper, with the return address back inline. */
1320 tcg_out_push(s, retaddr);
1321 tcg_out_jmp(s, (uintptr_t)qemu_st_helpers[opc]);
1322 }
1323 #elif defined(__x86_64__) && defined(__linux__)
1324 # include <asm/prctl.h>
1325 # include <sys/prctl.h>
1326
1327 int arch_prctl(int code, unsigned long addr);
1328
1329 static int guest_base_flags;
1330 static inline void setup_guest_base_seg(void)
1331 {
1332 if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
1333 guest_base_flags = P_GS;
1334 }
1335 }
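/* With ARCH_SET_GS in effect, user-only guest accesses can then be emitted
   with the P_GS prefix and a zero displacement, e.g. "movl %gs:(%reg), %eax",
   instead of materialising GUEST_BASE in a register first (see the
   guest_base_flags handling in tcg_out_qemu_ld/st below). */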
1336 #else
1337 # define guest_base_flags 0
1338 static inline void setup_guest_base_seg(void) { }
1339 #endif /* SOFTMMU */
1340
1341 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
1342 TCGReg base, intptr_t ofs, int seg,
1343 TCGMemOp memop)
1344 {
1345 const TCGMemOp bswap = memop & MO_BSWAP;
1346
1347 switch (memop & MO_SSIZE) {
1348 case MO_UB:
1349 tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
1350 break;
1351 case MO_SB:
1352 tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
1353 break;
1354 case MO_UW:
1355 tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
1356 if (bswap) {
1357 tcg_out_rolw_8(s, datalo);
1358 }
1359 break;
1360 case MO_SW:
1361 if (bswap) {
1362 tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
1363 tcg_out_rolw_8(s, datalo);
1364 tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
1365 } else {
1366 tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
1367 datalo, base, ofs);
1368 }
1369 break;
1370 case MO_UL:
1371 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
1372 if (bswap) {
1373 tcg_out_bswap32(s, datalo);
1374 }
1375 break;
1376 #if TCG_TARGET_REG_BITS == 64
1377 case MO_SL:
1378 if (bswap) {
1379 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
1380 tcg_out_bswap32(s, datalo);
1381 tcg_out_ext32s(s, datalo, datalo);
1382 } else {
1383 tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
1384 }
1385 break;
1386 #endif
1387 case MO_Q:
1388 if (TCG_TARGET_REG_BITS == 64) {
1389 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
1390 datalo, base, ofs);
1391 if (bswap) {
1392 tcg_out_bswap64(s, datalo);
1393 }
1394 } else {
1395 if (bswap) {
1396 int t = datalo;
1397 datalo = datahi;
1398 datahi = t;
1399 }
1400 if (base != datalo) {
1401 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
1402 datalo, base, ofs);
1403 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
1404 datahi, base, ofs + 4);
1405 } else {
1406 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
1407 datahi, base, ofs + 4);
1408 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
1409 datalo, base, ofs);
1410 }
1411 if (bswap) {
1412 tcg_out_bswap32(s, datalo);
1413 tcg_out_bswap32(s, datahi);
1414 }
1415 }
1416 break;
1417 default:
1418 tcg_abort();
1419 }
1420 }
1421
1422 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1423    EAX. It will be useful once fixed-register globals are less
1424    common. */
1425 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1426 {
1427 TCGReg datalo, datahi, addrlo;
1428 TCGReg addrhi __attribute__((unused));
1429 TCGMemOp opc;
1430 #if defined(CONFIG_SOFTMMU)
1431 int mem_index;
1432 TCGMemOp s_bits;
1433 uint8_t *label_ptr[2];
1434 #endif
1435
1436 datalo = *args++;
1437 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1438 addrlo = *args++;
1439 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1440 opc = *args++;
1441
1442 #if defined(CONFIG_SOFTMMU)
1443 mem_index = *args++;
1444 s_bits = opc & MO_SIZE;
1445
1446 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
1447 label_ptr, offsetof(CPUTLBEntry, addr_read));
1448
1449 /* TLB Hit. */
1450 tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
1451
1452 /* Record the current context of a load into ldst label */
1453 add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, addrhi,
1454 mem_index, s->code_ptr, label_ptr);
1455 #else
1456 {
1457 int32_t offset = GUEST_BASE;
1458 TCGReg base = addrlo;
1459 int seg = 0;
1460
1461 /* ??? We assume all operations have left us with register contents
1462 that are zero extended. So far this appears to be true. If we
1463 want to enforce this, we can either do an explicit zero-extension
1464 here, or (if GUEST_BASE == 0, or a segment register is in use)
1465 use the ADDR32 prefix. For now, do nothing. */
1466 if (GUEST_BASE && guest_base_flags) {
1467 seg = guest_base_flags;
1468 offset = 0;
1469 } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
1470 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
1471 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
1472 base = TCG_REG_L1;
1473 offset = 0;
1474 }
1475
1476 tcg_out_qemu_ld_direct(s, datalo, datahi, base, offset, seg, opc);
1477 }
1478 #endif
1479 }
1480
1481 static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
1482 TCGReg base, intptr_t ofs, int seg,
1483 TCGMemOp memop)
1484 {
1485 const TCGMemOp bswap = memop & MO_BSWAP;
1486
1487 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1488 we could perform the bswap twice to restore the original value
1489 instead of moving to the scratch. But as it is, the L constraint
1490 means that TCG_REG_L0 is definitely free here. */
1491 const TCGReg scratch = TCG_REG_L0;
1492
1493 switch (memop & MO_SIZE) {
1494 case MO_8:
1495         /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
1496            Use the scratch register if necessary. */
1497 if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
1498 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1499 datalo = scratch;
1500 }
1501 tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
1502 datalo, base, ofs);
1503 break;
1504 case MO_16:
1505 if (bswap) {
1506 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1507 tcg_out_rolw_8(s, scratch);
1508 datalo = scratch;
1509 }
1510 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
1511 datalo, base, ofs);
1512 break;
1513 case MO_32:
1514 if (bswap) {
1515 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1516 tcg_out_bswap32(s, scratch);
1517 datalo = scratch;
1518 }
1519 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
1520 break;
1521 case MO_64:
1522 if (TCG_TARGET_REG_BITS == 64) {
1523 if (bswap) {
1524 tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
1525 tcg_out_bswap64(s, scratch);
1526 datalo = scratch;
1527 }
1528 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
1529 datalo, base, ofs);
1530 } else if (bswap) {
1531 tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
1532 tcg_out_bswap32(s, scratch);
1533 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
1534 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1535 tcg_out_bswap32(s, scratch);
1536 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
1537 } else {
1538 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
1539 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);
1540 }
1541 break;
1542 default:
1543 tcg_abort();
1544 }
1545 }
1546
1547 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1548 {
1549 TCGReg datalo, datahi, addrlo;
1550 TCGReg addrhi __attribute__((unused));
1551 TCGMemOp opc;
1552 #if defined(CONFIG_SOFTMMU)
1553 int mem_index;
1554 TCGMemOp s_bits;
1555 uint8_t *label_ptr[2];
1556 #endif
1557
1558 datalo = *args++;
1559 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1560 addrlo = *args++;
1561 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1562 opc = *args++;
1563
1564 #if defined(CONFIG_SOFTMMU)
1565 mem_index = *args++;
1566 s_bits = opc & MO_SIZE;
1567
1568 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
1569 label_ptr, offsetof(CPUTLBEntry, addr_write));
1570
1571 /* TLB Hit. */
1572 tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
1573
1574 /* Record the current context of a store into ldst label */
1575 add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi,
1576 mem_index, s->code_ptr, label_ptr);
1577 #else
1578 {
1579 int32_t offset = GUEST_BASE;
1580 TCGReg base = addrlo;
1581 int seg = 0;
1582
1583 /* ??? We assume all operations have left us with register contents
1584 that are zero extended. So far this appears to be true. If we
1585 want to enforce this, we can either do an explicit zero-extension
1586 here, or (if GUEST_BASE == 0, or a segment register is in use)
1587 use the ADDR32 prefix. For now, do nothing. */
1588 if (GUEST_BASE && guest_base_flags) {
1589 seg = guest_base_flags;
1590 offset = 0;
1591 } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
1592 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
1593 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
1594 base = TCG_REG_L1;
1595 offset = 0;
1596 }
1597
1598 tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
1599 }
1600 #endif
1601 }
1602
1603 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1604 const TCGArg *args, const int *const_args)
1605 {
1606 int c, rexw = 0;
1607
1608 #if TCG_TARGET_REG_BITS == 64
1609 # define OP_32_64(x) \
1610 case glue(glue(INDEX_op_, x), _i64): \
1611 rexw = P_REXW; /* FALLTHRU */ \
1612 case glue(glue(INDEX_op_, x), _i32)
1613 #else
1614 # define OP_32_64(x) \
1615 case glue(glue(INDEX_op_, x), _i32)
1616 #endif
1617
1618 switch(opc) {
1619 case INDEX_op_exit_tb:
1620 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
1621 tcg_out_jmp(s, (uintptr_t)tb_ret_addr);
1622 break;
1623 case INDEX_op_goto_tb:
1624 if (s->tb_jmp_offset) {
1625 /* direct jump method */
1626 tcg_out8(s, OPC_JMP_long); /* jmp im */
1627 s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
1628 tcg_out32(s, 0);
1629 } else {
1630 /* indirect jump method */
1631 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
1632 (intptr_t)(s->tb_next + args[0]));
1633 }
1634 s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
1635 break;
1636 case INDEX_op_call:
1637 if (const_args[0]) {
1638 tcg_out_calli(s, args[0]);
1639 } else {
1640 /* call *reg */
1641 tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
1642 }
1643 break;
1644 case INDEX_op_br:
1645 tcg_out_jxx(s, JCC_JMP, args[0], 0);
1646 break;
1647 case INDEX_op_movi_i32:
1648 tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
1649 break;
1650 OP_32_64(ld8u):
1651 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1652 tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
1653 break;
1654 OP_32_64(ld8s):
1655 tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
1656 break;
1657 OP_32_64(ld16u):
1658 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1659 tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
1660 break;
1661 OP_32_64(ld16s):
1662 tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
1663 break;
1664 #if TCG_TARGET_REG_BITS == 64
1665 case INDEX_op_ld32u_i64:
1666 #endif
1667 case INDEX_op_ld_i32:
1668 tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1669 break;
1670
1671 OP_32_64(st8):
1672 if (const_args[0]) {
1673 tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
1674 0, args[1], args[2]);
1675 tcg_out8(s, args[0]);
1676 } else {
1677 tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
1678 args[0], args[1], args[2]);
1679 }
1680 break;
1681 OP_32_64(st16):
1682 if (const_args[0]) {
1683 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
1684 0, args[1], args[2]);
1685 tcg_out16(s, args[0]);
1686 } else {
1687 tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
1688 args[0], args[1], args[2]);
1689 }
1690 break;
1691 #if TCG_TARGET_REG_BITS == 64
1692 case INDEX_op_st32_i64:
1693 #endif
1694 case INDEX_op_st_i32:
1695 if (const_args[0]) {
1696 tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
1697 tcg_out32(s, args[0]);
1698 } else {
1699 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1700 }
1701 break;
1702
1703 OP_32_64(add):
1704 /* For 3-operand addition, use LEA. */
1705 if (args[0] != args[1]) {
1706 TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
1707
1708 if (const_args[2]) {
1709 c3 = a2, a2 = -1;
1710 } else if (a0 == a2) {
1711 /* Watch out for dest = src + dest, since we've removed
1712 the matching constraint on the add. */
1713 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
1714 break;
1715 }
1716
1717 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
1718 break;
1719 }
1720 c = ARITH_ADD;
1721 goto gen_arith;
1722 OP_32_64(sub):
1723 c = ARITH_SUB;
1724 goto gen_arith;
1725 OP_32_64(and):
1726 c = ARITH_AND;
1727 goto gen_arith;
1728 OP_32_64(or):
1729 c = ARITH_OR;
1730 goto gen_arith;
1731 OP_32_64(xor):
1732 c = ARITH_XOR;
1733 goto gen_arith;
1734 gen_arith:
1735 if (const_args[2]) {
1736 tgen_arithi(s, c + rexw, args[0], args[2], 0);
1737 } else {
1738 tgen_arithr(s, c + rexw, args[0], args[2]);
1739 }
1740 break;
1741
1742 OP_32_64(mul):
1743 if (const_args[2]) {
1744 int32_t val;
1745 val = args[2];
1746 if (val == (int8_t)val) {
1747 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
1748 tcg_out8(s, val);
1749 } else {
1750 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
1751 tcg_out32(s, val);
1752 }
1753 } else {
1754 tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
1755 }
1756 break;
1757
1758 OP_32_64(div2):
1759 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
1760 break;
1761 OP_32_64(divu2):
1762 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
1763 break;
1764
1765 OP_32_64(shl):
1766 c = SHIFT_SHL;
1767 goto gen_shift;
1768 OP_32_64(shr):
1769 c = SHIFT_SHR;
1770 goto gen_shift;
1771 OP_32_64(sar):
1772 c = SHIFT_SAR;
1773 goto gen_shift;
1774 OP_32_64(rotl):
1775 c = SHIFT_ROL;
1776 goto gen_shift;
1777 OP_32_64(rotr):
1778 c = SHIFT_ROR;
1779 goto gen_shift;
1780 gen_shift:
1781 if (const_args[2]) {
1782 tcg_out_shifti(s, c + rexw, args[0], args[2]);
1783 } else {
1784 tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
1785 }
1786 break;
1787
1788 case INDEX_op_brcond_i32:
1789 tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
1790 args[3], 0);
1791 break;
1792 case INDEX_op_setcond_i32:
1793 tcg_out_setcond32(s, args[3], args[0], args[1],
1794 args[2], const_args[2]);
1795 break;
1796 case INDEX_op_movcond_i32:
1797 tcg_out_movcond32(s, args[5], args[0], args[1],
1798 args[2], const_args[2], args[3]);
1799 break;
1800
1801 OP_32_64(bswap16):
1802 tcg_out_rolw_8(s, args[0]);
1803 break;
1804 OP_32_64(bswap32):
1805 tcg_out_bswap32(s, args[0]);
1806 break;
1807
1808 OP_32_64(neg):
1809 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
1810 break;
1811 OP_32_64(not):
1812 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
1813 break;
1814
1815 OP_32_64(ext8s):
1816 tcg_out_ext8s(s, args[0], args[1], rexw);
1817 break;
1818 OP_32_64(ext16s):
1819 tcg_out_ext16s(s, args[0], args[1], rexw);
1820 break;
1821 OP_32_64(ext8u):
1822 tcg_out_ext8u(s, args[0], args[1]);
1823 break;
1824 OP_32_64(ext16u):
1825 tcg_out_ext16u(s, args[0], args[1]);
1826 break;
1827
1828 case INDEX_op_qemu_ld_i32:
1829 tcg_out_qemu_ld(s, args, 0);
1830 break;
1831 case INDEX_op_qemu_ld_i64:
1832 tcg_out_qemu_ld(s, args, 1);
1833 break;
1834 case INDEX_op_qemu_st_i32:
1835 tcg_out_qemu_st(s, args, 0);
1836 break;
1837 case INDEX_op_qemu_st_i64:
1838 tcg_out_qemu_st(s, args, 1);
1839 break;
1840
1841 OP_32_64(mulu2):
1842 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
1843 break;
1844 OP_32_64(muls2):
1845 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
1846 break;
1847 OP_32_64(add2):
1848 if (const_args[4]) {
1849 tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
1850 } else {
1851 tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
1852 }
1853 if (const_args[5]) {
1854 tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
1855 } else {
1856 tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
1857 }
1858 break;
1859 OP_32_64(sub2):
1860 if (const_args[4]) {
1861 tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
1862 } else {
1863 tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
1864 }
1865 if (const_args[5]) {
1866 tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
1867 } else {
1868 tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
1869 }
1870 break;
1871
1872 #if TCG_TARGET_REG_BITS == 32
1873 case INDEX_op_brcond2_i32:
1874 tcg_out_brcond2(s, args, const_args, 0);
1875 break;
1876 case INDEX_op_setcond2_i32:
1877 tcg_out_setcond2(s, args, const_args);
1878 break;
1879 #else /* TCG_TARGET_REG_BITS == 64 */
1880 case INDEX_op_movi_i64:
1881 tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
1882 break;
1883 case INDEX_op_ld32s_i64:
1884 tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
1885 break;
1886 case INDEX_op_ld_i64:
1887 tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1888 break;
1889 case INDEX_op_st_i64:
1890 if (const_args[0]) {
1891 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
1892 0, args[1], args[2]);
1893 tcg_out32(s, args[0]);
1894 } else {
1895 tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1896 }
1897 break;
1898
1899 case INDEX_op_brcond_i64:
1900 tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
1901 args[3], 0);
1902 break;
1903 case INDEX_op_setcond_i64:
1904 tcg_out_setcond64(s, args[3], args[0], args[1],
1905 args[2], const_args[2]);
1906 break;
1907 case INDEX_op_movcond_i64:
1908 tcg_out_movcond64(s, args[5], args[0], args[1],
1909 args[2], const_args[2], args[3]);
1910 break;
1911
1912 case INDEX_op_bswap64_i64:
1913 tcg_out_bswap64(s, args[0]);
1914 break;
1915 case INDEX_op_ext32u_i64:
1916 tcg_out_ext32u(s, args[0], args[1]);
1917 break;
1918 case INDEX_op_ext32s_i64:
1919 tcg_out_ext32s(s, args[0], args[1]);
1920 break;
1921 #endif
1922
1923 OP_32_64(deposit):
1924 if (args[3] == 0 && args[4] == 8) {
1925 /* load bits 0..7 */
1926 tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
1927 args[2], args[0]);
1928 } else if (args[3] == 8 && args[4] == 8) {
1929 /* load bits 8..15 */
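            /* args[0] + 4 is the ModRM encoding of the corresponding
               high-byte register (EAX->AH, ECX->CH, EDX->DH, EBX->BH);
               the "Q" constraint guarantees args[0] is one of those four. */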
1930 tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
1931 } else if (args[3] == 0 && args[4] == 16) {
1932 /* load bits 0..15 */
1933 tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
1934 } else {
1935 tcg_abort();
1936 }
1937 break;
1938
1939 default:
1940 tcg_abort();
1941 }
1942
1943 #undef OP_32_64
1944 }
1945
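/* Summary of the operand-constraint letters used below (cf.
   target_parse_constraint() earlier in this file):
     "r"          any general register
     "q"          register with a low-byte part (EAX, ECX, EDX, EBX on
                  i386; any register on x86_64)
     "Q"          register whose high byte is also addressable (EAX..EBX)
     "a"/"c"/"d"  fixed EAX / ECX / EDX
     "L"          register usable around the qemu_ld/st slow path
                  (excludes the scratch registers TCG_REG_L0/L1)
     "0"/"1"      must be the same register as output operand 0 / 1
     "i"          any immediate; "e" 32-bit sign-extended immediate;
     "Z"          32-bit zero-extended immediate (64-bit only)
   The qemu_ld/st entries at the end vary with the host word size and
   TARGET_LONG_BITS, since a 64-bit guest address or data value needs a
   register pair on a 32-bit host. */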
1946 static const TCGTargetOpDef x86_op_defs[] = {
1947 { INDEX_op_exit_tb, { } },
1948 { INDEX_op_goto_tb, { } },
1949 { INDEX_op_call, { "ri" } },
1950 { INDEX_op_br, { } },
1951 { INDEX_op_mov_i32, { "r", "r" } },
1952 { INDEX_op_movi_i32, { "r" } },
1953 { INDEX_op_ld8u_i32, { "r", "r" } },
1954 { INDEX_op_ld8s_i32, { "r", "r" } },
1955 { INDEX_op_ld16u_i32, { "r", "r" } },
1956 { INDEX_op_ld16s_i32, { "r", "r" } },
1957 { INDEX_op_ld_i32, { "r", "r" } },
1958 { INDEX_op_st8_i32, { "qi", "r" } },
1959 { INDEX_op_st16_i32, { "ri", "r" } },
1960 { INDEX_op_st_i32, { "ri", "r" } },
1961
1962 { INDEX_op_add_i32, { "r", "r", "ri" } },
1963 { INDEX_op_sub_i32, { "r", "0", "ri" } },
1964 { INDEX_op_mul_i32, { "r", "0", "ri" } },
1965 { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
1966 { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
1967 { INDEX_op_and_i32, { "r", "0", "ri" } },
1968 { INDEX_op_or_i32, { "r", "0", "ri" } },
1969 { INDEX_op_xor_i32, { "r", "0", "ri" } },
1970
1971 { INDEX_op_shl_i32, { "r", "0", "ci" } },
1972 { INDEX_op_shr_i32, { "r", "0", "ci" } },
1973 { INDEX_op_sar_i32, { "r", "0", "ci" } },
1974 { INDEX_op_rotl_i32, { "r", "0", "ci" } },
1975 { INDEX_op_rotr_i32, { "r", "0", "ci" } },
1976
1977 { INDEX_op_brcond_i32, { "r", "ri" } },
1978
1979 { INDEX_op_bswap16_i32, { "r", "0" } },
1980 { INDEX_op_bswap32_i32, { "r", "0" } },
1981
1982 { INDEX_op_neg_i32, { "r", "0" } },
1983
1984 { INDEX_op_not_i32, { "r", "0" } },
1985
1986 { INDEX_op_ext8s_i32, { "r", "q" } },
1987 { INDEX_op_ext16s_i32, { "r", "r" } },
1988 { INDEX_op_ext8u_i32, { "r", "q" } },
1989 { INDEX_op_ext16u_i32, { "r", "r" } },
1990
1991 { INDEX_op_setcond_i32, { "q", "r", "ri" } },
1992
1993 { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
1994 #if TCG_TARGET_HAS_movcond_i32
1995 { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
1996 #endif
1997
1998 { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
1999 { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
2000 { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2001 { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2002
2003 #if TCG_TARGET_REG_BITS == 32
2004 { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
2005 { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
2006 #else
2007 { INDEX_op_mov_i64, { "r", "r" } },
2008 { INDEX_op_movi_i64, { "r" } },
2009 { INDEX_op_ld8u_i64, { "r", "r" } },
2010 { INDEX_op_ld8s_i64, { "r", "r" } },
2011 { INDEX_op_ld16u_i64, { "r", "r" } },
2012 { INDEX_op_ld16s_i64, { "r", "r" } },
2013 { INDEX_op_ld32u_i64, { "r", "r" } },
2014 { INDEX_op_ld32s_i64, { "r", "r" } },
2015 { INDEX_op_ld_i64, { "r", "r" } },
2016 { INDEX_op_st8_i64, { "ri", "r" } },
2017 { INDEX_op_st16_i64, { "ri", "r" } },
2018 { INDEX_op_st32_i64, { "ri", "r" } },
2019 { INDEX_op_st_i64, { "re", "r" } },
2020
2021 { INDEX_op_add_i64, { "r", "r", "re" } },
2022 { INDEX_op_mul_i64, { "r", "0", "re" } },
2023 { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
2024 { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
2025 { INDEX_op_sub_i64, { "r", "0", "re" } },
2026 { INDEX_op_and_i64, { "r", "0", "reZ" } },
2027 { INDEX_op_or_i64, { "r", "0", "re" } },
2028 { INDEX_op_xor_i64, { "r", "0", "re" } },
2029
2030 { INDEX_op_shl_i64, { "r", "0", "ci" } },
2031 { INDEX_op_shr_i64, { "r", "0", "ci" } },
2032 { INDEX_op_sar_i64, { "r", "0", "ci" } },
2033 { INDEX_op_rotl_i64, { "r", "0", "ci" } },
2034 { INDEX_op_rotr_i64, { "r", "0", "ci" } },
2035
2036 { INDEX_op_brcond_i64, { "r", "re" } },
2037 { INDEX_op_setcond_i64, { "r", "r", "re" } },
2038
2039 { INDEX_op_bswap16_i64, { "r", "0" } },
2040 { INDEX_op_bswap32_i64, { "r", "0" } },
2041 { INDEX_op_bswap64_i64, { "r", "0" } },
2042 { INDEX_op_neg_i64, { "r", "0" } },
2043 { INDEX_op_not_i64, { "r", "0" } },
2044
2045 { INDEX_op_ext8s_i64, { "r", "r" } },
2046 { INDEX_op_ext16s_i64, { "r", "r" } },
2047 { INDEX_op_ext32s_i64, { "r", "r" } },
2048 { INDEX_op_ext8u_i64, { "r", "r" } },
2049 { INDEX_op_ext16u_i64, { "r", "r" } },
2050 { INDEX_op_ext32u_i64, { "r", "r" } },
2051
2052 { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
2053 { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
2054
2055 { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
2056 { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
2057 { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
2058 { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
2059 #endif
2060
2061 #if TCG_TARGET_REG_BITS == 64
2062 { INDEX_op_qemu_ld_i32, { "r", "L" } },
2063 { INDEX_op_qemu_st_i32, { "L", "L" } },
2064 { INDEX_op_qemu_ld_i64, { "r", "L" } },
2065 { INDEX_op_qemu_st_i64, { "L", "L" } },
2066 #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
2067 { INDEX_op_qemu_ld_i32, { "r", "L" } },
2068 { INDEX_op_qemu_st_i32, { "L", "L" } },
2069 { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
2070 { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
2071 #else
2072 { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
2073 { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
2074 { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
2075 { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
2076 #endif
2077 { -1 },
2078 };
2079
2080 static int tcg_target_callee_save_regs[] = {
2081 #if TCG_TARGET_REG_BITS == 64
2082 TCG_REG_RBP,
2083 TCG_REG_RBX,
2084 #if defined(_WIN64)
2085 TCG_REG_RDI,
2086 TCG_REG_RSI,
2087 #endif
2088 TCG_REG_R12,
2089 TCG_REG_R13,
2090 TCG_REG_R14, /* Currently used for the global env. */
2091 TCG_REG_R15,
2092 #else
2093 TCG_REG_EBP, /* Currently used for the global env. */
2094 TCG_REG_EBX,
2095 TCG_REG_ESI,
2096 TCG_REG_EDI,
2097 #endif
2098 };
2099
2100 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
2101 and tcg_register_jit. */
2102
2103 #define PUSH_SIZE \
2104 ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
2105 * (TCG_TARGET_REG_BITS / 8))
2106
2107 #define FRAME_SIZE \
2108 ((PUSH_SIZE \
2109 + TCG_STATIC_CALL_ARGS_SIZE \
2110 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2111 + TCG_TARGET_STACK_ALIGN - 1) \
2112 & ~(TCG_TARGET_STACK_ALIGN - 1))
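/* Illustrative only: on a typical SysV x86_64 build (6 callee-saved
   registers plus the return address, and assuming
   TCG_STATIC_CALL_ARGS_SIZE = 128, CPU_TEMP_BUF_NLONGS = 128 and 16-byte
   stack alignment -- check the headers for the values actually in effect):
     PUSH_SIZE  = (1 + 6) * 8 = 56
     FRAME_SIZE = (56 + 128 + 128 * 8 + 15) & ~15 = 1216 */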
2113
2114 /* Generate global QEMU prologue and epilogue code */
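/* On a SysV x86_64 host the code emitted below amounts to roughly
 * (sketch only; TCG_AREG0 is assumed to be %r14 here, matching the
 * callee-save table above):
 *
 *   prologue:  push %rbp; push %rbx; push %r12 .. %r15
 *              mov  %rdi, %r14            # env argument -> TCG_AREG0
 *              sub  $stack_addend, %rsp
 *              jmp  *%rsi                 # enter the translation block
 *   epilogue:  add  $stack_addend, %rsp   # tb_ret_addr points here
 *              pop  %r15 .. %r12; pop %rbx; pop %rbp
 *              ret
 */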
2115 static void tcg_target_qemu_prologue(TCGContext *s)
2116 {
2117 int i, stack_addend;
2118
2119 /* TB prologue */
2120
2121 /* Reserve some stack space, also for TCG temps. */
2122 stack_addend = FRAME_SIZE - PUSH_SIZE;
2123 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2124 CPU_TEMP_BUF_NLONGS * sizeof(long));
2125
2126 /* Save all callee saved registers. */
2127 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2128 tcg_out_push(s, tcg_target_callee_save_regs[i]);
2129 }
2130
2131 #if TCG_TARGET_REG_BITS == 32
2132 tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
2133 (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
2134 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2135 /* jmp *tb. */
2136 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
2137 (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
2138 + stack_addend);
2139 #else
2140 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2141 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2142 /* jmp *tb. */
2143 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
2144 #endif
2145
2146 /* TB epilogue */
2147 tb_ret_addr = s->code_ptr;
2148
2149 tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
2150
2151 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
2152 tcg_out_pop(s, tcg_target_callee_save_regs[i]);
2153 }
2154 tcg_out_opc(s, OPC_RET, 0, 0, 0);
2155
2156 #if !defined(CONFIG_SOFTMMU)
2157 /* Try to set up a segment register to point to GUEST_BASE. */
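    /* See setup_guest_base_seg() earlier in this file for the mechanism;
       on a Linux/x86_64 host this is typically arch_prctl(ARCH_SET_GS, ...)
       so guest accesses can use a %gs segment override (assumption, not
       checked here). */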
2158 if (GUEST_BASE) {
2159 setup_guest_base_seg();
2160 }
2161 #endif
2162 }
2163
2164 static void tcg_target_init(TCGContext *s)
2165 {
2166 /* For 32-bit, 99% certainty that we're running on hardware that supports
2167 cmov, but we still need to check. In case cmov is not available, we'll
2168 use a small forward branch. */
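    /* bit_CMOV is provided by <cpuid.h>: bit 15 of the EDX feature flags
       returned by CPUID leaf 1. */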
2169 #ifndef have_cmov
2170 {
2171 unsigned a, b, c, d;
2172 have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV));
2173 }
2174 #endif
2175
2176 if (TCG_TARGET_REG_BITS == 64) {
2177 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2178 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
2179 } else {
2180 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
2181 }
2182
2183 tcg_regset_clear(tcg_target_call_clobber_regs);
2184 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
2185 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
2186 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
2187 if (TCG_TARGET_REG_BITS == 64) {
2188 #if !defined(_WIN64)
2189 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
2190 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
2191 #endif
2192 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
2193 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
2194 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
2195 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
2196 }
2197
2198 tcg_regset_clear(s->reserved_regs);
2199 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2200
2201 tcg_add_target_add_op_defs(x86_op_defs);
2202 }
2203
2204 typedef struct {
2205 DebugFrameCIE cie;
2206 DebugFrameFDEHeader fde;
2207 uint8_t fde_def_cfa[4];
2208 uint8_t fde_reg_ofs[14];
2209 } DebugFrame;
2210
2211 /* FRAME_SIZE is expected to fit in a two-byte uleb128 encoding, as assumed below. */
2212 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
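/* Worked example of that encoding (using the illustrative FRAME_SIZE
   above): 1216 = 0x4c0 encodes as uleb128 { 0xc0, 0x09 }, exactly the
   (FRAME_SIZE & 0x7f) | 0x80, FRAME_SIZE >> 7 byte pair in fde_def_cfa
   below.  The fde_reg_ofs entries are DW_CFA_offset opcodes: 0x80 | regno
   followed by the offset in data-alignment units, so e.g. 0x86, 2 means
   "%rbp is saved at CFA + 2 * data_align = CFA - 16". */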
2213
2214 #if !defined(__ELF__)
2215 /* Host machine without ELF. */
2216 #elif TCG_TARGET_REG_BITS == 64
2217 #define ELF_HOST_MACHINE EM_X86_64
2218 static DebugFrame debug_frame = {
2219 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2220 .cie.id = -1,
2221 .cie.version = 1,
2222 .cie.code_align = 1,
2223 .cie.data_align = 0x78, /* sleb128 -8 */
2224 .cie.return_column = 16,
2225
2226 /* Total FDE size does not include the "len" member. */
2227 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
2228
2229 .fde_def_cfa = {
2230 12, 7, /* DW_CFA_def_cfa %rsp, ... */
2231 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2232 (FRAME_SIZE >> 7)
2233 },
2234 .fde_reg_ofs = {
2235 0x90, 1, /* DW_CFA_offset, %rip, -8 */
2236 /* The following ordering must match tcg_target_callee_save_regs. */
2237 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
2238 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
2239 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
2240 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
2241 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
2242 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
2243 }
2244 };
2245 #else
2246 #define ELF_HOST_MACHINE EM_386
2247 static DebugFrame debug_frame = {
2248 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2249 .cie.id = -1,
2250 .cie.version = 1,
2251 .cie.code_align = 1,
2252 .cie.data_align = 0x7c, /* sleb128 -4 */
2253 .cie.return_column = 8,
2254
2255 /* Total FDE size does not include the "len" member. */
2256 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
2257
2258 .fde_def_cfa = {
2259 12, 4, /* DW_CFA_def_cfa %esp, ... */
2260 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2261 (FRAME_SIZE >> 7)
2262 },
2263 .fde_reg_ofs = {
2264 0x88, 1, /* DW_CFA_offset, %eip, -4 */
2265 /* The following ordering must match tcg_target_callee_save_regs. */
2266 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
2267 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
2268 0x86, 4, /* DW_CFA_offset, %esi, -16 */
2269 0x87, 5, /* DW_CFA_offset, %edi, -20 */
2270 }
2271 };
2272 #endif
2273
2274 #if defined(ELF_HOST_MACHINE)
2275 void tcg_register_jit(void *buf, size_t buf_size)
2276 {
2277 debug_frame.fde.func_start = (uintptr_t)buf;
2278 debug_frame.fde.func_len = buf_size;
2279
2280 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2281 }
2282 #endif
2283