/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EAX,
    TCG_REG_EDX,
    TCG_REG_ECX,
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
#endif
};

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
    TCG_REG_R8,
    TCG_REG_R9,
#else
    TCG_REG_EAX,
    TCG_REG_EDX,
    TCG_REG_ECX
#endif
};

static const int tcg_target_call_oarg_regs[2] = {
    TCG_REG_EAX,
    TCG_REG_EDX
};

static uint8_t *tb_ret_addr;

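/* Apply a PC-relative relocation.  The displacement stored in the code is
   relative to the end of the instruction, while CODE_PTR points at the
   displacement field itself; callers compensate through the addend
   (tcg_out_jxx below passes -1 for R_386_PC8 and -4 for R_386_PC32).  */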
static void patch_reloc(uint8_t *code_ptr, int type,
                        tcg_target_long value, tcg_target_long addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        *(uint32_t *)code_ptr = value;
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        *(uint8_t *)code_ptr = value;
        break;
    default:
        tcg_abort();
    }
}

/* maximum number of registers used for input function arguments */
static inline int tcg_target_get_call_iarg_regs_count(int flags)
{
    if (TCG_TARGET_REG_BITS == 64) {
        return 6;
    }

    flags &= TCG_CALL_TYPE_MASK;
    switch(flags) {
    case TCG_CALL_TYPE_STD:
        return 0;
    case TCG_CALL_TYPE_REGPARM_1:
    case TCG_CALL_TYPE_REGPARM_2:
    case TCG_CALL_TYPE_REGPARM:
        return flags - TCG_CALL_TYPE_REGPARM_1 + 1;
    default:
        tcg_abort();
    }
}

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
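
    /* 'q' selects a register usable as a byte operand: any register in
       64-bit mode, but only %eax/%ecx/%edx/%ebx on i386, where the
       %{si,di,bp,sp}l byte registers do not exist.  */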
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'r':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;

        /* qemu_ld/st address constraint */
    case 'L':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
        }
        break;

    case 'e':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_U32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
    return 0;
}

#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)  ((x) & 7)
#else
# define LOWREGMASK(x)  (x)
#endif

#define P_EXT       0x100   /* 0x0f opcode prefix */
#define P_DATA16    0x200   /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32   0x400   /* 0x67 opcode prefix */
# define P_REXW     0x800   /* Set REX.W = 1 */
# define P_REXB_R   0x1000  /* REG field as byte register */
# define P_REXB_RM  0x2000  /* R/M field as byte register */
#else
# define P_ADDR32   0
# define P_REXW     0
# define P_REXB_R   0
# define P_REXB_RM  0
#endif

#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)      /* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)      /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)      /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)      /* loads, more or less */
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM)  /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_TESTL       (0x85)
#define OPC_XCHG_ax_r32 (0x90)

#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev    0
#define EXT5_DEC_Ev    1
#define EXT5_CALLN_Ev  2
#define EXT5_JMPN_Ev   4

/* Condition codes to be added to OPC_JCC_{long,short}.  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

static const uint8_t tcg_cond_to_jcc[10] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) >> 8;         /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them.  */
#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
#endif

static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
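
/* As a worked example, with TCG_TARGET_REG_BITS == 64,
   tcg_out_modrm(s, OPC_MOVL_GvEv + P_REXW, TCG_REG_RAX, TCG_REG_R8)
   emits 49 8b c0, i.e. "mov %r8,%rax": REX.W for the 64-bit operand
   size, REX.B because the r/m register needs bit 3, then the opcode
   and a mod=3 ModRM byte built from the low 3 bits of each register.  */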

/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   Either RM or INDEX may be omitted by passing a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift,
                                     tcg_target_long offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
            tcg_target_long disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}

/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, tcg_target_long offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}
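
/* For example, tcg_out_modrm_offset(s, OPC_MOVL_GvEv, TCG_REG_EAX,
   TCG_REG_EBX, 0) emits 8b 03, "mov (%ebx),%eax" (mod=0, no SIB, no
   displacement), while an offset of 8 yields 8b 43 08 with mod=1 and
   a one-byte displacement.  */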

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg)
{
    if (arg != ret) {
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         int ret, tcg_target_long arg)
{
    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
    } else if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
    } else {
        tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        tcg_out32(s, arg >> 31 >> 1);
    }
}
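
/* Size-wise: loading zero costs two bytes (33 c0, "xor %eax,%eax" for
   TCG_REG_EAX); values fitting in 32 bits use the five-byte b8+rd imm32
   or seven-byte sign-extending REX.W c7 /0 forms; only a genuinely
   64-bit constant pays for the ten-byte REX.W b8+rd imm64 "movabs".  */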

static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
                              int arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, int arg,
                              int arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}
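
/* For instance, tgen_arithi(s, ARITH_ADD, TCG_REG_EAX, 8, 0) emits
   83 c0 08 ("add $8,%eax") via the sign-extended-imm8 form, while an
   immediate that doesn't fit in 8 bits falls back to the 81 /0 imm32
   encoding.  */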

static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}

#undef small /* for mingw build */

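/* Branch displacements are relative to the end of the instruction:
   2 bytes for the short forms, 5 for "jmp rel32", 6 for the two-byte
   0f 8x "jcc rel32" -- hence the val - 2/5/6 adjustments below.  */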
/* Use SMALL != 0 to force a short forward branch.  */
static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
{
    int32_t val, val1;
    TCGLabel *l = &s->labels[label_index];

    if (l->has_value) {
        val = l->u.value - (tcg_target_long)s->code_ptr;
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
        s->code_ptr += 4;
    }
}

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle cross-basic-block temporaries */
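/* The lowering strategy for each double-word condition: compare the
   high parts and branch on the condition applied to them; only when
   the high parts are equal (the JNE to label_next falls through) is
   the decision made by an unsigned comparison of the low parts.
   EQ/NE need no more than equality tests on both halves.  */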
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    int label_next;
    label_next = gen_new_label();
    switch(args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         args[5], small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
}
#endif

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    int label_true, label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_true;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_over;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
    }
}
#endif

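/* The indirect fallback below can only be reached on a 64-bit host
   (a 32-bit displacement always suffices on i386); there %r10 is
   call-clobbered and not an argument register in the SysV x86-64 ABI,
   so it is free to use as a scratch for the destination.  */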
static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
{
    tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}

static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 1, dest);
}

static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 0, dest);
}

#if defined(CONFIG_SOFTMMU)

#include "../../softmmu_defs.h"

static void *qemu_ld_helpers[4] = {
    __ldb_mmu,
    __ldw_mmu,
    __ldl_mmu,
    __ldq_mmu,
};

static void *qemu_st_helpers[4] = {
    __stb_mmu,
    __stw_mmu,
    __stl_mmu,
    __stq_mmu,
};

/* Perform the TLB load and compare.

   Inputs:
   ADDRLO_IDX contains the index into ARGS of the low part of the
   address; the high part of the address is at ADDRLO_IDX+1.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be the offsetof of either addr_read or addr_write.

   Outputs:
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   First argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   Second argument register is clobbered.  */

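/* A sketch of the sequence emitted below, for the simple case of a
   32-bit guest on a 64-bit host (r0/r1 being the first two call
   argument registers, tlb_ofs the offset of tlb_table[mem_index][0]
   plus WHICH within CPUState):

       mov    addrlo, r1
       mov    addrlo, r0
       shr    $(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), r1
       and    $(TARGET_PAGE_MASK | ((1 << s_bits) - 1)), r0
       and    $((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), r1
       lea    tlb_ofs(env, r1), r1
       cmp    (r1), r0
       mov    addrlo, r0
       jne    miss                     # displacement recorded in label_ptr[0]
       add    addend-WHICH(r1), r0     # r0 is now the host address
*/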
static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
                                    int mem_index, int s_bits,
                                    const TCGArg *args,
                                    uint8_t **label_ptr, int which)
{
    const int addrlo = args[addrlo_idx];
    const int r0 = tcg_target_call_iarg_regs[0];
    const int r1 = tcg_target_call_iarg_regs[1];
    TCGType type = TCG_TYPE_I32;
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
        type = TCG_TYPE_I64;
        rexw = P_REXW;
    }

    tcg_out_mov(s, type, r1, addrlo);
    tcg_out_mov(s, type, r0, addrlo);

    tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + rexw, r0,
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
    tgen_arithi(s, ARITH_AND + rexw, r1,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
                             offsetof(CPUState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r1), r0 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);

    tcg_out_mov(s, type, r0, addrlo);

    /* jne label1 */
    tcg_out8(s, OPC_JCC_short + JCC_JNE);
    label_ptr[0] = s->code_ptr;
    s->code_ptr++;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r1), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);

        /* jne label1 */
        tcg_out8(s, OPC_JCC_short + JCC_JNE);
        label_ptr[1] = s->code_ptr;
        s->code_ptr++;
    }

    /* TLB Hit.  */

    /* add addend(r1), r0 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
                         offsetof(CPUTLBEntry, addend) - which);
}
#endif

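/* SIZEOP encodes the log2 of the access size in its low two bits
   (0 = 8-bit ... 3 = 64-bit) and sign-extension of the loaded value
   in bit 2, mirroring the qemu_ld opcode variants handled below.  */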
static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
        break;
    case 0 | 4:
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
        break;
    case 1:
        tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
        if (bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case 1 | 4:
        if (bswap) {
            tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
        }
        break;
    case 2:
        tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        if (bswap) {
            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
        }
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            if (bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            if (base != datalo) {
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
            } else {
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}

/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX.  It will be useful once fixed-register globals are less
   common.  */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits, arg_idx;
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc & 3;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB Hit.  */
    tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss.  */

    /* label1: */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move that code to the end of the TB */
    /* The first argument is already loaded with addrlo.  */
    arg_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
                    args[addrlo_idx + 1]);
    }
    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                 mem_index);
    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);

    switch(opc) {
    case 0 | 4:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 1 | 4:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 0:
        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
        break;
    case 1:
        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
        break;
    case 2:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* label2: */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing.  */

            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
                base = TCG_REG_RDI, offset = 0;
            }
        }

        tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}

static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that the second argument reg is definitely free here.  */
    int scratch = tcg_target_call_iarg_regs[1];

    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
        break;
    case 1:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
        break;
    case 2:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
        break;
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
        } else if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
    int stack_adjust;
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit.  */
    tcg_out_qemu_st_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss.  */

    /* label1: */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move that code to the end of the TB */
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    TCG_REG_RSI, data_reg);
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
        stack_adjust = 0;
    } else if (TARGET_LONG_BITS == 32) {
        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg);
        if (opc == 3) {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2);
            tcg_out_pushi(s, mem_index);
            stack_adjust = 4;
        } else {
            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
            stack_adjust = 0;
        }
    } else {
        if (opc == 3) {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
            tcg_out_pushi(s, mem_index);
            tcg_out_push(s, data_reg2);
            tcg_out_push(s, data_reg);
            stack_adjust = 12;
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
            switch(opc) {
            case 0:
                tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
                break;
            case 1:
                tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
                break;
            case 2:
                tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg);
                break;
            }
            tcg_out_pushi(s, mem_index);
            stack_adjust = 4;
        }
    }

    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);

    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
        /* Pop and discard.  This is 2 bytes smaller than the add.  */
        tcg_out_pop(s, TCG_REG_ECX);
    } else if (stack_adjust != 0) {
        tcg_out_addi(s, TCG_REG_ESP, stack_adjust);
    }

    /* label2: */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing.  */

            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
                base = TCG_REG_RDI, offset = 0;
            }
        }

        tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}

static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                              const TCGArg *args, const int *const_args)
{
    int c, rexw = 0;

#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i64): \
            rexw = P_REXW; /* FALLTHRU */ \
        case glue(glue(INDEX_op_, x), _i32)
#else
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i32)
#endif

    switch(opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
        tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
        break;
    case INDEX_op_goto_tb:
        if (s->tb_jmp_offset) {
            /* direct jump method */
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
            tcg_out32(s, 0);
        } else {
            /* indirect jump method */
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                 (tcg_target_long)(s->tb_next + args[0]));
        }
        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
        break;
    case INDEX_op_call:
        if (const_args[0]) {
            tcg_out_calli(s, args[0]);
        } else {
            /* call *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
        }
        break;
    case INDEX_op_jmp:
        if (const_args[0]) {
            tcg_out_jmp(s, args[0]);
        } else {
            /* jmp *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
        }
        break;
    case INDEX_op_br:
        tcg_out_jxx(s, JCC_JMP, args[0], 0);
        break;
    case INDEX_op_movi_i32:
        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
        break;
    OP_32_64(ld8u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld8s):
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16s):
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
#endif
    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(st8):
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
                             args[0], args[1], args[2]);
        break;
    OP_32_64(st16):
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                             args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
#endif
    case INDEX_op_st_i32:
        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(add):
        /* For 3-operand addition, use LEA.  */
        if (args[0] != args[1]) {
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;

            if (const_args[2]) {
                c3 = a2, a2 = -1;
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add.  */
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
                break;
            }

            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
            break;
        }
        c = ARITH_ADD;
        goto gen_arith;
    OP_32_64(sub):
        c = ARITH_SUB;
        goto gen_arith;
    OP_32_64(and):
        c = ARITH_AND;
        goto gen_arith;
    OP_32_64(or):
        c = ARITH_OR;
        goto gen_arith;
    OP_32_64(xor):
        c = ARITH_XOR;
        goto gen_arith;
    gen_arith:
        if (const_args[2]) {
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
        } else {
            tgen_arithr(s, c + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(mul):
        if (const_args[2]) {
            int32_t val;
            val = args[2];
            if (val == (int8_t)val) {
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
                tcg_out8(s, val);
            } else {
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
                tcg_out32(s, val);
            }
        } else {
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(div2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
        break;
    OP_32_64(divu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
        break;

    OP_32_64(shl):
        c = SHIFT_SHL;
        goto gen_shift;
    OP_32_64(shr):
        c = SHIFT_SHR;
        goto gen_shift;
    OP_32_64(sar):
        c = SHIFT_SAR;
        goto gen_shift;
    OP_32_64(rotl):
        c = SHIFT_ROL;
        goto gen_shift;
    OP_32_64(rotr):
        c = SHIFT_ROR;
        goto gen_shift;
    gen_shift:
        if (const_args[2]) {
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
        } else {
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;

    OP_32_64(bswap16):
        tcg_out_rolw_8(s, args[0]);
        break;
    OP_32_64(bswap32):
        tcg_out_bswap32(s, args[0]);
        break;

    OP_32_64(neg):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
        break;
    OP_32_64(not):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
        break;

    OP_32_64(ext8s):
        tcg_out_ext8s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext16s):
        tcg_out_ext16s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext8u):
        tcg_out_ext8u(s, args[0], args[1]);
        break;
    OP_32_64(ext16u):
        tcg_out_ext16u(s, args[0], args[1]);
        break;

    case INDEX_op_qemu_ld8u:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld8s:
        tcg_out_qemu_ld(s, args, 0 | 4);
        break;
    case INDEX_op_qemu_ld16u:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_ld16s:
        tcg_out_qemu_ld(s, args, 1 | 4);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_qemu_ld32u:
#endif
    case INDEX_op_qemu_ld32:
        tcg_out_qemu_ld(s, args, 2);
        break;
    case INDEX_op_qemu_ld64:
        tcg_out_qemu_ld(s, args, 3);
        break;

    case INDEX_op_qemu_st8:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st16:
        tcg_out_qemu_st(s, args, 1);
        break;
    case INDEX_op_qemu_st32:
        tcg_out_qemu_st(s, args, 2);
        break;
    case INDEX_op_qemu_st64:
        tcg_out_qemu_st(s, args, 3);
        break;

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
        break;
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;
    case INDEX_op_mulu2_i32:
        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
        break;
    case INDEX_op_add2_i32:
        if (const_args[4]) {
            tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_ADC, args[1], args[5]);
        }
        break;
    case INDEX_op_sub2_i32:
        if (const_args[4]) {
            tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
        }
        break;
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_movi_i64:
        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_qemu_ld32s:
        tcg_out_qemu_ld(s, args, 2 | 4);
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);
        break;
#endif

    default:
        tcg_abort();
    }

#undef OP_32_64
}

static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_call, { "ri" } },
    { INDEX_op_jmp, { "ri" } },
    { INDEX_op_br, { } },
    { INDEX_op_mov_i32, { "r", "r" } },
    { INDEX_op_movi_i32, { "r" } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "q", "r" } },
    { INDEX_op_st16_i32, { "r", "r" } },
    { INDEX_op_st_i32, { "r", "r" } },

    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },

    { INDEX_op_shl_i32, { "r", "0", "ci" } },
    { INDEX_op_shr_i32, { "r", "0", "ci" } },
    { INDEX_op_sar_i32, { "r", "0", "ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

#if TCG_TARGET_REG_BITS == 32
    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
    { INDEX_op_mov_i64, { "r", "r" } },
    { INDEX_op_movi_i64, { "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "r", "r" } },
    { INDEX_op_st16_i64, { "r", "r" } },
    { INDEX_op_st32_i64, { "r", "r" } },
    { INDEX_op_st_i64, { "r", "r" } },

    { INDEX_op_add_i64, { "r", "0", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },

    { INDEX_op_shl_i64, { "r", "0", "ci" } },
    { INDEX_op_shr_i64, { "r", "0", "ci" } },
    { INDEX_op_sar_i64, { "r", "0", "ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },
#endif

#if TCG_TARGET_REG_BITS == 64
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld32u, { "r", "L" } },
    { INDEX_op_qemu_ld32s, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "L" } },

    { INDEX_op_qemu_st8, { "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L" } },
#else
    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
#endif
    { -1 },
};

static int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    /* TCG_REG_R14, */ /* Currently used for the global env.  */
    TCG_REG_R15,
#else
    /* TCG_REG_EBP, */ /* Currently used for the global env.  */
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
#endif
};

/* Generate global QEMU prologue and epilogue code */
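/* Frame layout: one push per callee-saved register, then the stack
   pointer is lowered so that the whole frame (return address included)
   is a multiple of TCG_TARGET_STACK_ALIGN, leaving at least
   TCG_STATIC_CALL_ARGS_SIZE bytes for helper-call arguments.  */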
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i, frame_size, push_size, stack_addend;

    /* TB prologue */

    /* Save all callee saved registers.  */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
    }

    /* Reserve some stack space.  */
    push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs);
    push_size *= TCG_TARGET_REG_BITS / 8;

    frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    stack_addend = frame_size - push_size;
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);

    /* jmp *tb.  */
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[0]);

    /* TB epilogue */
    tb_ret_addr = s->code_ptr;

    tcg_out_addi(s, TCG_REG_ESP, stack_addend);

    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    }
    tcg_out_opc(s, OPC_RET, 0, 0, 0);
}

static void tcg_target_init(TCGContext *s)
{
#if !defined(CONFIG_USER_ONLY)
    /* fail safe */
    if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
        tcg_abort();
#endif

    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }

    tcg_regset_clear(tcg_target_call_clobber_regs);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    }

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP);

    tcg_add_target_add_op_defs(x86_op_defs);
}