/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this list of
 *    conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
 *    of conditions and the following disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
	return "x86" SLJIT_CPUINFO;
}

/*
   32b register indexes:
     0 - EAX
     1 - ECX
     2 - EDX
     3 - EBX
     4 - none
     5 - EBP
     6 - ESI
     7 - EDI
*/

/*
   64b register indexes:
     0 - RAX
     1 - RCX
     2 - RDX
     3 - RBX
     4 - none
     5 - RBP
     6 - RSI
     7 - RDI
     8 - R8   - From now on REX prefix is required
     9 - R9
    10 - R10
    11 - R11
    12 - R12
    13 - R13
    14 - R14
    15 - R15
*/

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)

static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
	0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
};

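/* Note: SLJIT_R3..SLJIT_R6 have no machine register assigned on x86-32;
   CHECK_EXTRA_REGS below rewrites such operands into SLJIT_MEM1(SLJIT_SP)
   accesses at an offset computed from SLJIT_LOCALS_OFFSET. */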
#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
		w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_SP); \
		do; \
	}

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)

/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better suited for SAVED_EREG than SAVED_REG. */
#ifndef _WIN64
/* 1st argument passed in rdi, 2nd in rsi, 3rd in rdx. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1
};
#else
/* 1st argument passed in rcx, 2nd in rdx, 3rd in r8. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1
};
#endif

#define REX_W		0x48
#define REX_R		0x44
#define REX_X		0x42
#define REX_B		0x41
#define REX		0x40
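/* Example: REX_W | REX_B (0x49) selects a 64-bit operand size and extends
   the ModRM rm field, so the sequence 0x49 0x8b 0xc4 encodes mov rax, r12. */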

#ifndef _WIN64
#define HALFWORD_MAX	0x7fffffffl
#define HALFWORD_MIN	-0x80000000l
#else
#define HALFWORD_MAX	0x7fffffffll
#define HALFWORD_MIN	-0x80000000ll
#endif

#define IS_HALFWORD(x)		((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x)		((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)

#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */

#define TMP_FREG	(0)

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS		0x0010
#define EX86_SHIFT_INS		0x0020
#define EX86_REX		0x0040
#define EX86_NO_REXW		0x0080
#define EX86_BYTE_ARG		0x0100
#define EX86_HALF_ARG		0x0200
#define EX86_PREF_66		0x0400
#define EX86_PREF_F2		0x0800
#define EX86_PREF_F3		0x1000
#define EX86_SSE2_OP1		0x2000
#define EX86_SSE2_OP2		0x4000
#define EX86_SSE2		(EX86_SSE2_OP1 | EX86_SSE2_OP2)

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

#define ADD		(/* BINARY */ 0 << 3)
#define ADD_EAX_i32	0x05
#define ADD_r_rm	0x03
#define ADD_rm_r	0x01
#define ADDSD_x_xm	0x58
#define ADC		(/* BINARY */ 2 << 3)
#define ADC_EAX_i32	0x15
#define ADC_r_rm	0x13
#define ADC_rm_r	0x11
#define AND		(/* BINARY */ 4 << 3)
#define AND_EAX_i32	0x25
#define AND_r_rm	0x23
#define AND_rm_r	0x21
#define ANDPD_x_xm	0x54
#define BSR_r_rm	(/* GROUP_0F */ 0xbd)
#define CALL_i32	0xe8
#define CALL_rm		(/* GROUP_FF */ 2 << 3)
#define CDQ		0x99
#define CMOVNE_r_rm	(/* GROUP_0F */ 0x45)
#define CMP		(/* BINARY */ 7 << 3)
#define CMP_EAX_i32	0x3d
#define CMP_r_rm	0x3b
#define CMP_rm_r	0x39
#define CVTPD2PS_x_xm	0x5a
#define CVTSI2SD_x_rm	0x2a
#define CVTTSD2SI_r_xm	0x2c
#define DIV		(/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm	0x5e
#define INT3		0xcc
#define IDIV		(/* GROUP_F7 */ 7 << 3)
#define IMUL		(/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8	0x6b
#define IMUL_r_rm_i32	0x69
#define JE_i8		0x74
#define JNE_i8		0x75
#define JMP_i8		0xeb
#define JMP_i32		0xe9
#define JMP_rm		(/* GROUP_FF */ 4 << 3)
#define LEA_r_m		0x8d
#define MOV_r_rm	0x8b
#define MOV_r_i32	0xb8
#define MOV_rm_r	0x89
#define MOV_rm_i32	0xc7
#define MOV_rm8_i8	0xc6
#define MOV_rm8_r8	0x88
#define MOVSD_x_xm	0x10
#define MOVSD_xm_x	0x11
#define MOVSXD_r_rm	0x63
#define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
#define MUL		(/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm	0x59
#define NEG_rm		(/* GROUP_F7 */ 3 << 3)
#define NOP		0x90
#define NOT_rm		(/* GROUP_F7 */ 2 << 3)
#define OR		(/* BINARY */ 1 << 3)
#define OR_r_rm		0x0b
#define OR_EAX_i32	0x0d
#define OR_rm_r		0x09
#define OR_rm8_r8	0x08
#define POP_r		0x58
#define POP_rm		0x8f
#define POPF		0x9d
#define PUSH_i32	0x68
#define PUSH_r		0x50
#define PUSH_rm		(/* GROUP_FF */ 6 << 3)
#define PUSHF		0x9c
#define RET_near	0xc3
#define RET_i16		0xc2
#define SBB		(/* BINARY */ 3 << 3)
#define SBB_EAX_i32	0x1d
#define SBB_r_rm	0x1b
#define SBB_rm_r	0x19
#define SAR		(/* SHIFT */ 7 << 3)
#define SHL		(/* SHIFT */ 4 << 3)
#define SHR		(/* SHIFT */ 5 << 3)
#define SUB		(/* BINARY */ 5 << 3)
#define SUB_EAX_i32	0x2d
#define SUB_r_rm	0x2b
#define SUB_rm_r	0x29
#define SUBSD_x_xm	0x5c
#define TEST_EAX_i32	0xa9
#define TEST_rm_r	0x85
#define UCOMISD_x_xm	0x2e
#define UNPCKLPD_x_xm	0x14
#define XCHG_EAX_r	0x90
#define XCHG_r_rm	0x87
#define XOR		(/* BINARY */ 6 << 3)
#define XOR_EAX_i32	0x35
#define XOR_r_rm	0x33
#define XOR_rm_r	0x31
#define XORPD_x_xm	0x57

#define GROUP_0F	0x0f
#define GROUP_F7	0xf7
#define GROUP_FF	0xff
#define GROUP_BINARY_81	0x81
#define GROUP_BINARY_83	0x83
#define GROUP_SHIFT_1	0xd1
#define GROUP_SHIFT_N	0xc1
#define GROUP_SHIFT_CL	0xd3

#define MOD_REG		0xc0
#define MOD_DISP8	0x40
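/* ModRM byte layout: mod (bits 7:6), reg (bits 5:3), rm (bits 2:0).
   MOD_REG selects register-direct addressing; MOD_DISP8 means an 8-bit
   displacement follows the ModRM byte. */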

#define INC_SIZE(s)		(*inst++ = (s), compiler->size += (s))

#define PUSH_REG(r)		(*inst++ = (PUSH_r + (r)))
#define POP_REG(r)		(*inst++ = (POP_r + (r)))
#define RET()			(*inst++ = (RET_near))
#define RET_I16(n)		(*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm)	(*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
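/* For example, MOV_RM(MOD_REG, reg_map[a], reg_map[b]) emits 8b /r with a
   register-direct ModRM byte, i.e. a plain register-to-register mov. */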

/* Multithreading does not affect these static variables, since they store
   built-in CPU features. Even if different threads detect the features at
   the same time, they write the same values, so the overwrites are harmless. */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_s32 cpu_has_sse2 = -1;
#endif
static sljit_s32 cpu_has_cmov = -1;

#ifdef _WIN32_WCE
#include <cmnintrin.h>
#elif defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#endif

/******************************************************/
/*    Unaligned-store functions                       */
/******************************************************/

static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

/******************************************************/
/*    Utility functions                               */
/******************************************************/

static void get_cpu_features(void)
{
	sljit_u32 features;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	int CPUInfo[4];
	__cpuid(CPUInfo, 1);
	features = (sljit_u32)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)

	/* AT&T syntax. */
	__asm__ (
		"movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* On x86-32, there is no red zone, so this
		   should work (no need for a local variable). */
		"push %%ebx\n"
#endif
		"cpuid\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		"pop %%ebx\n"
#endif
		"movl %%edx, %0\n"
		: "=g" (features)
		:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "%eax", "%ecx", "%edx"
#else
		: "%rax", "%rbx", "%rcx", "%rdx"
#endif
	);

#else /* _MSC_VER && _MSC_VER >= 1400 */

	/* Intel syntax. */
	__asm {
		mov eax, 1
		cpuid
		mov features, edx
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */

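	/* CPUID(eax = 1) feature bits in edx: bit 15 is CMOV, bit 26 is SSE2. */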
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	cpu_has_sse2 = (features >> 26) & 0x1;
#endif
	cpu_has_cmov = (features >> 15) & 0x1;
}

static sljit_u8 get_jump_code(sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_EQUAL_F64:
		return 0x84 /* je */;

	case SLJIT_NOT_EQUAL:
	case SLJIT_NOT_EQUAL_F64:
		return 0x85 /* jne */;

	case SLJIT_LESS:
	case SLJIT_LESS_F64:
		return 0x82 /* jc */;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_GREATER_EQUAL_F64:
		return 0x83 /* jae */;

	case SLJIT_GREATER:
	case SLJIT_GREATER_F64:
		return 0x87 /* jnbe */;

	case SLJIT_LESS_EQUAL:
	case SLJIT_LESS_EQUAL_F64:
		return 0x86 /* jbe */;

	case SLJIT_SIG_LESS:
		return 0x8c /* jl */;

	case SLJIT_SIG_GREATER_EQUAL:
		return 0x8d /* jnl */;

	case SLJIT_SIG_GREATER:
		return 0x8f /* jnle */;

	case SLJIT_SIG_LESS_EQUAL:
		return 0x8e /* jle */;

	case SLJIT_OVERFLOW:
	case SLJIT_MUL_OVERFLOW:
		return 0x80 /* jo */;

	case SLJIT_NOT_OVERFLOW:
	case SLJIT_MUL_NOT_OVERFLOW:
		return 0x81 /* jno */;

	case SLJIT_UNORDERED_F64:
		return 0x8a /* jp */;

	case SLJIT_ORDERED_F64:
		return 0x8b /* jpo */;
	}
	return 0;
}

static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
static sljit_u8* generate_fixed_jump(sljit_u8 *code_ptr, sljit_sw addr, sljit_s32 type);
#endif

static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type)
{
	sljit_s32 short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target;
	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = JMP_i8;
		else
			*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		short_jump = 0;
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else if (short_jump) {
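		/* Near conditional jumps are 0x0f 0x80..0x8f; subtracting 0x10
		   from the second byte yields the short (rel8) form 0x70..0x7f. */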
		*code_ptr++ = get_jump_code(type) - 0x10;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_s8);
	} else {
		jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code_ptr += sizeof(sljit_sw);
#else
		code_ptr += sizeof(sljit_s32);
#endif
	}

	return code_ptr;
}

SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_u8 *code;
	sljit_u8 *code_ptr;
	sljit_u8 *buf_ptr;
	sljit_u8 *buf_end;
	sljit_u8 len;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
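			/* Each record in the buffer starts with a length byte: a
			   non-zero value means that many bytes of generated code
			   follow, while zero introduces a tag byte (0 = label,
			   1 = const, 2/3 = fixed call/jump, >= 4 = jump of type
			   tag - 4), as handled below. */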
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMCPY(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				if (*buf_ptr >= 4) {
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
					else
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
					jump = jump->next;
				}
				else if (*buf_ptr == 0) {
					label->addr = (sljit_uw)code_ptr;
					label->size = code_ptr - code;
					label = label->next;
				}
				else if (*buf_ptr == 1) {
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
					const_ = const_->next;
				}
				else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
					*code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
					buf_ptr++;
					sljit_unaligned_store_sw(code_ptr, *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw)));
					code_ptr += sizeof(sljit_sw);
					buf_ptr += sizeof(sljit_sw) - 1;
#else
					code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
					buf_ptr += sizeof(sljit_sw);
#endif
				}
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

	jump = compiler->jumps;
	while (jump) {
		if (jump->flags & PATCH_MB) {
			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) <= 127);
			*(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8)));
		} else if (jump->flags & PATCH_MW) {
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw))));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw))));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump->addr + sizeof(sljit_s32))));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD)
			sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
#endif

		jump = jump->next;
	}

	/* Some space may be wasted because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = code_ptr - code;
	return (void*)code;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);

static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);

static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw);

static SLJIT_INLINE sljit_s32 emit_save_flags(struct sljit_compiler *compiler)
{
	sljit_u8 *inst;

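	/* The lea below first raises esp/rsp by one word, so the following
	   pushf stores the flags in the word at the original stack top (a slot
	   that must be free at this point) and leaves the stack pointer
	   unchanged overall. */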
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
#else
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_u8)sizeof(sljit_sw);
	*inst++ = PUSHF;
	compiler->flags_saved = 1;
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 emit_restore_flags(struct sljit_compiler *compiler, sljit_s32 keep_flags)
{
	sljit_u8 *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
	*inst++ = POPF;
#else
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = POPF;
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_u8)(-(sljit_s8)sizeof(sljit_sw));
	compiler->flags_saved = keep_flags;
	return SLJIT_SUCCESS;
}

#ifdef _WIN32
#include <malloc.h>

static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
{
	/* Workaround for calling the internal _chkstk() function on Windows.
	This function touches all 4k pages that belong to the requested stack
	space, whose size is passed in local_size. This is necessary on Windows,
	where the stack can only grow in 4k steps. The function merely burns
	CPU cycles if the stack is already large enough, but since that cannot
	be known in advance, it must always be called. I think this is a bad
	design in general, even if it has its reasons. */
	*(volatile sljit_s32*)alloca(local_size) = 0;
}

#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
#include "sljitNativeX86_64.c"
#endif

static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		/* No destination: no need to set up flags. */
		if (src & SLJIT_MEM) {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = MOV_r_rm;
		}
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(src)) {
		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			if (!compiler->mode32) {
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_r;
			return SLJIT_SUCCESS;
		}
#endif
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = MOV_r_rm;
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Requires two instructions. */
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst = MOV_r_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	return SLJIT_SUCCESS;
}

#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 size;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = INT3;
		break;
	case SLJIT_NOP:
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = NOP;
		break;
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] == 2
			&& reg_map[TMP_REG1] > 7,
			invalid_register_assignment_for_div_mul);
#else
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] < 7
			&& reg_map[TMP_REG1] == 2,
			invalid_register_assignment_for_div_mul);
#endif
		compiler->mode32 = op & SLJIT_I32_OP;
#endif
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);

		op = GET_OPCODE(op);
		if ((op | 0x2) == SLJIT_DIV_UW) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
#else
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
			FAIL_IF(!inst);
			*inst = XOR_r_rm;
		}

		if ((op | 0x2) == SLJIT_DIV_SW) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = CDQ;
#else
			if (compiler->mode32) {
				inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!inst);
				INC_SIZE(1);
				*inst = CDQ;
			} else {
				inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!inst);
				INC_SIZE(2);
				*inst++ = REX_W;
				*inst = CDQ;
			}
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
#else
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
		else if (op >= SLJIT_DIVMOD_UW)
			*inst++ = REX_B;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
#else
		if (!compiler->mode32)
			*inst++ = REX_W;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | reg_map[SLJIT_R1];
#endif
#endif
		switch (op) {
		case SLJIT_LMUL_UW:
			*inst |= MUL;
			break;
		case SLJIT_LMUL_SW:
			*inst |= IMUL;
			break;
		case SLJIT_DIVMOD_UW:
		case SLJIT_DIV_UW:
			*inst |= DIV;
			break;
		case SLJIT_DIVMOD_SW:
		case SLJIT_DIV_SW:
			*inst |= IDIV;
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		if (op <= SLJIT_DIVMOD_SW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#else
		if (op >= SLJIT_DIV_UW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}

#define ENCODE_PREFIX(prefix) \
	do { \
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!inst); \
		INC_SIZE(1); \
		*inst = (prefix); \
	} while (0)

static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_i8;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
		/* src, dst are registers. */
		SLJIT_ASSERT(SLOW_IS_REG(dst));
		if (reg_map[dst] < 4) {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
		}
		else {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SHL;
				/* sar reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SAR;
			}
			else {
				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
				FAIL_IF(!inst);
				*(inst + 1) |= AND;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src is either a memory operand, or a register with reg_map[src] < 4 on x86-32. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REG1) {
			/* Find an unused register whose reg_map value is < 4. */
			if ((dst & REG_MASK) == SLJIT_R0) {
				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}
			else {
				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
					work_r = SLJIT_R0;
				else if ((dst & REG_MASK) == SLJIT_R1)
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}

			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;
		}
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
#endif
	}

	return SLJIT_SUCCESS;
}

static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
	}

	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
	}

	return SLJIT_SUCCESS;
}

static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (dst == src && dstw == srcw) {
		/* Same input and output. */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= opcode;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= NOT_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst = OR_r_rm;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	SLJIT_UNUSED_ARG(op_flags);
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, TMP_REG1, 0);
#endif
		FAIL_IF(!inst);
		*inst |= SHR;
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

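	/* bsr returns the index of the highest set bit (and sets the zero flag
	   when the source is zero); xor-ing that index with 31 (or 63) turns it
	   into the leading-zero count. The 32 + 31 (or 64 + 63) preload below
	   makes the zero-input case come out as 32 (or 64) after the xor. */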
	inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = BSR_r_rm;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (FAST_IS_REG(dst))
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
			dst_r = SLJIT_R0;
		else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
			dst_r = SLJIT_R1;
		else
			dst_r = SLJIT_R2;
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op_flags & SLJIT_I32_OP;
#endif

	if (cpu_has_cmov == -1)
		get_cpu_features();

	if (cpu_has_cmov) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = CMOVNE_r_rm;
	} else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);

		*inst++ = JE_i8;
		*inst++ = 2;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
#else
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);

		*inst++ = JE_i8;
		*inst++ = 3;
		*inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
#endif
	}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!inst);
	*(inst + 1) |= XOR;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 update = 0;
	sljit_s32 op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 dst_is_ereg = 0;
	sljit_s32 src_is_ereg = 0;
#else
# define src_is_ereg 0
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op_flags & SLJIT_I32_OP;
#endif

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif

		if (op_flags & SLJIT_I32_OP) {
			if (FAST_IS_REG(src) && src == dst) {
				if (!TYPE_CAST_NEEDED(op))
					return SLJIT_SUCCESS;
			}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM))
				op = SLJIT_MOV_U32;
			if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM))
				op = SLJIT_MOVU_U32;
			if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM))
				op = SLJIT_MOV_S32;
			if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM))
				op = SLJIT_MOVU_S32;
#endif
		}

		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			update = 1;
			op -= 8;
		}

		if (src & SLJIT_IMM) {
			switch (op) {
			case SLJIT_MOV_U8:
				srcw = (sljit_u8)srcw;
				break;
			case SLJIT_MOV_S8:
				srcw = (sljit_s8)srcw;
				break;
			case SLJIT_MOV_U16:
				srcw = (sljit_u16)srcw;
				break;
			case SLJIT_MOV_S16:
				srcw = (sljit_s16)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_U32:
				srcw = (sljit_u32)srcw;
				break;
			case SLJIT_MOV_S32:
				srcw = (sljit_s32)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
			dst = TMP_REG1;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_U32:
		case SLJIT_MOV_S32:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_U8:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_S8:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_U16:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_S16:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_U32:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_S32:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
#endif

		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
		compiler->flags_saved = 0;

	switch (op) {
	case SLJIT_NOT:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
# undef src_is_ereg
#endif
}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

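/* x86-64 ALU instructions sign-extend 32-bit immediates, so a value outside
   the halfword range must be loaded into TMP_REG2 first. */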
#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif

static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst) && dst != src2) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

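/* Multiplication uses the two-operand imul (0f af), or its immediate forms
   6b /r ib and 69 /r id when one of the sources is a constant. */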
static sljit_s32 emit_mul(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* Register destination. */
	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (src1 & SLJIT_IMM) {
		if (src2 & SLJIT_IMM) {
			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
			src2 = dst_r;
			src2w = 0;
		}

		if (src1w <= 127 && src1w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_s8)src1w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_sw(inst, src1w);
		}
#else
		else if (IS_HALFWORD(src1w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
			if (dst_r != src2)
				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else if (src2 & SLJIT_IMM) {
		/* Note: src1 is NOT immediate. */

		if (src2w <= 127 && src2w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_s8)src2w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_sw(inst, src2w);
		}
#else
		else if (IS_HALFWORD(src2w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
			if (dst_r != src1)
				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else {
		/* Neither argument is immediate. */
		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
			dst_r = TMP_REG1;
		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}

	if (dst_r == TMP_REG1)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}

static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, sljit_s32 keep_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_s32 dst_r, done = 0;

	/* These cases are better left to the normal code path. */
	if (!keep_flags) {
		if (dst == src1 && dstw == src1w)
			return SLJIT_ERR_UNSUPPORTED;
		if (dst == src2 && dstw == src2w)
			return SLJIT_ERR_UNSUPPORTED;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (FAST_IS_REG(src1)) {
		if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
#else
		if (src2 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}
	else if (FAST_IS_REG(src2)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
#else
		if (src1 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}

	if (done) {
		if (dst_r == TMP_REG1)
			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}
	return SLJIT_ERR_UNSUPPORTED;
}

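/* Emit a CMP that only sets the flags. The short accumulator form
   (CMP_EAX_i32) is preferred when src1 is SLJIT_R0 and the immediate does
   not fit into a signed byte. */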
static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src1)) {
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = CMP_r_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst = CMP_rm_r;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		if (src1 & SLJIT_IMM) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			src1 = TMP_REG1;
			src1w = 0;
		}
		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
	}
	else {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = CMP_r_rm;
	}
	return SLJIT_SUCCESS;
}

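/* Emit a TEST, the non-destructive AND: used by SLJIT_AND when the result
   itself is discarded (dst == SLJIT_UNUSED) and only the flags matter. */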
static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
		return SLJIT_SUCCESS;
	}

	if (!(src1 & SLJIT_IMM)) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src2w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
			return SLJIT_SUCCESS;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
			return SLJIT_SUCCESS;
		}
	}

	if (!(src2 & SLJIT_IMM)) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src1w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
			return SLJIT_SUCCESS;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
			return SLJIT_SUCCESS;
		}
	}

	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (IS_HALFWORD(src2w) || compiler->mode32) {
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
		}
		else {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
		}
#else
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = GROUP_F7;
#endif
	}
	else {
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = TEST_rm_r;
	}
	return SLJIT_SUCCESS;
}

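/* Emit a shift. On x86 a variable shift count must be in ecx
   (SLJIT_PREF_SHIFT_REG), so when src2 is neither an immediate nor ecx
   itself, the current ecx value has to be saved and restored around the
   shift; the branches below handle the possible operand overlaps. */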
static sljit_s32 emit_shift(struct sljit_compiler *compiler,
	sljit_u8 mode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		if (dst == src1 && dstw == src1w) {
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			return SLJIT_SUCCESS;
		}
		if (FAST_IS_REG(dst)) {
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}

		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}

	if (dst == SLJIT_PREF_SHIFT_REG) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else {
		/* This case is really difficult, since ecx itself may be used for
		   addressing, and we must ensure this works even in that case. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* [esp+0] contains the flags. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

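/* Emit a shift and materialize the flags when requested. Since the CPU
   leaves the flags unchanged for a zero shift count, a constant zero count
   degrades to a move (or to an OR with 0 when flags are needed), and for a
   variable count an explicit compare with 0 supplies the flags instead. */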
static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
	sljit_u8 mode, sljit_s32 set_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* The CPU does not set flags if the shift count is 0. */
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#else
		if ((src2w & 0x1f) != 0)
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#endif
		if (!set_flags)
			return emit_mov(compiler, dst, dstw, src1, src1w);
		/* OR dst, src, 0 */
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, SLJIT_IMM, 0);
	}

	if (!set_flags)
		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);

	if (!FAST_IS_REG(dst))
		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));

	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));

	if (FAST_IS_REG(dst))
		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
	return SLJIT_SUCCESS;
}

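/* Illustration only (not part of the original source): an embedder could
   emit R0 = R0 + R1 with
     sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_R1, 0);
   any register, memory or immediate operand combination is dispatched to
   the emitters above. */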
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_I32_OP;
#endif

	if (GET_OPCODE(op) >= SLJIT_MUL) {
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!GET_FLAGS(op)) {
			if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		if (!GET_FLAGS(op)) {
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
		return -1;
#endif
	return reg_map[reg];
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	return reg;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_s32 size)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	SLJIT_MEMCPY(inst, instruction, size);
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Floating point operators                                             */
/* --------------------------------------------------------------------- */

/* Alignment + 2 * 16 bytes for each precision (sign and absolute value masks). */
static sljit_s32 sse2_data[3 + (4 + 4) * 2];
static sljit_s32 *sse2_buffer;

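/* sse2_buffer is the 16-byte aligned view of sse2_data. It holds the four
   16-byte constants used by SLJIT_NEG_F64 and SLJIT_ABS_F64 below: sign-bit
   masks (applied with XORPD) at sse2_buffer and sse2_buffer + 8, and
   absolute value masks (applied with ANDPD) at sse2_buffer + 4 and
   sse2_buffer + 12. */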
static void init_compiler(void)
{
	sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
	/* Single precision constants. */
	sse2_buffer[0] = 0x80000000;
	sse2_buffer[4] = 0x7fffffff;
	/* Double precision constants. */
	sse2_buffer[8] = 0;
	sse2_buffer[9] = 0x80000000;
	sse2_buffer[12] = 0xffffffff;
	sse2_buffer[13] = 0x7fffffff;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
{
#ifdef SLJIT_IS_FPU_AVAILABLE
	return SLJIT_IS_FPU_AVAILABLE;
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	if (cpu_has_sse2 == -1)
		get_cpu_features();
	return cpu_has_sse2;
#else /* SLJIT_DETECT_SSE2 */
	return 1;
#endif /* SLJIT_DETECT_SSE2 */
}

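/* Scalar SSE2 arithmetic: the mandatory prefix picks the precision
   (F3 = single, F2 = double) in front of the 0F-escaped opcode. */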
static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
{
	sljit_u8 *inst;

	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
{
	sljit_u8 *inst;

	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
{
	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
}

static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
{
	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
}

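/* Float to integer conversion. CVTTSD2SI/CVTTSS2SI truncate toward zero,
   which gives the usual C conversion semantics. */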
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
	sljit_u8 *inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
		compiler->mode32 = 0;
#endif

	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = CVTTSD2SI_r_xm;

	if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
	sljit_u8 *inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
		compiler->mode32 = 0;
#endif

	if (src & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
			srcw = (sljit_s32)srcw;
#endif
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = CVTSI2SD_x_rm;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif
	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	compiler->flags_saved = 0;
	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
		src1 = TMP_FREG;
	}
	return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	CHECK_ERROR();
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	if (GET_OPCODE(op) == SLJIT_MOV_F64) {
		if (FAST_IS_REG(dst))
			return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
		if (FAST_IS_REG(src))
			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	}

	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
		dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
		if (FAST_IS_REG(src)) {
			/* We overwrite the high bits of the source register. From the
			   SLJIT point of view, this is not an issue.
			   Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
			FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
		}
		else {
			FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
			src = TMP_FREG;
		}

		FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
		if (dst_r == TMP_FREG)
			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
		return SLJIT_SUCCESS;
	}

	if (SLOW_IS_REG(dst)) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_NEG_F64:
		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
		break;

	case SLJIT_ABS_F64:
		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (FAST_IS_REG(dst)) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
			/* Swap arguments. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_SUB_F64:
		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_MUL_F64:
		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_DIV_F64:
		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Conditional instructions                                             */
/* --------------------------------------------------------------------- */

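/* Labels, jumps and constants are recorded in the code buffer as two-byte
   markers: a zero (an impossible instruction length) followed by a kind
   byte, 0 for a label, 1 for a constant and type + 4 for a jump. They are
   resolved when the final code is generated. */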
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_u8 *inst;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_label(compiler));

	/* We should restore the flags before the label,
	   since other taken jumps have their own flags as well. */
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		PTR_FAIL_IF(emit_restore_flags(compiler, 0));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 0;

	return label;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if ((type & 0xff) <= SLJIT_JUMP)
			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(inst);

	*inst++ = 0;
	*inst++ = type + 4;
	return jump;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if (type <= SLJIT_JUMP)
			FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	if (type >= SLJIT_CALL1) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (src == SLJIT_R2) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
		if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
			srcw += sizeof(sljit_sw);
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		if (src == SLJIT_R2) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		inst = (sljit_u8*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(inst);

		*inst++ = 0;
		*inst++ = type + 4;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
	}
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw,
	sljit_s32 type)
{
	sljit_u8 *inst;
	sljit_u8 cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#else
	/* CHECK_EXTRA_REGS might overwrite these values. */
	sljit_s32 dst_save = dst;
	sljit_sw dstw_save = dstw;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
	SLJIT_UNUSED_ARG(srcw);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));

	type &= 0xff;
	/* setcc = jcc + 0x10. */
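	/* E.g. the second opcode byte of JE (0F 84) becomes SETE (0F 94). */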
	cond_set = get_jump_code(type) + 0x10;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
		FAIL_IF(!inst);
		INC_SIZE(4 + 3);
		/* Set low register to conditional flag. */
		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
		*inst++ = OR_rm8_r8;
		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
		return SLJIT_SUCCESS;
	}

	reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | reg_lmap[reg];
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg != TMP_REG1)
		return SLJIT_SUCCESS;

	if (GET_OPCODE(op) < SLJIT_ADD) {
		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
#else /* SLJIT_CONFIG_X86_64 */
	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
		if (reg_map[dst] <= 4) {
			/* Low byte is accessible. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | reg_map[dst];

			*inst++ = GROUP_0F;
			*inst++ = MOVZX_r_rm8;
			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
			return SLJIT_SUCCESS;
		}

		/* Low byte is not accessible. */
		if (cpu_has_cmov == -1)
			get_cpu_features();

		if (cpu_has_cmov) {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
			/* a xor reg, reg operation would overwrite the flags. */
			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3);

			*inst++ = GROUP_0F;
			/* cmovcc = setcc - 0x50. */
			*inst++ = cond_set - 0x50;
			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
			return SLJIT_SUCCESS;
		}

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1 + 3 + 3 + 1);
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		/* Set al to conditional flag. */
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | 0 /* eax */;

		*inst++ = GROUP_0F;
		*inst++ = MOVZX_r_rm8;
		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		return SLJIT_SUCCESS;
	}

	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
		SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
		if (dst != SLJIT_R0) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1 + 3 + 2 + 1);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 0 /* eax */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2 + 3 + 2 + 2);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 1 /* ecx */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
		}
		return SLJIT_SUCCESS;
	}

	/* Set TMP_REG1 to the bit. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1 + 3 + 3 + 1);
	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
	/* Set al to conditional flag. */
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | 0 /* eax */;

	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;

	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];

	if (GET_OPCODE(op) < SLJIT_ADD)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
	CHECK_ERROR();
	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (NOT_HALFWORD(offset)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	sljit_u8 *inst;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (dst & SLJIT_MEM)
		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
			return NULL;
#endif

	return const_;
}

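/* Patch the target of a rewritable jump or call. On x86-32 the stored value
   is a rel32 displacement measured from the end of the 4-byte field; on
   x86-64 it is the absolute 64-bit immediate consumed by the indirect jump
   sequence. */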
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_unaligned_store_sw((void*)addr, new_addr - (addr + 4));
#else
	sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_addr);
#endif
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
{
	sljit_unaligned_store_sw((void*)addr, new_constant);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void)
{
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	if (cpu_has_sse2 == -1)
		get_cpu_features();
	return cpu_has_sse2;
#else
	return 1;
#endif
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void)
{
	if (cpu_has_cmov == -1)
		get_cpu_features();
	return cpu_has_cmov;
}

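/* x86-specific conditional move. Illustration only (not part of the
   original source): assuming CMOV support,
     sljit_x86_emit_cmov(compiler, SLJIT_EQUAL, SLJIT_R0, SLJIT_R1, 0);
   would emit a cmove into R0. The opcode below is derived from the jump
   code: cmovcc = jcc - 0x40 (e.g. JE 0F 84 -> CMOVE 0F 44). */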
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler,
	sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	CHECK_ERROR();
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	CHECK_ARGUMENT(sljit_x86_is_cmov_available());
	CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP)));
	CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64);
	CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP));
	FUNCTION_CHECK_SRC(src, srcw);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
	if (SLJIT_UNLIKELY(!!compiler->verbose)) {
		fprintf(compiler->verbose, " x86_cmov%s %s%s, ",
			!(dst_reg & SLJIT_I32_OP) ? "" : ".i",
			jump_names[type & 0xff], JUMP_POSTFIX(type));
		sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP);
		fprintf(compiler->verbose, ", ");
		sljit_verbose_param(compiler, src, srcw);
		fprintf(compiler->verbose, "\n");
	}
#endif

	ADJUST_LOCAL_OFFSET(src, srcw);
	CHECK_EXTRA_REGS(src, srcw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = dst_reg & SLJIT_I32_OP;
#endif
	dst_reg &= ~SLJIT_I32_OP;

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = get_jump_code(type & 0xff) - 0x40;
	return SLJIT_SUCCESS;
}
