1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
sljit_get_platform_name(void)27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28 {
29 return "x86" SLJIT_CPUINFO;
30 }
31
32 /*
33 32b register indexes:
34 0 - EAX
35 1 - ECX
36 2 - EDX
37 3 - EBX
38 4 - ESP
39 5 - EBP
40 6 - ESI
41 7 - EDI
42 */
43
44 /*
45 64b register indexes:
46 0 - RAX
47 1 - RCX
48 2 - RDX
49 3 - RBX
50 4 - RSP
51 5 - RBP
52 6 - RSI
53 7 - RDI
54 8 - R8 - From now on REX prefix is required
55 9 - R9
56 10 - R10
57 11 - R11
58 12 - R12
59 13 - R13
60 14 - R14
61 15 - R15
62 */
63
/* Number of the temporary floating point register.
   NOTE(review): on x86-64 this presumably indexes freg_map / freg_lmap
   below (entry 0) - confirm against the users of TMP_FREG. */
#define TMP_FREG	(0)

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)

/* Maps sljit register numbers (the array index) to the 32 bit machine
   register indexes listed in the comment above. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
	0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
};

/* When p names a register in the SLJIT_R3 .. SLJIT_S3 range, the operand is
   rewritten into a stack access: w becomes an offset of two SSIZE_OF(sw)
   units plus one unit per register above SLJIT_R3, p becomes
   SLJIT_MEM1(SLJIT_SP), and the statement passed as `do` is executed.
   The expansion is a bare `if` statement and evaluates p several times. */
#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
		w = (2 * SSIZE_OF(sw)) + ((p) - SLJIT_R3) * SSIZE_OF(sw); \
		p = SLJIT_MEM1(SLJIT_SP); \
		do; \
	}

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)

/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing,
         therefore r12 is better used as a higher saved register. */
#ifndef _WIN64
/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
};
#else
/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2
};
#endif

/* Args: xmm0-xmm3 */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
	4, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
/* low-map. freg_map & 0x7. */
static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
	4, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
};

/* REX prefix bytes: the 0x40 base value with the W (0x08), R (0x04),
   X (0x02) or B (0x01) bit set. */
#define REX_W		0x48
#define REX_R		0x44
#define REX_X		0x42
#define REX_B		0x41
#define REX		0x40

/* Limits of a signed 32 bit value, spelled with a literal suffix that
   yields a 64 bit type (`ll` on _WIN64, where `long` is 32 bits wide). */
#ifndef _WIN64
#define HALFWORD_MAX 0x7fffffffl
#define HALFWORD_MIN -0x80000000l
#else
#define HALFWORD_MAX 0x7fffffffll
#define HALFWORD_MIN -0x80000000ll
#endif

/* True when x is inside / outside the [HALFWORD_MIN, HALFWORD_MAX] range.
   Both macros evaluate x twice. */
#define IS_HALFWORD(x)		((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x)		((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)

/* Expands to nothing in this configuration. */
#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */
140
/* Explicit cast to sljit_u8, used where a wider value is stored as a
   single instruction byte. */
#define U8(v)			((sljit_u8)(v))
142
143
/* Size flags for emit_x86_instruction: */
/* These bits are ORed into the size argument of emit_x86_instruction
   (for example `1 | EX86_BIN_INS` in BINARY_IMM32). The smallest flag is
   0x0010, so the low four bits remain available for the size itself.
   NOTE(review): the exact effect of each flag is defined by
   emit_x86_instruction, which is not part of this file section. */
#define EX86_BIN_INS		0x0010
#define EX86_SHIFT_INS		0x0020
#define EX86_REX		0x0040
#define EX86_NO_REXW		0x0080
#define EX86_BYTE_ARG		0x0100
#define EX86_HALF_ARG		0x0200
#define EX86_PREF_66		0x0400
#define EX86_PREF_F2		0x0800
#define EX86_PREF_F3		0x1000
#define EX86_SSE2_OP1		0x2000
#define EX86_SSE2_OP2		0x4000
/* Both SSE2 operand flags combined. */
#define EX86_SSE2		(EX86_SSE2_OP1 | EX86_SSE2_OP2)
157
158 /* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
160 /* --------------------------------------------------------------------- */
161
/* Opcode bytes and opcode extensions, named <MNEMONIC>_<dst>_<src>.
   Entries written as `n << 3` are not opcode bytes on their own: they are
   ORed into the byte that follows the opcode (see BINARY_IMM32).
   NOTE(review): the inline notes (BINARY, SHIFT, GROUP_0F, GROUP_F3,
   GROUP_F7, GROUP_FF) presumably name the opcode group or escape byte each
   entry is combined with - confirm against the emitters that use them. */
#define ADD		(/* BINARY */ 0 << 3)
#define ADD_EAX_i32	0x05
#define ADD_r_rm	0x03
#define ADD_rm_r	0x01
#define ADDSD_x_xm	0x58
#define ADC		(/* BINARY */ 2 << 3)
#define ADC_EAX_i32	0x15
#define ADC_r_rm	0x13
#define ADC_rm_r	0x11
#define AND		(/* BINARY */ 4 << 3)
#define AND_EAX_i32	0x25
#define AND_r_rm	0x23
#define AND_rm_r	0x21
#define ANDPD_x_xm	0x54
#define BSR_r_rm	(/* GROUP_0F */ 0xbd)
#define BSF_r_rm	(/* GROUP_0F */ 0xbc)
#define CALL_i32	0xe8
#define CALL_rm		(/* GROUP_FF */ 2 << 3)
#define CDQ		0x99
#define CMOVE_r_rm	(/* GROUP_0F */ 0x44)
#define CMP		(/* BINARY */ 7 << 3)
#define CMP_EAX_i32	0x3d
#define CMP_r_rm	0x3b
#define CMP_rm_r	0x39
#define CVTPD2PS_x_xm	0x5a
#define CVTSI2SD_x_rm	0x2a
#define CVTTSD2SI_r_xm	0x2c
#define DIV		(/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm	0x5e
#define FLDS		0xd9
#define FLDL		0xdd
#define FSTPS		0xd9
#define FSTPD		0xdd
#define INT3		0xcc
#define IDIV		(/* GROUP_F7 */ 7 << 3)
#define IMUL		(/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8	0x6b
#define IMUL_r_rm_i32	0x69
#define JE_i8		0x74
#define JNE_i8		0x75
#define JMP_i8		0xeb
#define JMP_i32		0xe9
#define JMP_rm		(/* GROUP_FF */ 4 << 3)
#define LEA_r_m		0x8d
#define LOOP_i8		0xe2
#define LZCNT_r_rm	(/* GROUP_F3 */ /* GROUP_0F */ 0xbd)
#define MOV_r_rm	0x8b
#define MOV_r_i32	0xb8
#define MOV_rm_r	0x89
#define MOV_rm_i32	0xc7
#define MOV_rm8_i8	0xc6
#define MOV_rm8_r8	0x88
#define MOVAPS_x_xm	0x28
#define MOVAPS_xm_x	0x29
#define MOVSD_x_xm	0x10
#define MOVSD_xm_x	0x11
#define MOVSXD_r_rm	0x63
#define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
#define MUL		(/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm	0x59
#define NEG_rm		(/* GROUP_F7 */ 3 << 3)
#define NOP		0x90
#define NOT_rm		(/* GROUP_F7 */ 2 << 3)
#define OR		(/* BINARY */ 1 << 3)
#define OR_r_rm		0x0b
#define OR_EAX_i32	0x0d
#define OR_rm_r		0x09
#define OR_rm8_r8	0x08
#define POP_r		0x58
#define POP_rm		0x8f
#define POPF		0x9d
#define PREFETCH	0x18
#define PUSH_i32	0x68
#define PUSH_r		0x50
#define PUSH_rm		(/* GROUP_FF */ 6 << 3)
#define PUSHF		0x9c
#define ROL		(/* SHIFT */ 0 << 3)
#define ROR		(/* SHIFT */ 1 << 3)
#define RET_near	0xc3
#define RET_i16		0xc2
#define SBB		(/* BINARY */ 3 << 3)
#define SBB_EAX_i32	0x1d
#define SBB_r_rm	0x1b
#define SBB_rm_r	0x19
#define SAR		(/* SHIFT */ 7 << 3)
#define SHL		(/* SHIFT */ 4 << 3)
#define SHLD		(/* GROUP_0F */ 0xa5)
#define SHRD		(/* GROUP_0F */ 0xad)
#define SHR		(/* SHIFT */ 5 << 3)
#define SUB		(/* BINARY */ 5 << 3)
#define SUB_EAX_i32	0x2d
#define SUB_r_rm	0x2b
#define SUB_rm_r	0x29
#define SUBSD_x_xm	0x5c
#define TEST_EAX_i32	0xa9
#define TEST_rm_r	0x85
#define TZCNT_r_rm	(/* GROUP_F3 */ /* GROUP_0F */ 0xbc)
#define UCOMISD_x_xm	0x2e
#define UNPCKLPD_x_xm	0x14
#define XCHG_EAX_r	0x90
#define XCHG_r_rm	0x87
#define XOR		(/* BINARY */ 6 << 3)
#define XOR_EAX_i32	0x35
#define XOR_r_rm	0x33
#define XOR_rm_r	0x31
#define XORPD_x_xm	0x57

/* Escape / group opcode bytes referenced by the inline notes above. */
#define GROUP_0F	0x0f
#define GROUP_F3	0xf3
#define GROUP_F7	0xf7
#define GROUP_FF	0xff
#define GROUP_BINARY_81	0x81
#define GROUP_BINARY_83	0x83
#define GROUP_SHIFT_1	0xd1
#define GROUP_SHIFT_N	0xc1
#define GROUP_SHIFT_CL	0xd3

/* Values for the two topmost bits of the byte that follows the opcode:
   0xc0 is also written as (0x3 << 6) by emit_rdssp / emit_incssp for a
   register operand. MOD_DISP8 presumably selects an 8 bit displacement. */
#define MOD_REG		0xc0
#define MOD_DISP8	0x40
285
/* Starts a record of s instruction bytes: stores s as the length byte at
   *inst, advances inst to the first instruction byte and adds s to
   compiler->size. Needs `inst` and `compiler` in the enclosing scope and
   evaluates s twice. */
#define INC_SIZE(s)			(*inst++ = U8(s), compiler->size += (s))

/* One byte push / pop: the register index r is added to the base opcode. */
#define PUSH_REG(r)			(*inst++ = U8(PUSH_r + (r)))
#define POP_REG(r)			(*inst++ = U8(POP_r + (r)))
/* Plain near return. */
#define RET()				(*inst++ = RET_near)
/* Return with a 16 bit immediate: the low byte is n, the high byte is 0. */
#define RET_I16(n)			(*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0)
292
/* Multithreading does not affect these static variables, since they store
   built-in CPU features. Therefore they can be overwritten by different threads
   if they detect the CPU features at the same time. */
/* Bits of cpu_feature_list. CPU_FEATURE_DETECTED is always set by
   get_cpu_features(), so a zero list means "not detected yet". */
#define CPU_FEATURE_DETECTED		0x001
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
#define CPU_FEATURE_SSE2		0x002
#endif
#define CPU_FEATURE_LZCNT		0x004
#define CPU_FEATURE_TZCNT		0x008
#define CPU_FEATURE_CMOV		0x010

/* Cached detection result; filled by get_cpu_features() on first use. */
static sljit_u32 cpu_feature_list = 0;
305
306 #ifdef _WIN32_WCE
307 #include <cmnintrin.h>
308 #elif defined(_MSC_VER) && _MSC_VER >= 1400
309 #include <intrin.h>
310 #endif
311
312 /******************************************************/
313 /* Unaligned-store functions */
314 /******************************************************/
315
sljit_unaligned_store_s16(void * addr,sljit_s16 value)316 static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
317 {
318 SLJIT_MEMCPY(addr, &value, sizeof(value));
319 }
320
sljit_unaligned_store_s32(void * addr,sljit_s32 value)321 static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
322 {
323 SLJIT_MEMCPY(addr, &value, sizeof(value));
324 }
325
sljit_unaligned_store_sw(void * addr,sljit_sw value)326 static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
327 {
328 SLJIT_MEMCPY(addr, &value, sizeof(value));
329 }
330
331 /******************************************************/
332 /* Utility functions */
333 /******************************************************/
334
/* Detects the CPU features this backend cares about (LZCNT, TZCNT, CMOV
   and, when SLJIT_DETECT_SSE2 is enabled, SSE2) and stores the resulting
   bit set in cpu_feature_list. CPU_FEATURE_DETECTED is always part of the
   result, so the stored value is never zero.

   Three compiler specific implementations follow: cpuid intrinsics for
   recent MSVC, AT&T inline assembly for GCC compatible compilers, and
   Intel syntax inline assembly otherwise. Each of them leaves the value
   that cpuid function 1 returns in EDX in `value`. */
static void get_cpu_features(void)
{
	sljit_u32 feature_list = CPU_FEATURE_DETECTED;
	sljit_u32 value;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	/* Receives the four registers returned by each cpuid query. */
	int CPUInfo[4];

	/* Function 7 is only queried when function 0 reports at least 7 in
	   its first output register. */
	__cpuid(CPUInfo, 0);
	if (CPUInfo[0] >= 7) {
		__cpuidex(CPUInfo, 7, 0);
		if (CPUInfo[1] & 0x8)
			feature_list |= CPU_FEATURE_TZCNT;
	}

	__cpuid(CPUInfo, (int)0x80000001);
	if (CPUInfo[2] & 0x20)
		feature_list |= CPU_FEATURE_LZCNT;

	/* The last array element of function 1 is tested below. */
	__cpuid(CPUInfo, 1);
	value = (sljit_u32)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)

	/* Probe by execution: run lzcnt on a zero input and capture the zero
	   flag with setnz, so bit 0 of value is set when ZF is clear.
	   NOTE(review): this presumably relies on CPUs without LZCNT decoding
	   the instruction as BSR, which sets ZF for a zero source - confirm
	   against the CPU vendor manuals. */
	/* AT&T syntax. */
	__asm__ (
		"movl $0x0, %%eax\n"
		"lzcnt %%eax, %%eax\n"
		"setnz %%al\n"
		"movl %%eax, %0\n"
		: "=g" (value)
		:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "eax"
#else
		: "rax"
#endif
	);

	if (value & 0x1)
		feature_list |= CPU_FEATURE_LZCNT;

	/* Same probe for tzcnt. */
	__asm__ (
		"movl $0x0, %%eax\n"
		"tzcnt %%eax, %%eax\n"
		"setnz %%al\n"
		"movl %%eax, %0\n"
		: "=g" (value)
		:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "eax"
#else
		: "rax"
#endif
	);

	if (value & 0x1)
		feature_list |= CPU_FEATURE_TZCNT;

	/* cpuid function 1; EDX is copied into value. On x86-32 EBX is saved
	   and restored by hand instead of being listed as clobbered. */
	__asm__ (
		"movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* On x86-32, there is no red zone, so this
		   should work (no need for a local variable). */
		"push %%ebx\n"
#endif
		"cpuid\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		"pop %%ebx\n"
#endif
		"movl %%edx, %0\n"
		: "=g" (value)
		:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "%eax", "%ecx", "%edx"
#else
		: "%rax", "%rbx", "%rcx", "%rdx"
#endif
	);

#else /* _MSC_VER && _MSC_VER >= 1400 */

	/* Same three steps as the AT&T variant above. */
	/* Intel syntax. */
	__asm {
		mov eax, 0
		lzcnt eax, eax
		setnz al
		mov value, eax
	}

	if (value & 0x1)
		feature_list |= CPU_FEATURE_LZCNT;

	__asm {
		mov eax, 0
		tzcnt eax, eax
		setnz al
		mov value, eax
	}

	if (value & 0x1)
		feature_list |= CPU_FEATURE_TZCNT;

	__asm {
		mov eax, 1
		cpuid
		mov value, edx
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */

	/* value holds EDX of cpuid function 1 here: bit 26 (0x4000000) is
	   tested for SSE2 and bit 15 (0x8000) for CMOV. */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	if (value & 0x4000000)
		feature_list |= CPU_FEATURE_SSE2;
#endif
	if (value & 0x8000)
		feature_list |= CPU_FEATURE_CMOV;

	cpu_feature_list = feature_list;
}
456
get_jump_code(sljit_uw type)457 static sljit_u8 get_jump_code(sljit_uw type)
458 {
459 switch (type) {
460 case SLJIT_EQUAL:
461 case SLJIT_F_EQUAL:
462 case SLJIT_UNORDERED_OR_EQUAL:
463 case SLJIT_ORDERED_EQUAL: /* Not supported. */
464 return 0x84 /* je */;
465
466 case SLJIT_NOT_EQUAL:
467 case SLJIT_F_NOT_EQUAL:
468 case SLJIT_ORDERED_NOT_EQUAL:
469 case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not supported. */
470 return 0x85 /* jne */;
471
472 case SLJIT_LESS:
473 case SLJIT_CARRY:
474 case SLJIT_F_LESS:
475 case SLJIT_UNORDERED_OR_LESS:
476 case SLJIT_UNORDERED_OR_GREATER:
477 return 0x82 /* jc */;
478
479 case SLJIT_GREATER_EQUAL:
480 case SLJIT_NOT_CARRY:
481 case SLJIT_F_GREATER_EQUAL:
482 case SLJIT_ORDERED_GREATER_EQUAL:
483 case SLJIT_ORDERED_LESS_EQUAL:
484 return 0x83 /* jae */;
485
486 case SLJIT_GREATER:
487 case SLJIT_F_GREATER:
488 case SLJIT_ORDERED_LESS:
489 case SLJIT_ORDERED_GREATER:
490 return 0x87 /* jnbe */;
491
492 case SLJIT_LESS_EQUAL:
493 case SLJIT_F_LESS_EQUAL:
494 case SLJIT_UNORDERED_OR_GREATER_EQUAL:
495 case SLJIT_UNORDERED_OR_LESS_EQUAL:
496 return 0x86 /* jbe */;
497
498 case SLJIT_SIG_LESS:
499 return 0x8c /* jl */;
500
501 case SLJIT_SIG_GREATER_EQUAL:
502 return 0x8d /* jnl */;
503
504 case SLJIT_SIG_GREATER:
505 return 0x8f /* jnle */;
506
507 case SLJIT_SIG_LESS_EQUAL:
508 return 0x8e /* jle */;
509
510 case SLJIT_OVERFLOW:
511 return 0x80 /* jo */;
512
513 case SLJIT_NOT_OVERFLOW:
514 return 0x81 /* jno */;
515
516 case SLJIT_UNORDERED:
517 return 0x8a /* jp */;
518
519 case SLJIT_ORDERED:
520 return 0x8b /* jpo */;
521 }
522 return 0;
523 }
524
/* Forward declarations of helpers used by generate_near_jump_code() and
   sljit_generate_code() below. The x86-32 variant of
   generate_far_jump_code() takes the executable offset as an extra argument;
   generate_put_label_code() is only declared for x86-64.
   NOTE(review): the definitions are presumably in the architecture specific
   file included further down - confirm. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset);
#else
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr);
static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label);
#endif
531
/* Emits the opcode of a relative jump or call at code_ptr and reserves
   room for its displacement, which is filled in later by
   sljit_generate_code().

   jump              - the jump being generated; jump->addr must hold the
                       address of the first opcode byte on entry and is
                       advanced to the displacement field. PATCH_MB or
                       PATCH_MW is added to jump->flags to record the
                       displacement width.
   code_ptr          - current write position.
   code              - start of the generated code, used to resolve labels.
   executable_offset - subtracted from absolute targets so that they are
                       comparable with the write position.

   Returns the write position after the reserved displacement. On x86-64
   the work is delegated to generate_far_jump_code() when the target is
   outside the 32 bit displacement range. */
static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
{
	sljit_uw type = jump->flags >> TYPE_SHIFT;
	sljit_uw label_addr;
	sljit_sw distance;
	sljit_s32 short_jump;

	label_addr = (jump->flags & JUMP_LABEL)
		? (sljit_uw)(code + jump->u.label->size)
		: jump->u.target - (sljit_uw)executable_offset;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	distance = (sljit_sw)(label_addr - (jump->addr + 1));
	if (distance > HALFWORD_MAX || distance < HALFWORD_MIN)
		return generate_far_jump_code(jump, code_ptr);
#endif

	/* A short jump is two bytes long: opcode plus 8 bit displacement. */
	distance = (sljit_sw)(label_addr - (jump->addr + 2));
	short_jump = (distance >= -128 && distance <= 127);

	if (type == SLJIT_JUMP) {
		*code_ptr++ = short_jump ? JMP_i8 : JMP_i32;
		jump->addr += 1;
	}
	else if (type >= SLJIT_FAST_CALL) {
		/* Calls are always emitted with a 32 bit displacement. */
		short_jump = 0;
		*code_ptr++ = CALL_i32;
		jump->addr += 1;
	}
	else if (!short_jump) {
		/* Long conditional jump: two opcode bytes. */
		code_ptr[0] = GROUP_0F;
		code_ptr[1] = get_jump_code(type);
		code_ptr += 2;
		jump->addr += 2;
	}
	else {
		/* Short conditional jump: long form opcode minus 0x10. */
		*code_ptr++ = U8(get_jump_code(type) - 0x10);
		jump->addr += 1;
	}

	if (!short_jump) {
		jump->flags |= PATCH_MW;
		return code_ptr + sizeof(sljit_s32);
	}

	jump->flags |= PATCH_MB;
	return code_ptr + sizeof(sljit_s8);
}
582
/* Produces the final machine code from the records collected in the
   compiler's buffer fragments.

   The fragments are walked once to lay out the code: every record starts
   with a length byte. A non-zero length is followed by that many bytes of
   finished machine code, which are copied verbatim. A zero length is
   followed by a tag byte: 0 marks a label, 1 a jump, 2 a constant and
   3 a put_label. Afterwards the displacements of the jumps and the values
   of the put_labels are patched, since all label addresses are known then.

   On success compiler->error is set to SLJIT_ERR_COMPILED,
   compiler->executable_offset and compiler->executable_size are filled in,
   and the start of the code (with the executable offset applied) is
   returned.
   NOTE(review): CHECK_ERROR_PTR, CHECK_PTR and PTR_FAIL_WITH_EXEC_IF
   presumably return early on failure - confirm against their
   definitions. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_u8 *code;
	sljit_u8 *code_ptr;
	sljit_u8 *buf_ptr;
	sljit_u8 *buf_end;
	sljit_u8 len;
	sljit_sw executable_offset;
	sljit_uw jump_addr;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;
	struct sljit_put_label *put_label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size, compiler->exec_allocator_data);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	/* The four lists are consumed in step with the tags found in the
	   buffer, so each cursor starts at the head of its list. */
	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	put_label = compiler->put_labels;
	executable_offset = SLJIT_EXEC_OFFSET(code);

	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMCPY(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				switch (*buf_ptr) {
				case 0:
					/* Label: record its executable address and its
					   offset from the start of the code. */
					label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					label->size = (sljit_uw)(code_ptr - code);
					label = label->next;
					break;
				case 1:
					/* Jump: the generators emit the opcode and move
					   jump->addr to the field patched below. Rewritable
					   jumps always use the far form. */
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, executable_offset);
					else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
						code_ptr = generate_far_jump_code(jump, code_ptr, executable_offset);
#else
						code_ptr = generate_far_jump_code(jump, code_ptr);
#endif
					}
					jump = jump->next;
					break;
				case 2:
					/* Constant: its address is the machine word that
					   ends at the current position. */
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
					const_ = const_->next;
					break;
				default:
					/* put_label: remember the current position; on
					   x86-64 extra code is generated for it. */
					SLJIT_ASSERT(*buf_ptr == 3);
					SLJIT_ASSERT(put_label->label);
					put_label->addr = (sljit_uw)code_ptr;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
					code_ptr = generate_put_label_code(put_label, code_ptr, (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size);
#endif
					put_label = put_label->next;
					break;
				}
				/* Skip the tag byte. */
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	/* Every list must be fully consumed and the code must fit into the
	   allocated area. */
	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(!put_label);
	SLJIT_ASSERT(code_ptr <= code + compiler->size);

	/* Patch the jumps now that every label address is known. */
	jump = compiler->jumps;
	while (jump) {
		if (jump->flags & (PATCH_MB | PATCH_MW)) {
			if (jump->flags & JUMP_LABEL)
				jump_addr = jump->u.label->addr;
			else
				jump_addr = jump->u.target;

			/* Relative displacement, measured from the executable
			   address of the displacement field... */
			jump_addr -= jump->addr + (sljit_uw)executable_offset;

			if (jump->flags & PATCH_MB) {
				/* ...and then from the end of that field. */
				jump_addr -= sizeof(sljit_s8);
				SLJIT_ASSERT((sljit_sw)jump_addr >= -128 && (sljit_sw)jump_addr <= 127);
				*(sljit_u8*)jump->addr = U8(jump_addr);
			} else {
				jump_addr -= sizeof(sljit_s32);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump_addr);
#else
				SLJIT_ASSERT((sljit_sw)jump_addr >= HALFWORD_MIN && (sljit_sw)jump_addr <= HALFWORD_MAX);
				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)jump_addr);
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD) {
			/* The absolute address of the label is stored as a full
			   machine word. */
			SLJIT_ASSERT(jump->flags & JUMP_LABEL);
			sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump->u.label->addr);
		}
#endif

		jump = jump->next;
	}

	/* Store the label addresses requested by the put_labels. The value
	   occupies the bytes that end at put_label->addr. */
	put_label = compiler->put_labels;
	while (put_label) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
#else
		if (put_label->flags & PATCH_MD) {
			/* Full machine word for addresses above HALFWORD_MAX. */
			SLJIT_ASSERT(put_label->label->addr > HALFWORD_MAX);
			sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
		}
		else {
			/* 32 bit value otherwise. */
			SLJIT_ASSERT(put_label->label->addr <= HALFWORD_MAX);
			sljit_unaligned_store_s32((void*)(put_label->addr - sizeof(sljit_s32)), (sljit_s32)put_label->label->addr);
		}
#endif

		put_label = put_label->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = (sljit_uw)(code_ptr - code);

	/* From here on `code` is the executable address handed to the caller. */
	code = (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);

	SLJIT_UPDATE_WX_FLAGS(code, (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset), 1);
	return (void*)code;
}
734
sljit_has_cpu_feature(sljit_s32 feature_type)735 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
736 {
737 switch (feature_type) {
738 case SLJIT_HAS_FPU:
739 #ifdef SLJIT_IS_FPU_AVAILABLE
740 return SLJIT_IS_FPU_AVAILABLE;
741 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
742 if (cpu_feature_list == 0)
743 get_cpu_features();
744 return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;
745 #else /* SLJIT_DETECT_SSE2 */
746 return 1;
747 #endif /* SLJIT_DETECT_SSE2 */
748
749 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
750 case SLJIT_HAS_VIRTUAL_REGISTERS:
751 return 1;
752 #endif /* SLJIT_CONFIG_X86_32 */
753
754 case SLJIT_HAS_CLZ:
755 if (cpu_feature_list == 0)
756 get_cpu_features();
757
758 return (cpu_feature_list & CPU_FEATURE_LZCNT) ? 1 : 2;
759
760 case SLJIT_HAS_CTZ:
761 if (cpu_feature_list == 0)
762 get_cpu_features();
763
764 return (cpu_feature_list & CPU_FEATURE_TZCNT) ? 1 : 2;
765
766 case SLJIT_HAS_CMOV:
767 if (cpu_feature_list == 0)
768 get_cpu_features();
769 return (cpu_feature_list & CPU_FEATURE_CMOV) != 0;
770
771 case SLJIT_HAS_ROT:
772 case SLJIT_HAS_PREFETCH:
773 return 1;
774
775 case SLJIT_HAS_SSE2:
776 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
777 if (cpu_feature_list == 0)
778 get_cpu_features();
779 return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;
780 #else /* !SLJIT_DETECT_SSE2 */
781 return 1;
782 #endif /* SLJIT_DETECT_SSE2 */
783
784 default:
785 return 0;
786 }
787 }
788
sljit_cmp_info(sljit_s32 type)789 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
790 {
791 if (type < SLJIT_UNORDERED || type > SLJIT_ORDERED_LESS_EQUAL)
792 return 0;
793
794 switch (type) {
795 case SLJIT_ORDERED_EQUAL:
796 case SLJIT_UNORDERED_OR_NOT_EQUAL:
797 return 0;
798 }
799
800 return 1;
801 }
802
803 /* --------------------------------------------------------------------- */
804 /* Operators */
805 /* --------------------------------------------------------------------- */
806
/* Packs the four encodings of a binary operation into one 32 bit value:
   bits 24-31 hold <op>_EAX_i32, bits 16-23 <op>_r_rm, bits 8-15 <op>_rm_r
   and the low byte the `n << 3` opcode extension named <op>. */
#define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))

/* Emits "op arg, immediate" in its immediate form. emit_x86_instruction()
   returns the position of the opcode byte; the opcode extension op_imm is
   ORed into the byte after it. Needs `inst` and `compiler` in the enclosing
   scope and leaves the enclosing function through FAIL_IF when the
   instruction cannot be emitted. */
#define BINARY_IMM32(op_imm, immw, arg, argw) \
	do { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} while (0)

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

/* x86-64: the immediate form is only used when the immediate fits into
   32 bits or the compiler is in 32 bit mode. Otherwise the immediate is
   loaded into a temporary register (TMP_REG2 when arg itself is TMP_REG1,
   TMP_REG1 otherwise) and the register form op_mr is emitted. */
#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	do { \
		if (IS_HALFWORD(immw) || compiler->mode32) { \
			BINARY_IMM32(op_imm, immw, arg, argw); \
		} \
		else { \
			FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
			inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
			FAIL_IF(!inst); \
			*inst = (op_mr); \
		} \
	} while (0)

/* Emits the <op>_EAX_i32 form; REX_W is passed as the prefix unless the
   compiler is in 32 bit mode. */
#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else /* !SLJIT_CONFIG_X86_64 */

/* x86-32: every immediate is handled by the immediate form. */
#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	BINARY_IMM32(op_imm, immw, arg, argw)

/* Emits the <op>_EAX_i32 form. */
#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif /* SLJIT_CONFIG_X86_64 */
843
/* Forward declaration; emit_mov() is defined after the architecture
   specific file is included. */
static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw);

/* Calls emit_mov() and leaves the enclosing function through FAIL_IF on
   failure. NOTE(review): the expansion already ends with a semicolon, so
   "EMIT_MOV(...);" produces an extra empty statement and the macro cannot
   be used as the unbraced body of an if that has an else. */
#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));

/* Forward declarations of helpers that are used before their definition. */
static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);

static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);

static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);
860
emit_endbranch(struct sljit_compiler * compiler)861 static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler)
862 {
863 #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
864 /* Emit endbr32/endbr64 when CET is enabled. */
865 sljit_u8 *inst;
866 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
867 FAIL_IF(!inst);
868 INC_SIZE(4);
869 *inst++ = 0xf3;
870 *inst++ = 0x0f;
871 *inst++ = 0x1e;
872 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
873 *inst = 0xfb;
874 #else
875 *inst = 0xfa;
876 #endif
877 #else /* !SLJIT_CONFIG_X86_CET */
878 SLJIT_UNUSED_ARG(compiler);
879 #endif /* SLJIT_CONFIG_X86_CET */
880 return SLJIT_SUCCESS;
881 }
882
883 #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
884
emit_rdssp(struct sljit_compiler * compiler,sljit_s32 reg)885 static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_s32 reg)
886 {
887 sljit_u8 *inst;
888 sljit_s32 size;
889
890 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
891 size = 5;
892 #else
893 size = 4;
894 #endif
895
896 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
897 FAIL_IF(!inst);
898 INC_SIZE(size);
899 *inst++ = 0xf3;
900 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
901 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
902 #endif
903 *inst++ = 0x0f;
904 *inst++ = 0x1e;
905 *inst = (0x3 << 6) | (0x1 << 3) | (reg_map[reg] & 0x7);
906 return SLJIT_SUCCESS;
907 }
908
emit_incssp(struct sljit_compiler * compiler,sljit_s32 reg)909 static SLJIT_INLINE sljit_s32 emit_incssp(struct sljit_compiler *compiler, sljit_s32 reg)
910 {
911 sljit_u8 *inst;
912 sljit_s32 size;
913
914 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
915 size = 5;
916 #else
917 size = 4;
918 #endif
919
920 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
921 FAIL_IF(!inst);
922 INC_SIZE(size);
923 *inst++ = 0xf3;
924 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
925 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
926 #endif
927 *inst++ = 0x0f;
928 *inst++ = 0xae;
929 *inst = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7);
930 return SLJIT_SUCCESS;
931 }
932
933 #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
934
cpu_has_shadow_stack(void)935 static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void)
936 {
937 #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
938 return _get_ssp() != 0;
939 #else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
940 return 0;
941 #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
942 }
943
/* Emits a loop that advances the shadow stack until its top entry equals
   the return address given by the src / srcw operand:

       again: rdssp  TMP_REG1
              mov    TMP_REG1, [TMP_REG1]
              cmp    TMP_REG1, src
              je     done
              mov    TMP_REG1, 1
              incssp TMP_REG1
              jmp    again
       done:

   Both jumps use 8 bit displacements computed from compiler->size
   snapshots, so the order of the emit calls below must not change.
   Does nothing unless both SLJIT_CONFIG_X86_CET and __SHSTK__ are set.
   NOTE(review): on x86-64 compiler->mode32 is left set to 1 on return;
   callers presumably do not depend on its previous value - confirm. */
static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
	sljit_u8 *inst, *jz_after_cmp_inst;
	sljit_uw size_jz_after_cmp_inst;

	/* Code size at the loop head; target of the backward jump. */
	sljit_uw size_before_rdssp_inst = compiler->size;

	/* Generate "RDSSP TMP_REG1". */
	FAIL_IF(emit_rdssp(compiler, TMP_REG1));

	/* Load return address on shadow stack into TMP_REG1. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	SLJIT_ASSERT(reg_map[TMP_REG1] == 5);

	/* Hand code unsupported "mov 0x0(%ebp),%ebp". */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
	FAIL_IF(!inst);
	INC_SIZE(3);
	*inst++ = 0x8b;
	*inst++ = 0x6d;
	*inst = 0;
#else /* !SLJIT_CONFIG_X86_32 */
	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0);
#endif /* SLJIT_CONFIG_X86_32 */

	/* Compare return address against TMP_REG1. */
	FAIL_IF(emit_cmp_binary (compiler, TMP_REG1, 0, src, srcw));

	/* Generate JZ to skip shadow stack adjustment when shadow
	   stack matches normal stack. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	/* Short form of the conditional jump: long form opcode minus 0x10. */
	*inst++ = get_jump_code(SLJIT_EQUAL) - 0x10;
	/* Remember the code size after the jump and where its displacement
	   byte lives; the byte is filled in at the end. */
	size_jz_after_cmp_inst = compiler->size;
	jz_after_cmp_inst = inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* REX_W is not necessary. */
	compiler->mode32 = 1;
#endif
	/* Load 1 into TMP_REG1. */
	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);

	/* Generate "INCSSP TMP_REG1". */
	FAIL_IF(emit_incssp(compiler, TMP_REG1));

	/* Jump back to "RDSSP TMP_REG1" to check shadow stack again. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	*inst++ = JMP_i8;
	/* compiler->size already includes this jump, so the difference is the
	   (negative) distance from its end back to the loop head, truncated
	   to one byte. */
	*inst = size_before_rdssp_inst - compiler->size;

	/* Forward displacement of the JZ: everything emitted after it. */
	*jz_after_cmp_inst = compiler->size - size_jz_after_cmp_inst;
#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
	SLJIT_UNUSED_ARG(compiler);
	SLJIT_UNUSED_ARG(src);
	SLJIT_UNUSED_ARG(srcw);
#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
	return SLJIT_SUCCESS;
}
1008
1009 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1010 #include "sljitNativeX86_32.c"
1011 #else
1012 #include "sljitNativeX86_64.c"
1013 #endif
1014
/* Emits a machine word move from the src / srcw operand to the dst / dstw
   operand.

   - register source: one MOV_rm_r
   - memory source, register destination: one MOV_r_rm
   - memory source, memory destination: load into TMP_REG1, then store
   - immediate source: the shortest form that can hold the value; on
     x86-64 an immediate outside the 32 bit range is loaded with
     emit_load_imm64 (through TMP_REG1 when the destination is memory)

   Returns SLJIT_SUCCESS, or leaves through FAIL_IF / the result of the
   emit helper when an instruction cannot be emitted. */
static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (FAST_IS_REG(src)) {
		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!inst);
		inst[0] = MOV_rm_r;
		return SLJIT_SUCCESS;
	}

	if (!(src & SLJIT_IMM)) {
		if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
			FAIL_IF(!inst);
			inst[0] = MOV_r_rm;
			return SLJIT_SUCCESS;
		}

		/* Memory to memory move. Only SLJIT_MOV operation copies
		   data from memory to memory so TMP_REG1 can be used. */
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
		FAIL_IF(!inst);
		inst[0] = MOV_r_rm;
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
		FAIL_IF(!inst);
		inst[0] = MOV_rm_r;
		return SLJIT_SUCCESS;
	}

	/* The source is an immediate from here on. */
	if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
#else
		if (compiler->mode32)
			return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, U8(MOV_r_i32 | reg_lmap[dst]), srcw);
		if (NOT_HALFWORD(srcw))
			return emit_load_imm64(compiler, dst, srcw);
		/* A 64 bit move of a value that fits into 32 bits uses the
		   generic immediate form below. */
#endif
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
		/* Immediate to memory move. Only SLJIT_MOV operation copies
		   an immediate directly into memory so TMP_REG1 can be used. */
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
		FAIL_IF(!inst);
		inst[0] = MOV_rm_r;
		return SLJIT_SUCCESS;
	}
#endif

	inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
	FAIL_IF(!inst);
	inst[0] = MOV_rm_i32;
	return SLJIT_SUCCESS;
}
1073
sljit_emit_op0(struct sljit_compiler * compiler,sljit_s32 op)1074 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1075 {
1076 sljit_u8 *inst;
1077 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1078 sljit_uw size;
1079 #endif
1080
1081 CHECK_ERROR();
1082 CHECK(check_sljit_emit_op0(compiler, op));
1083
1084 switch (GET_OPCODE(op)) {
1085 case SLJIT_BREAKPOINT:
1086 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1087 FAIL_IF(!inst);
1088 INC_SIZE(1);
1089 *inst = INT3;
1090 break;
1091 case SLJIT_NOP:
1092 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1093 FAIL_IF(!inst);
1094 INC_SIZE(1);
1095 *inst = NOP;
1096 break;
1097 case SLJIT_LMUL_UW:
1098 case SLJIT_LMUL_SW:
1099 case SLJIT_DIVMOD_UW:
1100 case SLJIT_DIVMOD_SW:
1101 case SLJIT_DIV_UW:
1102 case SLJIT_DIV_SW:
1103 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1104 #ifdef _WIN64
1105 SLJIT_ASSERT(
1106 reg_map[SLJIT_R0] == 0
1107 && reg_map[SLJIT_R1] == 2
1108 && reg_map[TMP_REG1] > 7);
1109 #else
1110 SLJIT_ASSERT(
1111 reg_map[SLJIT_R0] == 0
1112 && reg_map[SLJIT_R1] < 7
1113 && reg_map[TMP_REG1] == 2);
1114 #endif
1115 compiler->mode32 = op & SLJIT_32;
1116 #endif
1117 SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
1118
1119 op = GET_OPCODE(op);
1120 if ((op | 0x2) == SLJIT_DIV_UW) {
1121 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
1122 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
1123 inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
1124 #else
1125 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1126 #endif
1127 FAIL_IF(!inst);
1128 *inst = XOR_r_rm;
1129 }
1130
1131 if ((op | 0x2) == SLJIT_DIV_SW) {
1132 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
1133 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
1134 #endif
1135
1136 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1137 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1138 FAIL_IF(!inst);
1139 INC_SIZE(1);
1140 *inst = CDQ;
1141 #else
1142 if (compiler->mode32) {
1143 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1144 FAIL_IF(!inst);
1145 INC_SIZE(1);
1146 *inst = CDQ;
1147 } else {
1148 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1149 FAIL_IF(!inst);
1150 INC_SIZE(2);
1151 *inst++ = REX_W;
1152 *inst = CDQ;
1153 }
1154 #endif
1155 }
1156
1157 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1158 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1159 FAIL_IF(!inst);
1160 INC_SIZE(2);
1161 *inst++ = GROUP_F7;
1162 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
1163 #else
1164 #ifdef _WIN64
1165 size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
1166 #else
1167 size = (!compiler->mode32) ? 3 : 2;
1168 #endif
1169 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1170 FAIL_IF(!inst);
1171 INC_SIZE(size);
1172 #ifdef _WIN64
1173 if (!compiler->mode32)
1174 *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
1175 else if (op >= SLJIT_DIVMOD_UW)
1176 *inst++ = REX_B;
1177 *inst++ = GROUP_F7;
1178 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
1179 #else
1180 if (!compiler->mode32)
1181 *inst++ = REX_W;
1182 *inst++ = GROUP_F7;
1183 *inst = MOD_REG | reg_map[SLJIT_R1];
1184 #endif
1185 #endif
1186 switch (op) {
1187 case SLJIT_LMUL_UW:
1188 *inst |= MUL;
1189 break;
1190 case SLJIT_LMUL_SW:
1191 *inst |= IMUL;
1192 break;
1193 case SLJIT_DIVMOD_UW:
1194 case SLJIT_DIV_UW:
1195 *inst |= DIV;
1196 break;
1197 case SLJIT_DIVMOD_SW:
1198 case SLJIT_DIV_SW:
1199 *inst |= IDIV;
1200 break;
1201 }
1202 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
1203 if (op <= SLJIT_DIVMOD_SW)
1204 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
1205 #else
1206 if (op >= SLJIT_DIV_UW)
1207 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
1208 #endif
1209 break;
1210 case SLJIT_ENDBR:
1211 return emit_endbranch(compiler);
1212 case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1213 return skip_frames_before_return(compiler);
1214 }
1215
1216 return SLJIT_SUCCESS;
1217 }
1218
/* Appends one byte (the given prefix, or any single byte instruction)
   to the instruction stream. The expansion uses the "compiler" and
   "inst" variables of the enclosing function, and FAIL_IF makes that
   function return when no buffer space can be obtained. */
#define ENCODE_PREFIX(prefix) \
	do { \
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!inst); \
		INC_SIZE(1); \
		*inst = U8(prefix); \
	} while (0)
1226
/* Emits a byte sized move from (src, srcw) to (dst, dstw).
   A register destination receives the value extended by MOVSX (sign != 0)
   or MOVZX (sign == 0); a memory destination receives a single byte.
   On x86-32 registers whose reg_map[] value is 4 or above get special
   handling below. */
static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	sljit_s32 dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (src & SLJIT_IMM) {
		if (!FAST_IS_REG(dst)) {
			/* Byte immediate stored to a non-register destination. */
			inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_i8;
			return SLJIT_SUCCESS;
		}

		/* Register destination: an ordinary immediate load. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
#else
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
#endif
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (FAST_IS_REG(src) && (dst & SLJIT_MEM)) {
		/* Register to memory: no extension is needed before the store. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] < 4)
			dst_r = src;
		else {
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		}
#else
		dst_r = src;
#endif
	}
	else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (FAST_IS_REG(src) && reg_map[src] >= 4) {
			/* src, dst are registers. */
			SLJIT_ASSERT(FAST_IS_REG(dst));

			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);

			if (reg_map[dst] < 4) {
				/* Extend the copy in place. */
				inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
				FAIL_IF(!inst);
				*inst++ = GROUP_0F;
				*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
			}
			else if (sign) {
				/* shl reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SHL;
				/* sar reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SAR;
			}
			else {
				/* and reg, 0xff */
				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
				FAIL_IF(!inst);
				*(inst + 1) |= AND;
			}
			return SLJIT_SUCCESS;
		}
#endif
		/* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst_r != TMP_REG1) {
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
		return SLJIT_SUCCESS;
	}

	/* Find a non-used register, whose reg_map[src] < 4. */
	if ((dst & REG_MASK) == SLJIT_R0)
		work_r = ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1)) ? SLJIT_R2 : SLJIT_R1;
	else if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
		work_r = SLJIT_R0;
	else
		work_r = ((dst & REG_MASK) == SLJIT_R1) ? SLJIT_R2 : SLJIT_R1;

	/* Exchange the value into work_r ... */
	if (work_r == SLJIT_R0) {
		ENCODE_PREFIX(XCHG_EAX_r | reg_map[TMP_REG1]);
	}
	else {
		inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;
	}

	/* ... store its byte ... */
	inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm8_r8;

	/* ... and exchange the two registers back. */
	if (work_r == SLJIT_R0) {
		ENCODE_PREFIX(XCHG_EAX_r | reg_map[TMP_REG1]);
	}
	else {
		inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;
	}
#else
	inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm8_r8;
#endif

	return SLJIT_SUCCESS;
}
1368
/* Emits a PREFETCH instruction (GROUP_0F opcode map) for the address
   (src, srcw). SLJIT_PREFETCH_L1 / L2 / L3 select value 1 / 2 / 3 for
   bits 3-5 of the byte following the opcode; for any other "op" that
   byte is left as emit_x86_instruction() produced it. */
static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	sljit_s32 hint = 0;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (op == SLJIT_PREFETCH_L1)
		hint = 1;
	else if (op == SLJIT_PREFETCH_L2)
		hint = 2;
	else if (op == SLJIT_PREFETCH_L3)
		hint = 3;

	inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
	FAIL_IF(!inst);
	inst[0] = GROUP_0F;
	inst[1] = PREFETCH;

	if (hint != 0)
		inst[2] |= U8(hint << 3);

	return SLJIT_SUCCESS;
}
1392
/* Emits a 16 bit move from (src, srcw) to (dst, dstw).
   A value loaded into a register is extended by MOVSX (sign != 0) or
   MOVZX (sign == 0); stores to memory use the 0x66 prefixed forms. */
static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (src & SLJIT_IMM) {
		if (!FAST_IS_REG(dst)) {
			/* Half word immediate stored to a non-register destination. */
			inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
		}

		/* Register destination: an ordinary immediate load. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
#else
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
#endif
	}

	dst_r = TMP_REG1;
	if (FAST_IS_REG(dst))
		dst_r = dst;

	if (FAST_IS_REG(src) && (dst & SLJIT_MEM)) {
		/* Register to memory: the source register is stored directly. */
		dst_r = src;
	}
	else {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	return SLJIT_SUCCESS;
}
1440
/* Emits a GROUP_F7 single operand instruction; "opcode" is or-ed into the
   byte which follows the group byte. The operation is done in place when
   source and destination are the same operand, directly in the destination
   when it is a register, and through TMP_REG1 otherwise. */
static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	sljit_s32 work_r;

	if (dst == src && dstw == srcw) {
		/* Same input and output */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		inst[0] = GROUP_F7;
		inst[1] |= opcode;
		return SLJIT_SUCCESS;
	}

	work_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	EMIT_MOV(compiler, work_r, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, work_r, 0);
	FAIL_IF(!inst);
	inst[0] = GROUP_F7;
	inst[1] |= opcode;

	/* The result still has to be written out of TMP_REG1. */
	if (!FAST_IS_REG(dst))
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}
1473
/* Variant of the NOT operation selected when SLJIT_SET_Z is requested:
   the value is copied into a register (the destination itself, or
   TMP_REG1 when the destination is not a register), inverted with
   NOT_rm and then or-ed with itself before it is stored. */
static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	sljit_s32 work_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	EMIT_MOV(compiler, work_r, 0, src, srcw);

	/* not work_r */
	inst = emit_x86_instruction(compiler, 1, 0, 0, work_r, 0);
	FAIL_IF(!inst);
	inst[0] = GROUP_F7;
	inst[1] |= NOT_rm;

	/* or work_r, work_r */
	inst = emit_x86_instruction(compiler, 1, work_r, 0, work_r, 0);
	FAIL_IF(!inst);
	*inst = OR_r_rm;

	if (!FAST_IS_REG(dst))
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}
1503
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
/* In-memory copies of the x86-32 fallback values of emit_clz_ctz():
   their addresses are used as the CMOVE source operand when the result
   is produced in TMP_REG1. */
static const sljit_sw emit_clz_arg = 32 + 31;
static const sljit_sw emit_ctz_arg = 32;
#endif
1508
/* Emits count leading zeros (is_clz != 0) or count trailing zeros.
   LZCNT / TZCNT is used when cpu_feature_list reports it. Otherwise
   BSR / BSF is emitted, followed by a conditional move (CMOVE, or the
   generic cmov emitter) of a fallback constant and, for clz, a final
   XOR of the result. */
static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	sljit_s32 dst_r;
	sljit_sw max;

	if (cpu_feature_list == 0)
		get_cpu_features();

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (cpu_feature_list & (is_clz ? CPU_FEATURE_LZCNT : CPU_FEATURE_TZCNT)) {
		/* Group prefix added separately. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = GROUP_F3;

		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = is_clz ? LZCNT_r_rm : TZCNT_r_rm;

		if (dst & SLJIT_MEM)
			EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}

	/* Bit scan based fallback. */
	inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = is_clz ? BSR_r_rm : BSF_r_rm;

	/* Constant selected by the SLJIT_EQUAL conditional move below. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	max = is_clz ? (32 + 31) : 32;
#else
	if (compiler->mode32)
		max = is_clz ? (32 + 31) : 32;
	else
		max = is_clz ? (64 + 63) : 64;
#endif

	if (!(cpu_feature_list & CPU_FEATURE_CMOV)) {
		FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));
	}
	else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r != TMP_REG1) {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, max);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		}
		else {
			/* TMP_REG1 holds the result, so the constant is read from memory. */
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), is_clz ? (sljit_sw)&emit_clz_arg : (sljit_sw)&emit_ctz_arg);
		}
#else
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, max);
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
#endif
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = CMOVE_r_rm;
	}

	if (is_clz) {
		/* max >> 1 is 31 for 32 bit and 63 for 64 bit operations. */
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, max >> 1, dst_r, 0);
		FAIL_IF(!inst);
		*(inst + 1) |= XOR;
	}

	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
1595
sljit_emit_op1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1596 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1597 sljit_s32 dst, sljit_sw dstw,
1598 sljit_s32 src, sljit_sw srcw)
1599 {
1600 sljit_s32 op_flags = GET_ALL_FLAGS(op);
1601 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1602 sljit_s32 dst_is_ereg = 0;
1603 #endif
1604
1605 CHECK_ERROR();
1606 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1607 ADJUST_LOCAL_OFFSET(dst, dstw);
1608 ADJUST_LOCAL_OFFSET(src, srcw);
1609
1610 CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1611 CHECK_EXTRA_REGS(src, srcw, (void)0);
1612 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1613 compiler->mode32 = op_flags & SLJIT_32;
1614 #endif
1615
1616 op = GET_OPCODE(op);
1617
1618 if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
1619 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1620 compiler->mode32 = 0;
1621 #endif
1622
1623 if (FAST_IS_REG(src) && src == dst) {
1624 if (!TYPE_CAST_NEEDED(op))
1625 return SLJIT_SUCCESS;
1626 }
1627
1628 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1629 if (op_flags & SLJIT_32) {
1630 if (src & SLJIT_MEM) {
1631 if (op == SLJIT_MOV_S32)
1632 op = SLJIT_MOV_U32;
1633 }
1634 else if (src & SLJIT_IMM) {
1635 if (op == SLJIT_MOV_U32)
1636 op = SLJIT_MOV_S32;
1637 }
1638 }
1639 #endif
1640
1641 if (src & SLJIT_IMM) {
1642 switch (op) {
1643 case SLJIT_MOV_U8:
1644 srcw = (sljit_u8)srcw;
1645 break;
1646 case SLJIT_MOV_S8:
1647 srcw = (sljit_s8)srcw;
1648 break;
1649 case SLJIT_MOV_U16:
1650 srcw = (sljit_u16)srcw;
1651 break;
1652 case SLJIT_MOV_S16:
1653 srcw = (sljit_s16)srcw;
1654 break;
1655 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1656 case SLJIT_MOV_U32:
1657 srcw = (sljit_u32)srcw;
1658 break;
1659 case SLJIT_MOV_S32:
1660 srcw = (sljit_s32)srcw;
1661 break;
1662 #endif
1663 }
1664 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1665 if (SLJIT_UNLIKELY(dst_is_ereg))
1666 return emit_mov(compiler, dst, dstw, src, srcw);
1667 #endif
1668 }
1669
1670 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1671 if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1672 SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
1673 dst = TMP_REG1;
1674 }
1675 #endif
1676
1677 switch (op) {
1678 case SLJIT_MOV:
1679 case SLJIT_MOV_P:
1680 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1681 case SLJIT_MOV_U32:
1682 case SLJIT_MOV_S32:
1683 case SLJIT_MOV32:
1684 #endif
1685 EMIT_MOV(compiler, dst, dstw, src, srcw);
1686 break;
1687 case SLJIT_MOV_U8:
1688 FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1689 break;
1690 case SLJIT_MOV_S8:
1691 FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1692 break;
1693 case SLJIT_MOV_U16:
1694 FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1695 break;
1696 case SLJIT_MOV_S16:
1697 FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1698 break;
1699 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1700 case SLJIT_MOV_U32:
1701 FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1702 break;
1703 case SLJIT_MOV_S32:
1704 FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1705 break;
1706 case SLJIT_MOV32:
1707 compiler->mode32 = 1;
1708 EMIT_MOV(compiler, dst, dstw, src, srcw);
1709 compiler->mode32 = 0;
1710 break;
1711 #endif
1712 }
1713
1714 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1715 if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
1716 return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
1717 #endif
1718 return SLJIT_SUCCESS;
1719 }
1720
1721 switch (op) {
1722 case SLJIT_NOT:
1723 if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
1724 return emit_not_with_flags(compiler, dst, dstw, src, srcw);
1725 return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
1726
1727 case SLJIT_CLZ:
1728 case SLJIT_CTZ:
1729 return emit_clz_ctz(compiler, (op == SLJIT_CLZ), dst, dstw, src, srcw);
1730 }
1731
1732 return SLJIT_SUCCESS;
1733 }
1734
/* Emits a commutative two operand operation: dst = src1 op src2.
   op_types packs four opcode bytes, from the most significant byte down:
   the accumulator (SLJIT_R0) immediate form, the "reg, r/m" form, the
   "r/m, reg" form and the value passed to BINARY_IMM for immediates.
   When dst is the same operand as src1 or src2 the operation is done in
   place; otherwise the result is built in dst, or in TMP_REG1 when dst
   is not a register or when src2 is a memory operand addressed through
   dst. Returns SLJIT_SUCCESS, or leaves through FAIL_IF on failure. */
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
	sljit_u32 op_types,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8 *inst;
	sljit_u8 op_eax_imm = U8(op_types >> 24);
	sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
	sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
	sljit_u8 op_imm = U8(op_types & 0xff);
	sljit_s32 in_place = 0;
	sljit_s32 other = 0;
	sljit_sw otherw = 0;
	sljit_s32 use_eax_form;
	sljit_s32 work_r;

	if (dst == src1 && dstw == src1w) {
		in_place = 1;
		other = src2;
		otherw = src2w;
	}
	else if (dst == src2 && dstw == src2w) {
		/* Only for cumulative operations. */
		in_place = 1;
		other = src1;
		otherw = src1w;
	}

	if (in_place) {
		if (other & SLJIT_IMM) {
			/* The accumulator form is used for SLJIT_R0 when the
			   immediate does not fit into a signed byte. */
			use_eax_form = (dst == SLJIT_R0) && (otherw > 127 || otherw < -128);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			use_eax_form = use_eax_form && (compiler->mode32 || IS_HALFWORD(otherw));
#endif
			if (use_eax_form) {
				BINARY_EAX_IMM(op_eax_imm, otherw);
			}
			else {
				BINARY_IMM(op_imm, op_mr, otherw, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, other, otherw);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(other)) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, other, otherw, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, other, otherw);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. A register dst cannot receive src1 first when the
	   address of src2 is computed from it: the load would destroy the
	   address, so TMP_REG1 is used in that case as well. */
	work_r = (FAST_IS_REG(dst) && !ADDRESSING_DEPENDS_ON(src2, dst)) ? dst : TMP_REG1;

	EMIT_MOV(compiler, work_r, 0, src1, src1w);
	if (src2 & SLJIT_IMM) {
		BINARY_IMM(op_imm, op_mr, src2w, work_r, 0);
	}
	else {
		inst = emit_x86_instruction(compiler, 1, work_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = op_rm;
	}

	/* This version requires less memory writing. */
	if (work_r != dst)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}
1841
1842 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
1843 sljit_u32 op_types,
1844 sljit_s32 dst, sljit_sw dstw,
1845 sljit_s32 src1, sljit_sw src1w,
1846 sljit_s32 src2, sljit_sw src2w)
1847 {
1848 sljit_u8* inst;
1849 sljit_u8 op_eax_imm = U8(op_types >> 24);
1850 sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
1851 sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
1852 sljit_u8 op_imm = U8(op_types & 0xff);
1853
1854 if (dst == src1 && dstw == src1w) {
1855 if (src2 & SLJIT_IMM) {
1856 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1857 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1858 #else
1859 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1860 #endif
1861 BINARY_EAX_IMM(op_eax_imm, src2w);
1862 }
1863 else {
1864 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1865 }
1866 }
1867 else if (FAST_IS_REG(dst)) {
1868 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1869 FAIL_IF(!inst);
1870 *inst = op_rm;
1871 }
1872 else if (FAST_IS_REG(src2)) {
1873 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1874 FAIL_IF(!inst);
1875 *inst = op_mr;
1876 }
1877 else {
1878 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1879 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1880 FAIL_IF(!inst);
1881 *inst = op_mr;
1882 }
1883 return SLJIT_SUCCESS;
1884 }
1885
1886 /* General version. */
1887 if (FAST_IS_REG(dst) && dst != src2) {
1888 EMIT_MOV(compiler, dst, 0, src1, src1w);
1889 if (src2 & SLJIT_IMM) {
1890 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1891 }
1892 else {
1893 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1894 FAIL_IF(!inst);
1895 *inst = op_rm;
1896 }
1897 }
1898 else {
1899 /* This version requires less memory writing. */
1900 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1901 if (src2 & SLJIT_IMM) {
1902 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1903 }
1904 else {
1905 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1906 FAIL_IF(!inst);
1907 *inst = op_rm;
1908 }
1909 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1910 }
1911
1912 return SLJIT_SUCCESS;
1913 }
1914
1915 static sljit_s32 emit_mul(struct sljit_compiler *compiler,
1916 sljit_s32 dst, sljit_sw dstw,
1917 sljit_s32 src1, sljit_sw src1w,
1918 sljit_s32 src2, sljit_sw src2w)
1919 {
1920 sljit_u8* inst;
1921 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1922
1923 /* Register destination. */
1924 if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1925 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1926 FAIL_IF(!inst);
1927 *inst++ = GROUP_0F;
1928 *inst = IMUL_r_rm;
1929 }
1930 else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1931 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1932 FAIL_IF(!inst);
1933 *inst++ = GROUP_0F;
1934 *inst = IMUL_r_rm;
1935 }
1936 else if (src1 & SLJIT_IMM) {
1937 if (src2 & SLJIT_IMM) {
1938 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1939 src2 = dst_r;
1940 src2w = 0;
1941 }
1942
1943 if (src1w <= 127 && src1w >= -128) {
1944 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1945 FAIL_IF(!inst);
1946 *inst = IMUL_r_rm_i8;
1947 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1948 FAIL_IF(!inst);
1949 INC_SIZE(1);
1950 *inst = U8(src1w);
1951 }
1952 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1953 else {
1954 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1955 FAIL_IF(!inst);
1956 *inst = IMUL_r_rm_i32;
1957 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1958 FAIL_IF(!inst);
1959 INC_SIZE(4);
1960 sljit_unaligned_store_sw(inst, src1w);
1961 }
1962 #else
1963 else if (IS_HALFWORD(src1w)) {
1964 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1965 FAIL_IF(!inst);
1966 *inst = IMUL_r_rm_i32;
1967 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1968 FAIL_IF(!inst);
1969 INC_SIZE(4);
1970 sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
1971 }
1972 else {
1973 if (dst_r != src2)
1974 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1975 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1976 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1977 FAIL_IF(!inst);
1978 *inst++ = GROUP_0F;
1979 *inst = IMUL_r_rm;
1980 }
1981 #endif
1982 }
1983 else if (src2 & SLJIT_IMM) {
1984 /* Note: src1 is NOT immediate. */
1985
1986 if (src2w <= 127 && src2w >= -128) {
1987 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1988 FAIL_IF(!inst);
1989 *inst = IMUL_r_rm_i8;
1990 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1991 FAIL_IF(!inst);
1992 INC_SIZE(1);
1993 *inst = U8(src2w);
1994 }
1995 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1996 else {
1997 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1998 FAIL_IF(!inst);
1999 *inst = IMUL_r_rm_i32;
2000 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2001 FAIL_IF(!inst);
2002 INC_SIZE(4);
2003 sljit_unaligned_store_sw(inst, src2w);
2004 }
2005 #else
2006 else if (IS_HALFWORD(src2w)) {
2007 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
2008 FAIL_IF(!inst);
2009 *inst = IMUL_r_rm_i32;
2010 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2011 FAIL_IF(!inst);
2012 INC_SIZE(4);
2013 sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
2014 }
2015 else {
2016 if (dst_r != src1)
2017 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
2018 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
2019 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
2020 FAIL_IF(!inst);
2021 *inst++ = GROUP_0F;
2022 *inst = IMUL_r_rm;
2023 }
2024 #endif
2025 }
2026 else {
2027 /* Neither argument is immediate. */
2028 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
2029 dst_r = TMP_REG1;
2030 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
2031 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
2032 FAIL_IF(!inst);
2033 *inst++ = GROUP_0F;
2034 *inst = IMUL_r_rm;
2035 }
2036
2037 if (dst & SLJIT_MEM)
2038 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2039
2040 return SLJIT_SUCCESS;
2041 }
2042
2043 static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
2044 sljit_s32 dst, sljit_sw dstw,
2045 sljit_s32 src1, sljit_sw src1w,
2046 sljit_s32 src2, sljit_sw src2w)
2047 {
2048 sljit_u8* inst;
2049 sljit_s32 dst_r, done = 0;
2050
2051 /* These cases better be left to handled by normal way. */
2052 if (dst == src1 && dstw == src1w)
2053 return SLJIT_ERR_UNSUPPORTED;
2054 if (dst == src2 && dstw == src2w)
2055 return SLJIT_ERR_UNSUPPORTED;
2056
2057 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2058
2059 if (FAST_IS_REG(src1)) {
2060 if (FAST_IS_REG(src2)) {
2061 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
2062 FAIL_IF(!inst);
2063 *inst = LEA_r_m;
2064 done = 1;
2065 }
2066 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2067 if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2068 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
2069 #else
2070 if (src2 & SLJIT_IMM) {
2071 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
2072 #endif
2073 FAIL_IF(!inst);
2074 *inst = LEA_r_m;
2075 done = 1;
2076 }
2077 }
2078 else if (FAST_IS_REG(src2)) {
2079 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2080 if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
2081 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
2082 #else
2083 if (src1 & SLJIT_IMM) {
2084 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
2085 #endif
2086 FAIL_IF(!inst);
2087 *inst = LEA_r_m;
2088 done = 1;
2089 }
2090 }
2091
2092 if (done) {
2093 if (dst_r == TMP_REG1)
2094 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2095 return SLJIT_SUCCESS;
2096 }
2097 return SLJIT_ERR_UNSUPPORTED;
2098 }
2099
2100 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
2101 sljit_s32 src1, sljit_sw src1w,
2102 sljit_s32 src2, sljit_sw src2w)
2103 {
2104 sljit_u8* inst;
2105
2106 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2107 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2108 #else
2109 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
2110 #endif
2111 BINARY_EAX_IMM(CMP_EAX_i32, src2w);
2112 return SLJIT_SUCCESS;
2113 }
2114
2115 if (FAST_IS_REG(src1)) {
2116 if (src2 & SLJIT_IMM) {
2117 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
2118 }
2119 else {
2120 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
2121 FAIL_IF(!inst);
2122 *inst = CMP_r_rm;
2123 }
2124 return SLJIT_SUCCESS;
2125 }
2126
2127 if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
2128 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
2129 FAIL_IF(!inst);
2130 *inst = CMP_rm_r;
2131 return SLJIT_SUCCESS;
2132 }
2133
2134 if (src2 & SLJIT_IMM) {
2135 if (src1 & SLJIT_IMM) {
2136 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2137 src1 = TMP_REG1;
2138 src1w = 0;
2139 }
2140 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
2141 }
2142 else {
2143 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2144 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2145 FAIL_IF(!inst);
2146 *inst = CMP_r_rm;
2147 }
2148 return SLJIT_SUCCESS;
2149 }
2150
2151 static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
2152 sljit_s32 src1, sljit_sw src1w,
2153 sljit_s32 src2, sljit_sw src2w)
2154 {
2155 sljit_u8* inst;
2156
2157 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2158 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2159 #else
2160 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
2161 #endif
2162 BINARY_EAX_IMM(TEST_EAX_i32, src2w);
2163 return SLJIT_SUCCESS;
2164 }
2165
2166 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2167 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
2168 #else
2169 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
2170 #endif
2171 BINARY_EAX_IMM(TEST_EAX_i32, src1w);
2172 return SLJIT_SUCCESS;
2173 }
2174
2175 if (!(src1 & SLJIT_IMM)) {
2176 if (src2 & SLJIT_IMM) {
2177 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2178 if (IS_HALFWORD(src2w) || compiler->mode32) {
2179 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
2180 FAIL_IF(!inst);
2181 *inst = GROUP_F7;
2182 }
2183 else {
2184 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w));
2185 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w);
2186 FAIL_IF(!inst);
2187 *inst = TEST_rm_r;
2188 }
2189 #else
2190 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
2191 FAIL_IF(!inst);
2192 *inst = GROUP_F7;
2193 #endif
2194 return SLJIT_SUCCESS;
2195 }
2196 else if (FAST_IS_REG(src1)) {
2197 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
2198 FAIL_IF(!inst);
2199 *inst = TEST_rm_r;
2200 return SLJIT_SUCCESS;
2201 }
2202 }
2203
2204 if (!(src2 & SLJIT_IMM)) {
2205 if (src1 & SLJIT_IMM) {
2206 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2207 if (IS_HALFWORD(src1w) || compiler->mode32) {
2208 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
2209 FAIL_IF(!inst);
2210 *inst = GROUP_F7;
2211 }
2212 else {
2213 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
2214 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2215 FAIL_IF(!inst);
2216 *inst = TEST_rm_r;
2217 }
2218 #else
2219 inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
2220 FAIL_IF(!inst);
2221 *inst = GROUP_F7;
2222 #endif
2223 return SLJIT_SUCCESS;
2224 }
2225 else if (FAST_IS_REG(src2)) {
2226 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
2227 FAIL_IF(!inst);
2228 *inst = TEST_rm_r;
2229 return SLJIT_SUCCESS;
2230 }
2231 }
2232
2233 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2234 if (src2 & SLJIT_IMM) {
2235 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2236 if (IS_HALFWORD(src2w) || compiler->mode32) {
2237 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2238 FAIL_IF(!inst);
2239 *inst = GROUP_F7;
2240 }
2241 else {
2242 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
2243 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
2244 FAIL_IF(!inst);
2245 *inst = TEST_rm_r;
2246 }
2247 #else
2248 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2249 FAIL_IF(!inst);
2250 *inst = GROUP_F7;
2251 #endif
2252 }
2253 else {
2254 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2255 FAIL_IF(!inst);
2256 *inst = TEST_rm_r;
2257 }
2258 return SLJIT_SUCCESS;
2259 }
2260
2261 static sljit_s32 emit_shift(struct sljit_compiler *compiler,
2262 sljit_u8 mode,
2263 sljit_s32 dst, sljit_sw dstw,
2264 sljit_s32 src1, sljit_sw src1w,
2265 sljit_s32 src2, sljit_sw src2w)
2266 {
2267 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2268 sljit_s32 mode32;
2269 #endif
2270 sljit_u8* inst;
2271
2272 if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
2273 if (dst == src1 && dstw == src1w) {
2274 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2275 FAIL_IF(!inst);
2276 *inst |= mode;
2277 return SLJIT_SUCCESS;
2278 }
2279 if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2280 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2281 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2282 FAIL_IF(!inst);
2283 *inst |= mode;
2284 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2285 return SLJIT_SUCCESS;
2286 }
2287 if (FAST_IS_REG(dst)) {
2288 EMIT_MOV(compiler, dst, 0, src1, src1w);
2289 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2290 FAIL_IF(!inst);
2291 *inst |= mode;
2292 return SLJIT_SUCCESS;
2293 }
2294
2295 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2296 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2297 FAIL_IF(!inst);
2298 *inst |= mode;
2299 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2300 return SLJIT_SUCCESS;
2301 }
2302
2303 if (dst == SLJIT_PREF_SHIFT_REG) {
2304 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2305 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2306 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2307 FAIL_IF(!inst);
2308 *inst |= mode;
2309 return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2310 }
2311
2312 if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2313 if (src1 != dst)
2314 EMIT_MOV(compiler, dst, 0, src1, src1w);
2315 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2316 mode32 = compiler->mode32;
2317 compiler->mode32 = 0;
2318 #endif
2319 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2320 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2321 compiler->mode32 = mode32;
2322 #endif
2323 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2324 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2325 FAIL_IF(!inst);
2326 *inst |= mode;
2327 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2328 compiler->mode32 = 0;
2329 #endif
2330 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2331 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2332 compiler->mode32 = mode32;
2333 #endif
2334 return SLJIT_SUCCESS;
2335 }
2336
2337 /* This case is complex since ecx itself may be used for
2338 addressing, and this case must be supported as well. */
2339 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2340 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2341 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
2342 #else /* !SLJIT_CONFIG_X86_32 */
2343 mode32 = compiler->mode32;
2344 compiler->mode32 = 0;
2345 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2346 compiler->mode32 = mode32;
2347 #endif /* SLJIT_CONFIG_X86_32 */
2348
2349 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2350 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2351 FAIL_IF(!inst);
2352 *inst |= mode;
2353
2354 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2355 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
2356 #else
2357 compiler->mode32 = 0;
2358 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2359 compiler->mode32 = mode32;
2360 #endif /* SLJIT_CONFIG_X86_32 */
2361
2362 if (dst != TMP_REG1)
2363 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2364
2365 return SLJIT_SUCCESS;
2366 }
2367
2368 static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
2369 sljit_u8 mode, sljit_s32 set_flags,
2370 sljit_s32 dst, sljit_sw dstw,
2371 sljit_s32 src1, sljit_sw src1w,
2372 sljit_s32 src2, sljit_sw src2w)
2373 {
2374 /* The CPU does not set flags if the shift count is 0. */
2375 if (src2 & SLJIT_IMM) {
2376 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2377 src2w &= compiler->mode32 ? 0x1f : 0x3f;
2378 #else /* !SLJIT_CONFIG_X86_64 */
2379 src2w &= 0x1f;
2380 #endif /* SLJIT_CONFIG_X86_64 */
2381 if (src2w != 0)
2382 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2383
2384 if (!set_flags)
2385 return emit_mov(compiler, dst, dstw, src1, src1w);
2386 /* OR dst, src, 0 */
2387 return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2388 dst, dstw, src1, src1w, SLJIT_IMM, 0);
2389 }
2390
2391 if (!set_flags)
2392 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2393
2394 if (!FAST_IS_REG(dst))
2395 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2396
2397 FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
2398
2399 if (FAST_IS_REG(dst))
2400 return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2401 return SLJIT_SUCCESS;
2402 }
2403
2404 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2405 sljit_s32 dst, sljit_sw dstw,
2406 sljit_s32 src1, sljit_sw src1w,
2407 sljit_s32 src2, sljit_sw src2w)
2408 {
2409 CHECK_ERROR();
2410 CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2411 ADJUST_LOCAL_OFFSET(dst, dstw);
2412 ADJUST_LOCAL_OFFSET(src1, src1w);
2413 ADJUST_LOCAL_OFFSET(src2, src2w);
2414
2415 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2416 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2417 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2418 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2419 compiler->mode32 = op & SLJIT_32;
2420 #endif
2421
2422 SLJIT_ASSERT(dst != TMP_REG1 || HAS_FLAGS(op));
2423
2424 switch (GET_OPCODE(op)) {
2425 case SLJIT_ADD:
2426 if (!HAS_FLAGS(op)) {
2427 if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2428 return compiler->error;
2429 }
2430 return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
2431 dst, dstw, src1, src1w, src2, src2w);
2432 case SLJIT_ADDC:
2433 return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
2434 dst, dstw, src1, src1w, src2, src2w);
2435 case SLJIT_SUB:
2436 if (src1 == SLJIT_IMM && src1w == 0)
2437 return emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w);
2438
2439 if (!HAS_FLAGS(op)) {
2440 if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2441 return compiler->error;
2442 if (FAST_IS_REG(dst) && src2 == dst) {
2443 FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w));
2444 return emit_unary(compiler, NEG_rm, dst, 0, dst, 0);
2445 }
2446 }
2447
2448 return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
2449 dst, dstw, src1, src1w, src2, src2w);
2450 case SLJIT_SUBC:
2451 return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
2452 dst, dstw, src1, src1w, src2, src2w);
2453 case SLJIT_MUL:
2454 return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2455 case SLJIT_AND:
2456 return emit_cum_binary(compiler, BINARY_OPCODE(AND),
2457 dst, dstw, src1, src1w, src2, src2w);
2458 case SLJIT_OR:
2459 return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2460 dst, dstw, src1, src1w, src2, src2w);
2461 case SLJIT_XOR:
2462 return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
2463 dst, dstw, src1, src1w, src2, src2w);
2464 case SLJIT_SHL:
2465 case SLJIT_MSHL:
2466 return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
2467 dst, dstw, src1, src1w, src2, src2w);
2468 case SLJIT_LSHR:
2469 case SLJIT_MLSHR:
2470 return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
2471 dst, dstw, src1, src1w, src2, src2w);
2472 case SLJIT_ASHR:
2473 case SLJIT_MASHR:
2474 return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
2475 dst, dstw, src1, src1w, src2, src2w);
2476 case SLJIT_ROTL:
2477 return emit_shift_with_flags(compiler, ROL, 0,
2478 dst, dstw, src1, src1w, src2, src2w);
2479 case SLJIT_ROTR:
2480 return emit_shift_with_flags(compiler, ROR, 0,
2481 dst, dstw, src1, src1w, src2, src2w);
2482 }
2483
2484 return SLJIT_SUCCESS;
2485 }
2486
2487 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2488 sljit_s32 src1, sljit_sw src1w,
2489 sljit_s32 src2, sljit_sw src2w)
2490 {
2491 sljit_s32 opcode = GET_OPCODE(op);
2492
2493 CHECK_ERROR();
2494 CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2495
2496 if (opcode != SLJIT_SUB && opcode != SLJIT_AND) {
2497 SLJIT_SKIP_CHECKS(compiler);
2498 return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
2499 }
2500
2501 ADJUST_LOCAL_OFFSET(src1, src1w);
2502 ADJUST_LOCAL_OFFSET(src2, src2w);
2503
2504 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2505 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2506 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2507 compiler->mode32 = op & SLJIT_32;
2508 #endif
2509
2510 if (opcode == SLJIT_SUB) {
2511 return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2512 }
2513 return emit_test_binary(compiler, src1, src1w, src2, src2w);
2514 }
2515
2516 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
2517 sljit_s32 src_dst,
2518 sljit_s32 src1, sljit_sw src1w,
2519 sljit_s32 src2, sljit_sw src2w)
2520 {
2521 sljit_s32 restore_ecx = 0;
2522 sljit_s32 is_rotate, is_left;
2523 sljit_u8* inst;
2524 sljit_sw dstw = 0;
2525 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2526 sljit_s32 tmp2 = SLJIT_MEM1(SLJIT_SP);
2527 #else /* !SLJIT_CONFIG_X86_32 */
2528 sljit_s32 tmp2 = TMP_REG2;
2529 #endif /* SLJIT_CONFIG_X86_32 */
2530
2531 CHECK_ERROR();
2532 CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
2533 ADJUST_LOCAL_OFFSET(src1, src1w);
2534 ADJUST_LOCAL_OFFSET(src2, src2w);
2535
2536 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2537 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2538
2539 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2540 compiler->mode32 = op & SLJIT_32;
2541 #endif
2542
2543 if (src2 & SLJIT_IMM) {
2544 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2545 src2w &= 0x1f;
2546 #else /* !SLJIT_CONFIG_X86_32 */
2547 src2w &= (op & SLJIT_32) ? 0x1f : 0x3f;
2548 #endif /* SLJIT_CONFIG_X86_32 */
2549
2550 if (src2w == 0)
2551 return SLJIT_SUCCESS;
2552 }
2553
2554 is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
2555
2556 is_rotate = (src_dst == src1);
2557 CHECK_EXTRA_REGS(src_dst, dstw, (void)0);
2558
2559 if (is_rotate)
2560 return emit_shift(compiler, is_left ? ROL : ROR, src_dst, dstw, src1, src1w, src2, src2w);
2561
2562 if ((src2 & SLJIT_IMM) || src2 == SLJIT_PREF_SHIFT_REG) {
2563 if (!FAST_IS_REG(src1)) {
2564 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2565 src1 = TMP_REG1;
2566 }
2567 } else if (FAST_IS_REG(src1)) {
2568 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2569 compiler->mode32 = 0;
2570 #endif
2571 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2572 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2573 compiler->mode32 = op & SLJIT_32;
2574 #endif
2575 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2576
2577 if (src1 == SLJIT_PREF_SHIFT_REG)
2578 src1 = TMP_REG1;
2579
2580 if (src_dst == SLJIT_PREF_SHIFT_REG)
2581 src_dst = TMP_REG1;
2582
2583 restore_ecx = 1;
2584 } else {
2585 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2586 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2587 compiler->mode32 = 0;
2588 #endif
2589 EMIT_MOV(compiler, tmp2, 0, SLJIT_PREF_SHIFT_REG, 0);
2590 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2591 compiler->mode32 = op & SLJIT_32;
2592 #endif
2593 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2594
2595 src1 = TMP_REG1;
2596
2597 if (src_dst == SLJIT_PREF_SHIFT_REG) {
2598 src_dst = tmp2;
2599 SLJIT_ASSERT(dstw == 0);
2600 }
2601
2602 restore_ecx = 2;
2603 }
2604
2605 inst = emit_x86_instruction(compiler, 2, src1, 0, src_dst, dstw);
2606 FAIL_IF(!inst);
2607 inst[0] = GROUP_0F;
2608
2609 if (src2 & SLJIT_IMM) {
2610 inst[1] = U8((is_left ? SHLD : SHRD) - 1);
2611
2612 /* Immedate argument is added separately. */
2613 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
2614 FAIL_IF(!inst);
2615 INC_SIZE(1);
2616 *inst = U8(src2w);
2617 } else
2618 inst[1] = U8(is_left ? SHLD : SHRD);
2619
2620 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2621 compiler->mode32 = 0;
2622 #endif
2623
2624 if (restore_ecx == 1)
2625 return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2626 if (restore_ecx == 2)
2627 return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, tmp2, 0);
2628
2629 return SLJIT_SUCCESS;
2630 }
2631
2632 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
2633 sljit_s32 src, sljit_sw srcw)
2634 {
2635 CHECK_ERROR();
2636 CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2637 ADJUST_LOCAL_OFFSET(src, srcw);
2638
2639 CHECK_EXTRA_REGS(src, srcw, (void)0);
2640
2641 switch (op) {
2642 case SLJIT_FAST_RETURN:
2643 return emit_fast_return(compiler, src, srcw);
2644 case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2645 /* Don't adjust shadow stack if it isn't enabled. */
2646 if (!cpu_has_shadow_stack ())
2647 return SLJIT_SUCCESS;
2648 return adjust_shadow_stack(compiler, src, srcw);
2649 case SLJIT_PREFETCH_L1:
2650 case SLJIT_PREFETCH_L2:
2651 case SLJIT_PREFETCH_L3:
2652 case SLJIT_PREFETCH_ONCE:
2653 return emit_prefetch(compiler, op, src, srcw);
2654 }
2655
2656 return SLJIT_SUCCESS;
2657 }
2658
2659 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
2660 {
2661 CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2662 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2663 if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
2664 return -1;
2665 #endif
2666 return reg_map[reg];
2667 }
2668
2669 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
2670 {
2671 CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
2672 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2673 return reg;
2674 #else
2675 return freg_map[reg];
2676 #endif
2677 }
2678
2679 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2680 void *instruction, sljit_u32 size)
2681 {
2682 sljit_u8 *inst;
2683
2684 CHECK_ERROR();
2685 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2686
2687 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
2688 FAIL_IF(!inst);
2689 INC_SIZE(size);
2690 SLJIT_MEMCPY(inst, instruction, size);
2691 return SLJIT_SUCCESS;
2692 }
2693
2694 /* --------------------------------------------------------------------- */
2695 /* Floating point operators */
2696 /* --------------------------------------------------------------------- */
2697
2698 /* Alignment(3) + 4 * 16 bytes. */
2699 static sljit_u32 sse2_data[3 + (4 * 4)];
2700 static sljit_u32 *sse2_buffer;
2701
2702 static void init_compiler(void)
2703 {
2704 /* Align to 16 bytes. */
2705 sse2_buffer = (sljit_u32*)(((sljit_uw)sse2_data + 15) & ~(sljit_uw)0xf);
2706
2707 /* Single precision constants (each constant is 16 byte long). */
2708 sse2_buffer[0] = 0x80000000;
2709 sse2_buffer[4] = 0x7fffffff;
2710 /* Double precision constants (each constant is 16 byte long). */
2711 sse2_buffer[8] = 0;
2712 sse2_buffer[9] = 0x80000000;
2713 sse2_buffer[12] = 0xffffffff;
2714 sse2_buffer[13] = 0x7fffffff;
2715 }
2716
2717 static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
2718 sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2719 {
2720 sljit_u8 *inst;
2721
2722 inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2723 FAIL_IF(!inst);
2724 *inst++ = GROUP_0F;
2725 *inst = opcode;
2726 return SLJIT_SUCCESS;
2727 }
2728
2729 static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
2730 sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2731 {
2732 sljit_u8 *inst;
2733
2734 inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2735 FAIL_IF(!inst);
2736 *inst++ = GROUP_0F;
2737 *inst = opcode;
2738 return SLJIT_SUCCESS;
2739 }
2740
2741 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
2742 sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
2743 {
2744 return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
2745 }
2746
2747 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
2748 sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
2749 {
2750 return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
2751 }
2752
2753 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2754 sljit_s32 dst, sljit_sw dstw,
2755 sljit_s32 src, sljit_sw srcw)
2756 {
2757 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2758 sljit_u8 *inst;
2759
2760 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2761 if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
2762 compiler->mode32 = 0;
2763 #endif
2764
2765 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
2766 FAIL_IF(!inst);
2767 *inst++ = GROUP_0F;
2768 *inst = CVTTSD2SI_r_xm;
2769
2770 if (dst & SLJIT_MEM)
2771 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2772 return SLJIT_SUCCESS;
2773 }
2774
2775 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2776 sljit_s32 dst, sljit_sw dstw,
2777 sljit_s32 src, sljit_sw srcw)
2778 {
2779 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2780 sljit_u8 *inst;
2781
2782 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2783 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
2784 compiler->mode32 = 0;
2785 #endif
2786
2787 if (src & SLJIT_IMM) {
2788 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2789 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
2790 srcw = (sljit_s32)srcw;
2791 #endif
2792 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
2793 src = TMP_REG1;
2794 srcw = 0;
2795 }
2796
2797 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
2798 FAIL_IF(!inst);
2799 *inst++ = GROUP_0F;
2800 *inst = CVTSI2SD_x_rm;
2801
2802 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2803 compiler->mode32 = 1;
2804 #endif
2805 if (dst_r == TMP_FREG)
2806 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
2807 return SLJIT_SUCCESS;
2808 }
2809
2810 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2811 sljit_s32 src1, sljit_sw src1w,
2812 sljit_s32 src2, sljit_sw src2w)
2813 {
2814 switch (GET_FLAG_TYPE(op)) {
2815 case SLJIT_ORDERED_LESS:
2816 case SLJIT_UNORDERED_OR_GREATER_EQUAL:
2817 case SLJIT_UNORDERED_OR_GREATER:
2818 case SLJIT_ORDERED_LESS_EQUAL:
2819 if (!FAST_IS_REG(src2)) {
2820 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
2821 src2 = TMP_FREG;
2822 }
2823
2824 return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src2, src1, src1w);
2825 }
2826
2827 if (!FAST_IS_REG(src1)) {
2828 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
2829 src1 = TMP_FREG;
2830 }
2831
2832 return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src1, src2, src2w);
2833 }
2834
2835 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2836 sljit_s32 dst, sljit_sw dstw,
2837 sljit_s32 src, sljit_sw srcw)
2838 {
2839 sljit_s32 dst_r;
2840
2841 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2842 compiler->mode32 = 1;
2843 #endif
2844
2845 CHECK_ERROR();
2846 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2847
2848 if (GET_OPCODE(op) == SLJIT_MOV_F64) {
2849 if (FAST_IS_REG(dst))
2850 return emit_sse2_load(compiler, op & SLJIT_32, dst, src, srcw);
2851 if (FAST_IS_REG(src))
2852 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, src);
2853 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
2854 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
2855 }
2856
2857 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
2858 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2859 if (FAST_IS_REG(src)) {
2860 /* We overwrite the high bits of source. From SLJIT point of view,
2861 this is not an issue.
2862 Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
2863 FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_32, src, src, 0));
2864 }
2865 else {
2866 FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw));
2867 src = TMP_FREG;
2868 }
2869
2870 FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_32, dst_r, src, 0));
2871 if (dst_r == TMP_FREG)
2872 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
2873 return SLJIT_SUCCESS;
2874 }
2875
2876 if (FAST_IS_REG(dst)) {
2877 dst_r = dst;
2878 if (dst != src)
2879 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src, srcw));
2880 }
2881 else {
2882 dst_r = TMP_FREG;
2883 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src, srcw));
2884 }
2885
2886 switch (GET_OPCODE(op)) {
2887 case SLJIT_NEG_F64:
2888 FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_32 ? sse2_buffer : sse2_buffer + 8)));
2889 break;
2890
2891 case SLJIT_ABS_F64:
2892 FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_32 ? sse2_buffer + 4 : sse2_buffer + 12)));
2893 break;
2894 }
2895
2896 if (dst_r == TMP_FREG)
2897 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
2898 return SLJIT_SUCCESS;
2899 }
2900
2901 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2902 sljit_s32 dst, sljit_sw dstw,
2903 sljit_s32 src1, sljit_sw src1w,
2904 sljit_s32 src2, sljit_sw src2w)
2905 {
2906 sljit_s32 dst_r;
2907
2908 CHECK_ERROR();
2909 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2910 ADJUST_LOCAL_OFFSET(dst, dstw);
2911 ADJUST_LOCAL_OFFSET(src1, src1w);
2912 ADJUST_LOCAL_OFFSET(src2, src2w);
2913
2914 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2915 compiler->mode32 = 1;
2916 #endif
2917
2918 if (FAST_IS_REG(dst)) {
2919 dst_r = dst;
2920 if (dst == src1)
2921 ; /* Do nothing here. */
2922 else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
2923 /* Swap arguments. */
2924 src2 = src1;
2925 src2w = src1w;
2926 }
2927 else if (dst != src2)
2928 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src1, src1w));
2929 else {
2930 dst_r = TMP_FREG;
2931 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
2932 }
2933 }
2934 else {
2935 dst_r = TMP_FREG;
2936 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
2937 }
2938
2939 switch (GET_OPCODE(op)) {
2940 case SLJIT_ADD_F64:
2941 FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
2942 break;
2943
2944 case SLJIT_SUB_F64:
2945 FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
2946 break;
2947
2948 case SLJIT_MUL_F64:
2949 FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
2950 break;
2951
2952 case SLJIT_DIV_F64:
2953 FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
2954 break;
2955 }
2956
2957 if (dst_r == TMP_FREG)
2958 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
2959 return SLJIT_SUCCESS;
2960 }
2961
2962 /* --------------------------------------------------------------------- */
2963 /* Conditional instructions */
2964 /* --------------------------------------------------------------------- */
2965
2966 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2967 {
2968 sljit_u8 *inst;
2969 struct sljit_label *label;
2970
2971 CHECK_ERROR_PTR();
2972 CHECK_PTR(check_sljit_emit_label(compiler));
2973
2974 if (compiler->last_label && compiler->last_label->size == compiler->size)
2975 return compiler->last_label;
2976
2977 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2978 PTR_FAIL_IF(!label);
2979 set_label(label, compiler);
2980
2981 inst = (sljit_u8*)ensure_buf(compiler, 2);
2982 PTR_FAIL_IF(!inst);
2983
2984 *inst++ = 0;
2985 *inst++ = 0;
2986
2987 return label;
2988 }
2989
2990 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2991 {
2992 sljit_u8 *inst;
2993 struct sljit_jump *jump;
2994
2995 CHECK_ERROR_PTR();
2996 CHECK_PTR(check_sljit_emit_jump(compiler, type));
2997
2998 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2999 PTR_FAIL_IF_NULL(jump);
3000 set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT)));
3001 type &= 0xff;
3002
3003 /* Worst case size. */
3004 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3005 compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
3006 #else
3007 compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
3008 #endif
3009
3010 inst = (sljit_u8*)ensure_buf(compiler, 2);
3011 PTR_FAIL_IF_NULL(inst);
3012
3013 *inst++ = 0;
3014 *inst++ = 1;
3015 return jump;
3016 }
3017
3018 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
3019 {
3020 sljit_u8 *inst;
3021 struct sljit_jump *jump;
3022
3023 CHECK_ERROR();
3024 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
3025 ADJUST_LOCAL_OFFSET(src, srcw);
3026
3027 CHECK_EXTRA_REGS(src, srcw, (void)0);
3028
3029 if (src == SLJIT_IMM) {
3030 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
3031 FAIL_IF_NULL(jump);
3032 set_jump(jump, compiler, (sljit_u32)(JUMP_ADDR | (type << TYPE_SHIFT)));
3033 jump->u.target = (sljit_uw)srcw;
3034
3035 /* Worst case size. */
3036 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3037 compiler->size += 5;
3038 #else
3039 compiler->size += 10 + 3;
3040 #endif
3041
3042 inst = (sljit_u8*)ensure_buf(compiler, 2);
3043 FAIL_IF_NULL(inst);
3044
3045 *inst++ = 0;
3046 *inst++ = 1;
3047 }
3048 else {
3049 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3050 /* REX_W is not necessary (src is not immediate). */
3051 compiler->mode32 = 1;
3052 #endif
3053 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
3054 FAIL_IF(!inst);
3055 *inst++ = GROUP_FF;
3056 *inst = U8(*inst | ((type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm));
3057 }
3058 return SLJIT_SUCCESS;
3059 }
3060
3061 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
3062 sljit_s32 dst, sljit_sw dstw,
3063 sljit_s32 type)
3064 {
3065 sljit_u8 *inst;
3066 sljit_u8 cond_set = 0;
3067 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3068 sljit_s32 reg;
3069 #endif
3070 /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
3071 sljit_s32 dst_save = dst;
3072 sljit_sw dstw_save = dstw;
3073
3074 CHECK_ERROR();
3075 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
3076
3077 ADJUST_LOCAL_OFFSET(dst, dstw);
3078 CHECK_EXTRA_REGS(dst, dstw, (void)0);
3079
3080 /* setcc = jcc + 0x10. */
3081 cond_set = U8(get_jump_code((sljit_uw)type) + 0x10);
3082
3083 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3084 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
3085 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
3086 FAIL_IF(!inst);
3087 INC_SIZE(4 + 3);
3088 /* Set low register to conditional flag. */
3089 *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
3090 *inst++ = GROUP_0F;
3091 *inst++ = cond_set;
3092 *inst++ = MOD_REG | reg_lmap[TMP_REG1];
3093 *inst++ = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B));
3094 *inst++ = OR_rm8_r8;
3095 *inst++ = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]);
3096 return SLJIT_SUCCESS;
3097 }
3098
3099 reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
3100
3101 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
3102 FAIL_IF(!inst);
3103 INC_SIZE(4 + 4);
3104 /* Set low register to conditional flag. */
3105 *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
3106 *inst++ = GROUP_0F;
3107 *inst++ = cond_set;
3108 *inst++ = MOD_REG | reg_lmap[reg];
3109 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
3110 /* The movzx instruction does not affect flags. */
3111 *inst++ = GROUP_0F;
3112 *inst++ = MOVZX_r_rm8;
3113 *inst = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]);
3114
3115 if (reg != TMP_REG1)
3116 return SLJIT_SUCCESS;
3117
3118 if (GET_OPCODE(op) < SLJIT_ADD) {
3119 compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
3120 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
3121 }
3122
3123 SLJIT_SKIP_CHECKS(compiler);
3124 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
3125
3126 #else
3127 /* The SLJIT_CONFIG_X86_32 code path starts here. */
3128 if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
3129 if (reg_map[dst] <= 4) {
3130 /* Low byte is accessible. */
3131 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
3132 FAIL_IF(!inst);
3133 INC_SIZE(3 + 3);
3134 /* Set low byte to conditional flag. */
3135 *inst++ = GROUP_0F;
3136 *inst++ = cond_set;
3137 *inst++ = U8(MOD_REG | reg_map[dst]);
3138
3139 *inst++ = GROUP_0F;
3140 *inst++ = MOVZX_r_rm8;
3141 *inst = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[dst]);
3142 return SLJIT_SUCCESS;
3143 }
3144
3145 /* Low byte is not accessible. */
3146 if (cpu_feature_list == 0)
3147 get_cpu_features();
3148
3149 if (cpu_feature_list & CPU_FEATURE_CMOV) {
3150 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
3151 /* a xor reg, reg operation would overwrite the flags. */
3152 EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
3153
3154 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
3155 FAIL_IF(!inst);
3156 INC_SIZE(3);
3157
3158 *inst++ = GROUP_0F;
3159 /* cmovcc = setcc - 0x50. */
3160 *inst++ = U8(cond_set - 0x50);
3161 *inst++ = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1]);
3162 return SLJIT_SUCCESS;
3163 }
3164
3165 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
3166 FAIL_IF(!inst);
3167 INC_SIZE(1 + 3 + 3 + 1);
3168 *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
3169 /* Set al to conditional flag. */
3170 *inst++ = GROUP_0F;
3171 *inst++ = cond_set;
3172 *inst++ = MOD_REG | 0 /* eax */;
3173
3174 *inst++ = GROUP_0F;
3175 *inst++ = MOVZX_r_rm8;
3176 *inst++ = U8(MOD_REG | (reg_map[dst] << 3) | 0 /* eax */);
3177 *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
3178 return SLJIT_SUCCESS;
3179 }
3180
3181 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
3182 SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);
3183
3184 if (dst != SLJIT_R0) {
3185 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
3186 FAIL_IF(!inst);
3187 INC_SIZE(1 + 3 + 2 + 1);
3188 /* Set low register to conditional flag. */
3189 *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
3190 *inst++ = GROUP_0F;
3191 *inst++ = cond_set;
3192 *inst++ = MOD_REG | 0 /* eax */;
3193 *inst++ = OR_rm8_r8;
3194 *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
3195 *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
3196 }
3197 else {
3198 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
3199 FAIL_IF(!inst);
3200 INC_SIZE(2 + 3 + 2 + 2);
3201 /* Set low register to conditional flag. */
3202 *inst++ = XCHG_r_rm;
3203 *inst++ = U8(MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]);
3204 *inst++ = GROUP_0F;
3205 *inst++ = cond_set;
3206 *inst++ = MOD_REG | 1 /* ecx */;
3207 *inst++ = OR_rm8_r8;
3208 *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
3209 *inst++ = XCHG_r_rm;
3210 *inst++ = U8(MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]);
3211 }
3212 return SLJIT_SUCCESS;
3213 }
3214
3215 /* Set TMP_REG1 to the bit. */
3216 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
3217 FAIL_IF(!inst);
3218 INC_SIZE(1 + 3 + 3 + 1);
3219 *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
3220 /* Set al to conditional flag. */
3221 *inst++ = GROUP_0F;
3222 *inst++ = cond_set;
3223 *inst++ = MOD_REG | 0 /* eax */;
3224
3225 *inst++ = GROUP_0F;
3226 *inst++ = MOVZX_r_rm8;
3227 *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
3228
3229 *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
3230
3231 if (GET_OPCODE(op) < SLJIT_ADD)
3232 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
3233
3234 SLJIT_SKIP_CHECKS(compiler);
3235 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
3236 #endif /* SLJIT_CONFIG_X86_64 */
3237 }
3238
3239 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
3240 sljit_s32 dst_reg,
3241 sljit_s32 src, sljit_sw srcw)
3242 {
3243 sljit_u8* inst;
3244
3245 CHECK_ERROR();
3246 CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
3247
3248 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3249 type &= ~SLJIT_32;
3250
3251 if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
3252 return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
3253 #else
3254 if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
3255 return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
3256 #endif
3257
3258 /* ADJUST_LOCAL_OFFSET is not needed. */
3259 CHECK_EXTRA_REGS(src, srcw, (void)0);
3260
3261 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3262 compiler->mode32 = type & SLJIT_32;
3263 type &= ~SLJIT_32;
3264 #endif
3265
3266 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
3267 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
3268 src = TMP_REG1;
3269 srcw = 0;
3270 }
3271
3272 inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
3273 FAIL_IF(!inst);
3274 *inst++ = GROUP_0F;
3275 *inst = U8(get_jump_code((sljit_uw)type) - 0x40);
3276 return SLJIT_SUCCESS;
3277 }
3278
3279 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
3280 {
3281 CHECK_ERROR();
3282 CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
3283 ADJUST_LOCAL_OFFSET(dst, dstw);
3284
3285 CHECK_EXTRA_REGS(dst, dstw, (void)0);
3286
3287 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3288 compiler->mode32 = 0;
3289 #endif
3290
3291 ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
3292
3293 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3294 if (NOT_HALFWORD(offset)) {
3295 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
3296 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
3297 SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
3298 return compiler->error;
3299 #else
3300 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
3301 #endif
3302 }
3303 #endif
3304
3305 if (offset != 0)
3306 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
3307 return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
3308 }
3309
3310 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
3311 {
3312 sljit_u8 *inst;
3313 struct sljit_const *const_;
3314 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3315 sljit_s32 reg;
3316 #endif
3317
3318 CHECK_ERROR_PTR();
3319 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
3320 ADJUST_LOCAL_OFFSET(dst, dstw);
3321
3322 CHECK_EXTRA_REGS(dst, dstw, (void)0);
3323
3324 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
3325 PTR_FAIL_IF(!const_);
3326 set_const(const_, compiler);
3327
3328 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3329 compiler->mode32 = 0;
3330 reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
3331
3332 if (emit_load_imm64(compiler, reg, init_value))
3333 return NULL;
3334 #else
3335 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
3336 return NULL;
3337 #endif
3338
3339 inst = (sljit_u8*)ensure_buf(compiler, 2);
3340 PTR_FAIL_IF(!inst);
3341
3342 *inst++ = 0;
3343 *inst++ = 2;
3344
3345 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3346 if (dst & SLJIT_MEM)
3347 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
3348 return NULL;
3349 #endif
3350
3351 return const_;
3352 }
3353
3354 SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
3355 {
3356 struct sljit_put_label *put_label;
3357 sljit_u8 *inst;
3358 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3359 sljit_s32 reg;
3360 sljit_uw start_size;
3361 #endif
3362
3363 CHECK_ERROR_PTR();
3364 CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
3365 ADJUST_LOCAL_OFFSET(dst, dstw);
3366
3367 CHECK_EXTRA_REGS(dst, dstw, (void)0);
3368
3369 put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
3370 PTR_FAIL_IF(!put_label);
3371 set_put_label(put_label, compiler, 0);
3372
3373 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3374 compiler->mode32 = 0;
3375 reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
3376
3377 if (emit_load_imm64(compiler, reg, 0))
3378 return NULL;
3379 #else
3380 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, 0))
3381 return NULL;
3382 #endif
3383
3384 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3385 if (dst & SLJIT_MEM) {
3386 start_size = compiler->size;
3387 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
3388 return NULL;
3389 put_label->flags = compiler->size - start_size;
3390 }
3391 #endif
3392
3393 inst = (sljit_u8*)ensure_buf(compiler, 2);
3394 PTR_FAIL_IF(!inst);
3395
3396 *inst++ = 0;
3397 *inst++ = 3;
3398
3399 return put_label;
3400 }
3401
3402 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
3403 {
3404 SLJIT_UNUSED_ARG(executable_offset);
3405
3406 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 0);
3407 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3408 sljit_unaligned_store_sw((void*)addr, (sljit_sw)(new_target - (addr + 4) - (sljit_uw)executable_offset));
3409 #else
3410 sljit_unaligned_store_sw((void*)addr, (sljit_sw)new_target);
3411 #endif
3412 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 1);
3413 }
3414
3415 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
3416 {
3417 SLJIT_UNUSED_ARG(executable_offset);
3418
3419 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 0);
3420 sljit_unaligned_store_sw((void*)addr, new_constant);
3421 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 1);
3422 }
3423