1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include <sys/auxv.h>
28
29 #ifdef __ARCH__
30 #define ENABLE_STATIC_FACILITY_DETECTION 1
31 #else
32 #define ENABLE_STATIC_FACILITY_DETECTION 0
33 #endif
34 #define ENABLE_DYNAMIC_FACILITY_DETECTION 1
35
/* Returns the human-readable target name; SLJIT_CPUINFO is a build-time
   string literal appended to identify the detected CPU configuration. */
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
	return "s390x" SLJIT_CPUINFO;
}
40
/* Instructions. S390x instructions are 2, 4 or 6 bytes long; a full
   64-bit word is used so tags can be stored above bit 47. */
typedef sljit_uw sljit_ins;

/* Instruction tags (most significant halfword). */
static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48;

#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)

/* SLJIT register index -> hardware GPR number (see gpr() below). */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1
};

/* there are also a[2-15] available, but they are slower to access and
 * their use is limited as mundaym explained:
 * https://github.com/zherczeg/sljit/pull/91#discussion_r486895689
 */
58
/* General Purpose Registers [0-15]. */
typedef sljit_uw sljit_gpr;

/*
 * WARNING
 * the following code is non standard and should be improved for
 * consistency, but doesn't use SLJIT_NUMBER_OF_REGISTERS based
 * registers because r0 and r1 are the ABI recommended volatiles.
 * there is a gpr() function that maps sljit to physical register numbers
 * that should be used instead of the usual index into reg_map[] and
 * will be retired ASAP (TODO: carenas)
 */

static const sljit_gpr r0 = 0; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */
static const sljit_gpr r1 = 1; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */
static const sljit_gpr r2 = 2; /* reg_map[1]: 1st argument */
static const sljit_gpr r3 = 3; /* reg_map[2]: 2nd argument */
static const sljit_gpr r4 = 4; /* reg_map[3]: 3rd argument */
static const sljit_gpr r5 = 5; /* reg_map[4]: 4th argument */
static const sljit_gpr r6 = 6; /* reg_map[5]: 5th argument; 1st saved register */
static const sljit_gpr r7 = 7; /* reg_map[6] */
static const sljit_gpr r8 = 8; /* reg_map[7] */
static const sljit_gpr r9 = 9; /* reg_map[8] */
static const sljit_gpr r10 = 10; /* reg_map[9] */
static const sljit_gpr r11 = 11; /* reg_map[10] */
static const sljit_gpr r12 = 12; /* reg_map[11]: GOT */
static const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */
static const sljit_gpr r14 = 14; /* reg_map[0]: return address and flag register */
static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */

/* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */
/* TODO(carenas): r12 might conflict in PIC code, reserve? */
/* TODO(carenas): r13 is usually pointed to "pool" per ABI, using a tmp
 *                like we do know might be faster though, reserve?
 */

/* TODO(carenas): should be named TMP_REG[1-2] for consistency */
#define tmp0	r0
#define tmp1	r1

/* TODO(carenas): flags should move to a different register so that
 *                link register doesn't need to change
 */

/* When reg cannot be unused. */
#define IS_GPR_REG(reg)		((reg > 0) && (reg) <= SLJIT_SP)

/* Link registers. The normal link register is r14, but since
   we use that for flags we need to use r0 instead to do fast
   calls so that flags are preserved. */
static const sljit_gpr link_r = 14;     /* r14 */
static const sljit_gpr fast_link_r = 0; /* r0 */
111
#define TMP_FREG1	(0)

/* SLJIT float register index -> hardware FPR number. */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
	1, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8,
};

/* Helpers placing a 4-bit register field at the named bit offset
   (counted from the least significant bit of the instruction word). */
#define R0A(r) (r)
#define R4A(r) ((r) << 4)
#define R8A(r) ((r) << 8)
#define R12A(r) ((r) << 12)
#define R16A(r) ((r) << 16)
#define R20A(r) ((r) << 20)
#define R28A(r) ((r) << 28)
#define R32A(r) ((r) << 32)
#define R36A(r) ((r) << 36)

#define R0(r) ((sljit_ins)reg_map[r])

/* Same helpers for float registers, going through freg_map. */
#define F0(r) ((sljit_ins)freg_map[r])
#define F4(r) (R4A((sljit_ins)freg_map[r]))
#define F20(r) (R20A((sljit_ins)freg_map[r]))
#define F36(r) (R36A((sljit_ins)freg_map[r]))

struct sljit_s390x_const {
	struct sljit_const const_; /* must be first */
	sljit_sw init_value;       /* required to build literal pool */
};
139
140 /* Convert SLJIT register to hardware register. */
gpr(sljit_s32 r)141 static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)
142 {
143 SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0])));
144 return reg_map[r];
145 }
146
fgpr(sljit_s32 r)147 static SLJIT_INLINE sljit_gpr fgpr(sljit_s32 r)
148 {
149 SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(freg_map) / sizeof(freg_map[0])));
150 return freg_map[r];
151 }
152
153 /* Size of instruction in bytes. Tags must already be cleared. */
sizeof_ins(sljit_ins ins)154 static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins)
155 {
156 /* keep faulting instructions */
157 if (ins == 0)
158 return 2;
159
160 if ((ins & 0x00000000ffffL) == ins)
161 return 2;
162 if ((ins & 0x0000ffffffffL) == ins)
163 return 4;
164 if ((ins & 0xffffffffffffL) == ins)
165 return 6;
166
167 SLJIT_UNREACHABLE();
168 return (sljit_uw)-1;
169 }
170
/* Append one instruction word (possibly carrying a tag in its upper
   halfword) to the compiler buffer. compiler->size counts instructions,
   not bytes; the byte layout is resolved at code generation time. */
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
{
	sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ibuf);
	*ibuf = ins;
	compiler->size++;
	return SLJIT_SUCCESS;
}
179
encode_inst(void ** ptr,sljit_ins ins)180 static sljit_s32 encode_inst(void **ptr, sljit_ins ins)
181 {
182 sljit_u16 *ibuf = (sljit_u16 *)*ptr;
183 sljit_uw size = sizeof_ins(ins);
184
185 SLJIT_ASSERT((size & 6) == size);
186 switch (size) {
187 case 6:
188 *ibuf++ = (sljit_u16)(ins >> 32);
189 /* fallthrough */
190 case 4:
191 *ibuf++ = (sljit_u16)(ins >> 16);
192 /* fallthrough */
193 case 2:
194 *ibuf++ = (sljit_u16)(ins);
195 }
196 *ptr = (void*)ibuf;
197 return SLJIT_SUCCESS;
198 }
199
/* True when the last flag-setting op was an ADD/SUB that was not a
   comparison; such ops leave the condition code in the arithmetic
   (result/overflow) layout rather than the comparison layout. */
#define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \
	(((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \
		&& !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE))

/* Map the given type to a 4-bit condition code mask. */
static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) {
	const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */
	const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */
	const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */
	const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */

	switch (type) {
	case SLJIT_EQUAL:
		if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
			/* Shadows the outer 'type' deliberately: re-dispatch on the
			   flag type recorded by the arithmetic operation. */
			sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
			if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
				return cc0;
			if (type == SLJIT_OVERFLOW)
				return (cc0 | cc3);
			return (cc0 | cc2);
		}
		/* fallthrough */

	case SLJIT_EQUAL_F64:
		return cc0;

	case SLJIT_NOT_EQUAL:
		if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
			sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
			if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
				return (cc1 | cc2 | cc3);
			if (type == SLJIT_OVERFLOW)
				return (cc1 | cc2);
			return (cc1 | cc3);
		}
		/* fallthrough */

	case SLJIT_NOT_EQUAL_F64:
		return (cc1 | cc2 | cc3);

	case SLJIT_LESS:
		return cc1;

	case SLJIT_GREATER_EQUAL:
		return (cc0 | cc2 | cc3);

	case SLJIT_GREATER:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
			return cc2;
		return cc3;

	case SLJIT_LESS_EQUAL:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
			return (cc0 | cc1);
		return (cc0 | cc1 | cc2);

	case SLJIT_SIG_LESS:
	case SLJIT_LESS_F64:
		return cc1;

	case SLJIT_NOT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
			return (cc2 | cc3);
		/* fallthrough */

	case SLJIT_SIG_LESS_EQUAL:
	case SLJIT_LESS_EQUAL_F64:
		return (cc0 | cc1);

	case SLJIT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
			return (cc0 | cc1);
		/* fallthrough */

	case SLJIT_SIG_GREATER:
		/* Overflow is considered greater, see SLJIT_SUB. */
		return cc2 | cc3;

	case SLJIT_SIG_GREATER_EQUAL:
		return (cc0 | cc2 | cc3);

	case SLJIT_OVERFLOW:
		if (compiler->status_flags_state & SLJIT_SET_Z)
			return (cc2 | cc3);
		/* fallthrough */

	case SLJIT_UNORDERED_F64:
		return cc3;

	case SLJIT_NOT_OVERFLOW:
		if (compiler->status_flags_state & SLJIT_SET_Z)
			return (cc0 | cc1);
		/* fallthrough */

	case SLJIT_ORDERED_F64:
		return (cc0 | cc1 | cc2);

	case SLJIT_GREATER_F64:
		return cc2;

	case SLJIT_GREATER_EQUAL_F64:
		return (cc0 | cc2);
	}

	SLJIT_UNREACHABLE();
	return (sljit_u8)-1;
}
307
/* Facility to bit index mappings.
   Note: some facilities share the same bit index.
   Indices match the bit positions reported by the STFLE instruction. */
typedef sljit_uw facility_bit;
#define STORE_FACILITY_LIST_EXTENDED_FACILITY 7
#define FAST_LONG_DISPLACEMENT_FACILITY 19
#define EXTENDED_IMMEDIATE_FACILITY 21
#define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34
#define DISTINCT_OPERAND_FACILITY 45
#define HIGH_WORD_FACILITY 45
#define POPULATION_COUNT_FACILITY 45
#define LOAD_STORE_ON_CONDITION_1_FACILITY 45
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49
#define LOAD_STORE_ON_CONDITION_2_FACILITY 53
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58
#define VECTOR_FACILITY 129
#define VECTOR_ENHANCEMENTS_1_FACILITY 135
324
/* Report whether a facility is known to be present due to the compiler
   settings. This function should always be compiled to a constant
   value given a constant argument. */
static SLJIT_INLINE int have_facility_static(facility_bit x)
{
#if ENABLE_STATIC_FACILITY_DETECTION
	/* __ARCH__ is set by -march=...; each architecture level implies all
	   facilities of the earlier levels. Unknown bits fall through to 0. */
	switch (x) {
	case FAST_LONG_DISPLACEMENT_FACILITY:
		return (__ARCH__ >=  6 /* z990 */);
	case EXTENDED_IMMEDIATE_FACILITY:
	case STORE_FACILITY_LIST_EXTENDED_FACILITY:
		return (__ARCH__ >=  7 /* z9-109 */);
	case GENERAL_INSTRUCTION_EXTENSION_FACILITY:
		return (__ARCH__ >=  8 /* z10 */);
	case DISTINCT_OPERAND_FACILITY:
		return (__ARCH__ >=  9 /* z196 */);
	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY:
		return (__ARCH__ >= 10 /* zEC12 */);
	case LOAD_STORE_ON_CONDITION_2_FACILITY:
	case VECTOR_FACILITY:
		return (__ARCH__ >= 11 /* z13 */);
	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY:
	case VECTOR_ENHANCEMENTS_1_FACILITY:
		return (__ARCH__ >= 12 /* z14 */);
	default:
		SLJIT_UNREACHABLE();
	}
#endif
	return 0;
}
355
get_hwcap()356 static SLJIT_INLINE unsigned long get_hwcap()
357 {
358 static unsigned long hwcap = 0;
359 if (SLJIT_UNLIKELY(!hwcap)) {
360 hwcap = getauxval(AT_HWCAP);
361 SLJIT_ASSERT(hwcap != 0);
362 }
363 return hwcap;
364 }
365
have_stfle()366 static SLJIT_INLINE int have_stfle()
367 {
368 if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY))
369 return 1;
370
371 return (get_hwcap() & HWCAP_S390_STFLE);
372 }
373
/* Report whether the given facility is available. This function always
   performs a runtime check. */
static int have_facility_dynamic(facility_bit x)
{
#if ENABLE_DYNAMIC_FACILITY_DETECTION
	/* Cached STFLE output: one bit per facility, facility 0 stored in
	   the most significant bit of bits[0] (see bit_index below). */
	static struct {
		sljit_uw bits[4];
	} cpu_features;
	size_t size = sizeof(cpu_features);
	const sljit_uw word_index = x >> 6;
	const sljit_uw bit_index = ((1UL << 63) >> (x & 63));

	SLJIT_ASSERT(x < size * 8);
	if (SLJIT_UNLIKELY(!have_stfle()))
		return 0;

	/* bits[0] == 0 means STFLE has not been executed yet; a successful
	   run always sets at least one bit in the first doubleword. */
	if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) {
		/* STFLE takes the doubleword count minus one in r0. */
		__asm__ __volatile__ (
			"lgr %%r0, %0;"
			"stfle 0(%1);"
			/* outputs  */ :
			/* inputs   */ : "d" ((size / 8) - 1), "a" (&cpu_features)
			/* clobbers */ : "r0", "cc", "memory"
		);
		SLJIT_ASSERT(cpu_features.bits[0] != 0);
	}
	return (cpu_features.bits[word_index] & bit_index) != 0;
#else
	return 0;
#endif
}
405
/* Defines a cached predicate: name() returns 1 when the facility is
   available, 0 otherwise. The dynamic probe runs at most once. */
#define HAVE_FACILITY(name, bit) \
static SLJIT_INLINE int name() \
{ \
	static int have = -1; \
	/* Static check first. May allow the function to be optimized away. */ \
	if (have_facility_static(bit)) \
		have = 1; \
	else if (SLJIT_UNLIKELY(have < 0)) \
		have = have_facility_dynamic(bit) ? 1 : 0; \
	\
	return have; \
}

HAVE_FACILITY(have_eimm,    EXTENDED_IMMEDIATE_FACILITY)
HAVE_FACILITY(have_ldisp,   FAST_LONG_DISPLACEMENT_FACILITY)
HAVE_FACILITY(have_genext,  GENERAL_INSTRUCTION_EXTENSION_FACILITY)
HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY)
HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY)
HAVE_FACILITY(have_misc2,   MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY)
#undef HAVE_FACILITY
426
/* Immediate/displacement range checks used by the encoders below. */
#define is_u12(d)	(0 <= (d) && (d) <= 0x00000fffL)
#define is_u32(d)	(0 <= (d) && (d) <= 0xffffffffL)

#define CHECK_SIGNED(v, bitlen) \
	((v) >= -(1 << ((bitlen) - 1)) && (v) < (1 << ((bitlen) - 1)))

#define is_s8(d)	CHECK_SIGNED((d), 8)
#define is_s16(d)	CHECK_SIGNED((d), 16)
#define is_s20(d)	CHECK_SIGNED((d), 20)
#define is_s32(d)	((d) == (sljit_s32)(d))
437
438 static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d)
439 {
440 SLJIT_ASSERT(is_s20(d));
441
442 sljit_uw dh = (d >> 12) & 0xff;
443 sljit_uw dl = (d << 8) & 0xfff00;
444 return (dh | dl) << 8;
445 }
446
/* TODO(carenas): variadic macro is not strictly needed */
#define SLJIT_S390X_INSTRUCTION(op, ...) \
static SLJIT_INLINE sljit_ins op(__VA_ARGS__)

/* RR form instructions: 2-byte opcode, dst in bits 4-7, src in bits 0-3. */
#define SLJIT_S390X_RR(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
{ \
	return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \
}

/* AND */
SLJIT_S390X_RR(nr,   0x1400)

/* BRANCH AND SAVE */
SLJIT_S390X_RR(basr, 0x0d00)

/* BRANCH ON CONDITION */
SLJIT_S390X_RR(bcr,  0x0700) /* TODO(mundaym): type for mask? */

/* DIVIDE */
SLJIT_S390X_RR(dr,   0x1d00)

/* EXCLUSIVE OR */
SLJIT_S390X_RR(xr,   0x1700)

/* LOAD */
SLJIT_S390X_RR(lr,   0x1800)

/* LOAD COMPLEMENT */
SLJIT_S390X_RR(lcr,  0x1300)

/* OR */
SLJIT_S390X_RR(or,   0x1600)

#undef SLJIT_S390X_RR

/* RRE form instructions: 4-byte encoding, dst and src in the low byte. */
#define SLJIT_S390X_RRE(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
{ \
	return (pattern) | R4A(dst) | R0A(src); \
}

/* AND */
SLJIT_S390X_RRE(ngr,   0xb9800000)

/* DIVIDE LOGICAL */
SLJIT_S390X_RRE(dlr,   0xb9970000)
SLJIT_S390X_RRE(dlgr,  0xb9870000)

/* DIVIDE SINGLE */
SLJIT_S390X_RRE(dsgr,  0xb90d0000)

/* EXCLUSIVE OR */
SLJIT_S390X_RRE(xgr,   0xb9820000)

/* LOAD */
SLJIT_S390X_RRE(lgr,   0xb9040000)
SLJIT_S390X_RRE(lgfr,  0xb9140000)

/* LOAD BYTE */
SLJIT_S390X_RRE(lbr,   0xb9260000)
SLJIT_S390X_RRE(lgbr,  0xb9060000)

/* LOAD COMPLEMENT */
SLJIT_S390X_RRE(lcgr,  0xb9030000)

/* LOAD HALFWORD */
SLJIT_S390X_RRE(lhr,   0xb9270000)
SLJIT_S390X_RRE(lghr,  0xb9070000)

/* LOAD LOGICAL */
SLJIT_S390X_RRE(llgfr, 0xb9160000)

/* LOAD LOGICAL CHARACTER */
SLJIT_S390X_RRE(llcr,  0xb9940000)
SLJIT_S390X_RRE(llgcr, 0xb9840000)

/* LOAD LOGICAL HALFWORD */
SLJIT_S390X_RRE(llhr,  0xb9950000)
SLJIT_S390X_RRE(llghr, 0xb9850000)

/* MULTIPLY LOGICAL */
SLJIT_S390X_RRE(mlgr,  0xb9860000)

/* MULTIPLY SINGLE */
SLJIT_S390X_RRE(msgfr, 0xb91c0000)

/* OR */
SLJIT_S390X_RRE(ogr,   0xb9810000)

/* SUBTRACT */
SLJIT_S390X_RRE(sgr,   0xb9090000)

#undef SLJIT_S390X_RRE
543
/* RI-a form instructions: register in bits 20-23, 16-bit immediate low. */
#define SLJIT_S390X_RIA(name, pattern, imm_type) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
{ \
	return (pattern) | R20A(reg) | (imm & 0xffff); \
}

/* ADD HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(aghi,  0xa70b0000, sljit_s16)

/* LOAD HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(lhi,   0xa7080000, sljit_s16)
SLJIT_S390X_RIA(lghi,  0xa7090000, sljit_s16)

/* LOAD LOGICAL IMMEDIATE */
SLJIT_S390X_RIA(llihh, 0xa50c0000, sljit_u16)
SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16)
SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16)
SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16)

/* MULTIPLY HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(mhi,   0xa70c0000, sljit_s16)
SLJIT_S390X_RIA(mghi,  0xa70d0000, sljit_s16)

/* OR IMMEDIATE */
SLJIT_S390X_RIA(oilh,  0xa50a0000, sljit_u16)

#undef SLJIT_S390X_RIA

/* RIL-a form instructions (requires extended immediate facility):
   register in bits 36-39, 32-bit immediate in the low word. */
#define SLJIT_S390X_RILA(name, pattern, imm_type) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
{ \
	SLJIT_ASSERT(have_eimm()); \
	return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \
}

/* ADD IMMEDIATE */
SLJIT_S390X_RILA(agfi,  0xc20800000000, sljit_s32)

/* ADD IMMEDIATE HIGH */
SLJIT_S390X_RILA(aih,   0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */

/* AND IMMEDIATE */
SLJIT_S390X_RILA(nihf,  0xc00a00000000, sljit_u32)

/* EXCLUSIVE OR IMMEDIATE */
SLJIT_S390X_RILA(xilf,  0xc00700000000, sljit_u32)

/* INSERT IMMEDIATE */
SLJIT_S390X_RILA(iihf,  0xc00800000000, sljit_u32)
SLJIT_S390X_RILA(iilf,  0xc00900000000, sljit_u32)

/* LOAD IMMEDIATE */
SLJIT_S390X_RILA(lgfi,  0xc00100000000, sljit_s32)

/* LOAD LOGICAL IMMEDIATE */
SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32)
SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32)

/* SUBTRACT LOGICAL IMMEDIATE */
SLJIT_S390X_RILA(slfi,  0xc20500000000, sljit_u32)

#undef SLJIT_S390X_RILA
608
/* RX-a form instructions: base + index + unsigned 12-bit displacement. */
#define SLJIT_S390X_RXA(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
{ \
	SLJIT_ASSERT((d & 0xfff) == d); \
\
	return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \
}

/* LOAD */
SLJIT_S390X_RXA(l,   0x58000000)

/* LOAD ADDRESS */
SLJIT_S390X_RXA(la,  0x41000000)

/* LOAD HALFWORD */
SLJIT_S390X_RXA(lh,  0x48000000)

/* MULTIPLY SINGLE */
SLJIT_S390X_RXA(ms,  0x71000000)

/* STORE */
SLJIT_S390X_RXA(st,  0x50000000)

/* STORE CHARACTER */
SLJIT_S390X_RXA(stc, 0x42000000)

/* STORE HALFWORD */
SLJIT_S390X_RXA(sth, 0x40000000)

#undef SLJIT_S390X_RXA

/* RXY-a instructions: base + index + signed 20-bit displacement.
   'cond' asserts the facility required by each instruction. */
#define SLJIT_S390X_RXYA(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
{ \
	SLJIT_ASSERT(cond); \
\
	return (pattern) | R36A(r) | R32A(x) | R28A(b) | disp_s20(d); \
}

/* LOAD */
SLJIT_S390X_RXYA(ly,    0xe30000000058, have_ldisp())
SLJIT_S390X_RXYA(lg,    0xe30000000004, 1)
SLJIT_S390X_RXYA(lgf,   0xe30000000014, 1)

/* LOAD BYTE */
SLJIT_S390X_RXYA(lb,    0xe30000000076, have_ldisp())
SLJIT_S390X_RXYA(lgb,   0xe30000000077, have_ldisp())

/* LOAD HALFWORD */
SLJIT_S390X_RXYA(lhy,   0xe30000000078, have_ldisp())
SLJIT_S390X_RXYA(lgh,   0xe30000000015, 1)

/* LOAD LOGICAL */
SLJIT_S390X_RXYA(llgf,  0xe30000000016, 1)

/* LOAD LOGICAL CHARACTER */
SLJIT_S390X_RXYA(llc,   0xe30000000094, have_eimm())
SLJIT_S390X_RXYA(llgc,  0xe30000000090, 1)

/* LOAD LOGICAL HALFWORD */
SLJIT_S390X_RXYA(llh,   0xe30000000095, have_eimm())
SLJIT_S390X_RXYA(llgh,  0xe30000000091, 1)

/* MULTIPLY SINGLE */
SLJIT_S390X_RXYA(msy,   0xe30000000051, have_ldisp())
SLJIT_S390X_RXYA(msg,   0xe3000000000c, 1)

/* STORE */
SLJIT_S390X_RXYA(sty,   0xe30000000050, have_ldisp())
SLJIT_S390X_RXYA(stg,   0xe30000000024, 1)

/* STORE CHARACTER */
SLJIT_S390X_RXYA(stcy,  0xe30000000072, have_ldisp())

/* STORE HALFWORD */
SLJIT_S390X_RXYA(sthy,  0xe30000000070, have_ldisp())

#undef SLJIT_S390X_RXYA
689
/* RSY-a instructions: two registers, base + signed 20-bit displacement. */
#define SLJIT_S390X_RSYA(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_s32 d, sljit_gpr b) \
{ \
	SLJIT_ASSERT(cond); \
\
	return (pattern) | R36A(dst) | R32A(src) | R28A(b) | disp_s20(d); \
}

/* LOAD MULTIPLE */
SLJIT_S390X_RSYA(lmg,  0xeb0000000004, 1)

/* SHIFT LEFT LOGICAL */
SLJIT_S390X_RSYA(sllg, 0xeb000000000d, 1)

/* SHIFT RIGHT SINGLE */
SLJIT_S390X_RSYA(srag, 0xeb000000000a, 1)

/* STORE MULTIPLE */
SLJIT_S390X_RSYA(stmg, 0xeb0000000024, 1)

#undef SLJIT_S390X_RSYA

/* RIE-f instructions (require general-instructions-extension facility):
   rotate/select with start bit (i3), end bit (i4) and rotate amount (i5). */
#define SLJIT_S390X_RIEF(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \
{ \
	sljit_ins i3, i4, i5; \
\
	SLJIT_ASSERT(have_genext()); \
	i3 = (sljit_ins)start << 24; \
	i4 = (sljit_ins)end << 16; \
	i5 = (sljit_ins)rot << 8; \
\
	return (pattern) | R36A(dst & 0xf) | R32A(src & 0xf) | i3 | i4 | i5; \
}

/* ROTATE THEN AND SELECTED BITS */
/* SLJIT_S390X_RIEF(rnsbg,  0xec0000000054) */

/* ROTATE THEN EXCLUSIVE OR SELECTED BITS */
/* SLJIT_S390X_RIEF(rxsbg,  0xec0000000057) */

/* ROTATE THEN OR SELECTED BITS */
SLJIT_S390X_RIEF(rosbg,  0xec0000000056)

/* ROTATE THEN INSERT SELECTED BITS */
/* SLJIT_S390X_RIEF(risbg,  0xec0000000055) */
/* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */

/* ROTATE THEN INSERT SELECTED BITS HIGH */
SLJIT_S390X_RIEF(risbhg, 0xec000000005d)

/* ROTATE THEN INSERT SELECTED BITS LOW */
/* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */

#undef SLJIT_S390X_RIEF
747
/* RRF-c instructions (require load/store-on-condition 1 facility):
   register-register move gated on a 4-bit condition mask (m3). */
#define SLJIT_S390X_RRFC(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \
{ \
	sljit_ins m3; \
\
	SLJIT_ASSERT(have_lscond1()); \
	m3 = (sljit_ins)(mask & 0xf) << 12; \
\
	return (pattern) | m3 | R4A(dst) | R0A(src); \
}

/* LOAD HALFWORD IMMEDIATE ON CONDITION */
SLJIT_S390X_RRFC(locr,  0xb9f20000)
SLJIT_S390X_RRFC(locgr, 0xb9e20000)

#undef SLJIT_S390X_RRFC

/* RIE-g instructions (require load/store-on-condition 2 facility):
   16-bit immediate load gated on a 4-bit condition mask. */
#define SLJIT_S390X_RIEG(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \
{ \
	sljit_ins m3, i2; \
\
	SLJIT_ASSERT(have_lscond2()); \
	m3 = (sljit_ins)(mask & 0xf) << 32; \
	i2 = (sljit_ins)(imm & 0xffffL) << 16; \
\
	return (pattern) | R36A(reg) | m3 | i2; \
}

/* LOAD HALFWORD IMMEDIATE ON CONDITION */
SLJIT_S390X_RIEG(lochi,  0xec0000000042)
SLJIT_S390X_RIEG(locghi, 0xec0000000046)

#undef SLJIT_S390X_RIEG

/* RIL-b instructions: register plus a 32-bit relative offset (in
   halfwords, as supplied by the caller). */
#define SLJIT_S390X_RILB(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \
{ \
	SLJIT_ASSERT(cond); \
\
	return (pattern) | R36A(reg) | (sljit_ins)(ri & 0xffffffff); \
}

/* BRANCH RELATIVE AND SAVE LONG */
SLJIT_S390X_RILB(brasl, 0xc00500000000, 1)

/* LOAD ADDRESS RELATIVE LONG */
SLJIT_S390X_RILB(larl,  0xc00000000000, 1)

/* LOAD RELATIVE LONG */
SLJIT_S390X_RILB(lgrl,  0xc40800000000, have_genext())

#undef SLJIT_S390X_RILB
803
/* Unconditional branch to the address in target (BCR with mask 15). */
SLJIT_S390X_INSTRUCTION(br, sljit_gpr target)
{
	return 0x07f0 | target;
}

/* BRANCH RELATIVE ON CONDITION: 16-bit halfword offset, 4-bit mask. */
SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target)
{
	sljit_ins m1 = (sljit_ins)(mask & 0xf) << 20;
	sljit_ins ri2 = (sljit_ins)target & 0xffff;
	return 0xa7040000L | m1 | ri2;
}

/* BRANCH RELATIVE ON CONDITION LONG: 32-bit halfword offset. */
SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target)
{
	sljit_ins m1 = (sljit_ins)(mask & 0xf) << 36;
	sljit_ins ri2 = (sljit_ins)target & 0xffffffff;
	return 0xc00400000000L | m1 | ri2;
}

/* FIND LEFTMOST ONE (requires extended-immediate facility). */
SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src)
{
	SLJIT_ASSERT(have_eimm());
	return 0xb9830000 | R8A(dst) | R0A(src);
}

/* INSERT PROGRAM MASK */
SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst)
{
	return 0xb2220000 | R4A(dst);
}

/* SET PROGRAM MASK */
SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst)
{
	return 0x0400 | R4A(dst);
}

/* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */
SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot)
{
	/* Setting bit 0x8 of the end position requests zeroing of the
	   remaining bits (the "Z" variant of risbhg). */
	return risbhg(dst, src, start, 0x8 | end, rot);
}

#undef SLJIT_S390X_INSTRUCTION
848
/* Rewrites the condition code after an overflow-setting operation so
   that the zero flag can be tested as well. The brc offsets count
   halfwords over the exact instruction sequence that follows, so the
   sequence must not be reordered or resized. */
static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r)
{
	/* Condition codes: bits 18 and 19.
	   Transformation:
	     0 (zero and no overflow) : unchanged
	     1 (non-zero and no overflow) : unchanged
	     2 (zero and overflow) : decreased by 1
	     3 (non-zero and overflow) : decreased by 1 if non-zero */
	FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_32) ? 1 : 2) + 2 + 3 + 1)));
	FAIL_IF(push_inst(compiler, ipm(tmp1)));
	FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)));
	FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3)));
	FAIL_IF(push_inst(compiler, slfi(tmp1, 0x10000000)));
	FAIL_IF(push_inst(compiler, spm(tmp1)));
	return SLJIT_SUCCESS;
}
865
/* load 64-bit immediate into register without clobbering flags */
static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v)
{
	/* The checks are ordered from cheapest encoding to most expensive;
	   keep that order when modifying. */

	/* 4 byte instructions */
	if (is_s16(v))
		return push_inst(compiler, lghi(target, (sljit_s16)v));

	/* Values with a single non-zero halfword: LOAD LOGICAL IMMEDIATE
	   into the matching 16-bit slot, zeroing the rest. */
	if (((sljit_uw)v & ~(sljit_uw)0x000000000000ffff) == 0)
		return push_inst(compiler, llill(target, (sljit_u16)v));

	if (((sljit_uw)v & ~(sljit_uw)0x00000000ffff0000) == 0)
		return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16)));

	if (((sljit_uw)v & ~(sljit_uw)0x0000ffff00000000) == 0)
		return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32)));

	if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0)
		return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48)));

	/* 6 byte instructions (requires extended immediate facility) */
	if (have_eimm()) {
		if (is_s32(v))
			return push_inst(compiler, lgfi(target, (sljit_s32)v));

		if (((sljit_uw)v >> 32) == 0)
			return push_inst(compiler, llilf(target, (sljit_u32)v));

		if (((sljit_uw)v << 32) == 0)
			return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32)));

		/* General case: low word, then insert the high word. */
		FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));
		return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));
	}

	/* TODO(mundaym): instruction sequences that don't use extended immediates */
	abort();
}
903
/* Decomposed D(X,B) operand: base register, index register, and
   displacement. r0 in base/index means "no register" in address
   generation on s390x. */
struct addr {
	sljit_gpr base;
	sljit_gpr index;
	sljit_s32 offset;
};

/* transform memory operand into D(X,B) form with a signed 20-bit offset */
static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler,
	struct addr *addr, sljit_s32 mem, sljit_sw off,
	sljit_gpr tmp /* clobbered, must not be r0 */)
{
	sljit_gpr base = r0;
	sljit_gpr index = r0;

	SLJIT_ASSERT(tmp != r0);
	if (mem & REG_MASK)
		base = gpr(mem & REG_MASK);

	if (mem & OFFS_REG_MASK) {
		index = gpr(OFFS_REG(mem));
		/* With an index register, 'off' is a shift amount (SLJIT
		   scaled-addressing convention), not a byte offset. */
		if (off != 0) {
			/* shift and put the result into tmp */
			SLJIT_ASSERT(0 <= off && off < 64);
			FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
			index = tmp;
			off = 0; /* clear offset */
		}
	}
	else if (!is_s20(off)) {
		/* Offset does not fit the 20-bit field: materialize it in tmp
		   and use it as the index register instead. */
		FAIL_IF(push_load_imm_inst(compiler, tmp, off));
		index = tmp;
		off = 0; /* clear offset */
	}
	addr->base = base;
	addr->index = index;
	addr->offset = (sljit_s32)off;
	return SLJIT_SUCCESS;
}
942
/* transform memory operand into D(X,B) form with an unsigned 12-bit offset
   (same as make_addr_bxy, but for RX-form instructions whose
   displacement field is only 12 bits and unsigned) */
static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,
	struct addr *addr, sljit_s32 mem, sljit_sw off,
	sljit_gpr tmp /* clobbered, must not be r0 */)
{
	sljit_gpr base = r0;
	sljit_gpr index = r0;

	SLJIT_ASSERT(tmp != r0);
	if (mem & REG_MASK)
		base = gpr(mem & REG_MASK);

	if (mem & OFFS_REG_MASK) {
		index = gpr(OFFS_REG(mem));
		/* With an index register, 'off' is a shift amount (SLJIT
		   scaled-addressing convention), not a byte offset. */
		if (off != 0) {
			/* shift and put the result into tmp */
			SLJIT_ASSERT(0 <= off && off < 64);
			FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
			index = tmp;
			off = 0; /* clear offset */
		}
	}
	else if (!is_u12(off)) {
		/* Offset does not fit the 12-bit field: materialize it in tmp
		   and use it as the index register instead. */
		FAIL_IF(push_load_imm_inst(compiler, tmp, off));
		index = tmp;
		off = 0; /* clear offset */
	}
	addr->base = base;
	addr->index = index;
	addr->offset = (sljit_s32)off;
	return SLJIT_SUCCESS;
}
975
/* Expand an encoder call with the fields of a struct addr. */
#define EVAL(op, r, addr) op(r, addr.offset, addr.index, addr.base)
/* Pick encoder i1 when cond holds, i2 otherwise. */
#define WHEN(cond, r, i1, i2, addr) \
	(cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr)

/* Load a 32- or 64-bit word from memory operand src/srcw into dst.
   May clobber tmp1. */
static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst,
		sljit_s32 src, sljit_sw srcw,
		sljit_s32 is_32bit)
{
	struct addr addr;
	sljit_ins ins;

	SLJIT_ASSERT(src & SLJIT_MEM);
	/* 64-bit loads (lg) always take a 20-bit displacement; 32-bit
	   loads need the long-displacement facility for that (ly). */
	if (have_ldisp() || !is_32bit)
		FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
	else
		FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));

	if (is_32bit)
		ins = WHEN(is_u12(addr.offset), dst, l, ly, addr);
	else
		ins = lg(dst, addr.offset, addr.index, addr.base);

	return push_inst(compiler, ins);
}
1001
1002 /* May clobber tmp1. */
/* Store src into a word in memory. May clobber tmp1 (address build). */
static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src,
		sljit_s32 dst, sljit_sw dstw,
		sljit_s32 is_32bit)
{
	struct addr mem;
	sljit_ins ins;

	SLJIT_ASSERT(dst & SLJIT_MEM);

	/* stg always takes a 20-bit displacement; st needs the
	   long-displacement facility for the sty form. */
	if (!is_32bit || have_ldisp())
		FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
	else
		FAIL_IF(make_addr_bx(compiler, &mem, dst, dstw, tmp1));

	if (!is_32bit)
		ins = stg(src, mem.offset, mem.index, mem.base);
	else if (is_u12(mem.offset))
		ins = st(src, mem.offset, mem.index, mem.base);
	else
		ins = sty(src, mem.offset, mem.index, mem.base);

	return push_inst(compiler, ins);
}
1023
1024 #undef WHEN
1025
/* Move an immediate, memory, or register operand into dst_r.
   Width is taken from compiler->mode (SLJIT_32). */
static sljit_s32 emit_move(struct sljit_compiler *compiler,
	sljit_gpr dst_r,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 is_32 = (compiler->mode & SLJIT_32) != 0;
	sljit_gpr from;

	SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK));

	if (src & SLJIT_IMM)
		return push_load_imm_inst(compiler, dst_r, srcw);
	if (src & SLJIT_MEM)
		return load_word(compiler, dst_r, src, srcw, is_32);

	/* plain register-to-register copy */
	from = gpr(src & REG_MASK);
	return push_inst(compiler, is_32 ? lr(dst_r, from) : lgr(dst_r, from));
}
1041
/* Emit a two-operand register-register instruction: dst = dst OP src2,
   after moving src1 into the destination. If dst aliases src2 the result
   is computed in tmp0 and copied back afterwards. May clobber tmp0/tmp1. */
static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_gpr out = tmp0;
	sljit_gpr rhs = tmp1;
	/* 0: dst already holds src1; 1: move src1 in; 2: use tmp0, write back */
	sljit_s32 move_kind = 1;

	if (FAST_IS_REG(dst)) {
		out = gpr(dst);
		if (dst == src1)
			move_kind = 0;
		else if (dst == src2) {
			/* dst aliases src2: compute in tmp0, copy back at the end */
			out = tmp0;
			move_kind = 2;
		}
	}

	if (move_kind != 0)
		FAIL_IF(emit_move(compiler, out, src1, src1w));

	if (FAST_IS_REG(src2))
		rhs = gpr(src2);
	else
		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));

	FAIL_IF(push_inst(compiler, ins | R4A(out) | R0A(rhs)));

	if (move_kind == 2) {
		sljit_gpr real_dst = gpr(dst & REG_MASK);
		return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(real_dst, tmp0) : lgr(real_dst, tmp0));
	}
	return SLJIT_SUCCESS;
}
1078
/* Emit a single-source register-register instruction (dst = OP src1).
   May clobber tmp0/tmp1. */
static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w)
{
	sljit_gpr out = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
	sljit_gpr in;

	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_move(compiler, tmp1, src1, src1w));
		in = tmp1;
	} else
		in = gpr(src1);

	return push_inst(compiler, ins | R4A(out) | R0A(in));
}
1093
/* Emit a three-operand register form: dst = src1 OP src2, destination
   distinct from the sources. May clobber tmp0/tmp1. */
static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_gpr out = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr lhs;
	sljit_gpr rhs;

	if (FAST_IS_REG(src1))
		lhs = gpr(src1);
	else {
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
		lhs = tmp0;
	}

	if (FAST_IS_REG(src2))
		rhs = gpr(src2);
	else {
		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
		rhs = tmp1;
	}

	return push_inst(compiler, ins | R4A(out) | R0A(lhs) | R12A(rhs));
}
1115
/* Immediate-operand encodings accepted by emit_ri(). */
typedef enum {
	RI_A,  /* 16-bit immediate placed at R20A position */
	RIL_A, /* 32-bit immediate placed at R36A position */
} emit_ril_type;
1120
/* Emit a register-immediate instruction: dst = src1 OP src2w, where the
   immediate is truncated to 16 (RI_A) or 32 (RIL_A) bits. May clobber tmp0. */
static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_sw src2w,
	emit_ril_type type)
{
	sljit_gpr reg = tmp0;
	sljit_s32 skip_move = 0;

	if (FAST_IS_REG(dst)) {
		reg = gpr(dst);
		/* destination already holds src1: no preparatory move needed */
		skip_move = (dst == src1);
	}

	if (!skip_move)
		FAIL_IF(emit_move(compiler, reg, src1, src1w));

	if (type == RIL_A)
		return push_inst(compiler, ins | R36A(reg) | (src2w & 0xffffffff));
	return push_inst(compiler, ins | R20A(reg) | (src2w & 0xffff));
}
1144
/* Emit an instruction with two registers and a 16-bit immediate
   (immediate placed at bits 16..31). May clobber tmp0. */
static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_sw src2w)
{
	sljit_gpr out = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
	sljit_gpr in;

	if (FAST_IS_REG(src1))
		in = gpr(src1 & REG_MASK);
	else {
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
		in = tmp0;
	}

	return push_inst(compiler, ins | R36A(out) | R32A(in) | (sljit_ins)(src2w & 0xffff) << 16);
}
1160
/* Memory-operand encodings accepted by emit_rx(). */
typedef enum {
	RX_A,  /* unsigned 12-bit displacement */
	RXY_A, /* signed 20-bit displacement */
} emit_rx_type;
1165
/* Emit a register-memory instruction: dst = src1 OP mem[src2 + src2w].
 * src2 must be a memory operand. src1 is moved into the destination
 * register first unless it is already there; if dst overlaps the address
 * registers, the result is computed in tmp0 and copied back afterwards.
 * May clobber tmp0 and tmp1. */
static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w,
	emit_rx_type type)
{
	sljit_gpr dst_r = tmp0;
	sljit_s32 needs_move = 1;
	sljit_gpr base, index;

	SLJIT_ASSERT(src2 & SLJIT_MEM);

	if (FAST_IS_REG(dst)) {
		dst_r = gpr(dst);

		if (dst == src1)
			needs_move = 0;
		else if (dst == (src2 & REG_MASK) || (dst == OFFS_REG(src2))) {
			/* dst overlaps the address: compute in tmp0, write back later */
			dst_r = tmp0;
			needs_move = 2;
		}
	}

	if (needs_move)
		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));

	base = gpr(src2 & REG_MASK);
	index = tmp0;

	if (src2 & OFFS_REG_MASK) {
		index = gpr(OFFS_REG(src2));

		if (src2w != 0) {
			/* scaled index: shift the offset register into tmp1 */
			FAIL_IF(push_inst(compiler, sllg(tmp1, index, src2w & 0x3, 0)));
			src2w = 0;
			index = tmp1;
		}
	} else if ((type == RX_A && !is_u12(src2w)) || (type == RXY_A && !is_s20(src2w))) {
		/* displacement does not fit the encoding: materialize it in tmp1 */
		FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w));

		if (src2 & REG_MASK)
			index = tmp1;
		else
			base = tmp1;
		src2w = 0;
	}

	if (type == RX_A)
		ins |= R20A(dst_r) | R16A(index) | R12A(base) | (sljit_ins)src2w;
	else
		ins |= R36A(dst_r) | R32A(index) | R28A(base) | disp_s20((sljit_s32)src2w);

	FAIL_IF(push_inst(compiler, ins));

	if (needs_move != 2)
		return SLJIT_SUCCESS;

	/* copy the result from tmp0 into the real destination */
	dst_r = gpr(dst);
	return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
}
1226
/* Emit a storage-immediate (SIY-form) instruction: mem[dst + dstw] OP srcw.
 * dst must be a memory operand; the address is reduced to base + s20
 * displacement, building it in tmp1 when it does not fit directly.
 * May clobber tmp1.
 *
 * Fixes two address-computation bugs in the scaled/large-offset paths:
 * 1. the scaled-index shift used `index` (still aliased to the
 *    uninitialized tmp1) instead of the offset register;
 * 2. the `la` address additions used dst_r (== tmp1 at that point) in
 *    place of the memory operand's base register, so the base was
 *    never added into the computed address. */
static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_sw srcw)
{
	SLJIT_ASSERT(dst & SLJIT_MEM);

	sljit_gpr dst_r = tmp1;

	if (dst & OFFS_REG_MASK) {
		sljit_gpr index = tmp1;

		if ((dstw & 0x3) == 0)
			index = gpr(OFFS_REG(dst));
		else
			/* shift the offset register (not tmp1) into tmp1 */
			FAIL_IF(push_inst(compiler, sllg(tmp1, gpr(OFFS_REG(dst)), dstw & 0x3, 0)));

		/* tmp1 = base + index (la: R1 at bit 20, X2 at 16, B2 at 12) */
		FAIL_IF(push_inst(compiler, 0x41000000 /* la */ | R20A(tmp1) | R16A(index) | R12A(gpr(dst & REG_MASK))));
		dstw = 0;
	}
	else if (!is_s20(dstw)) {
		/* displacement does not fit s20: materialize it in tmp1 */
		FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw));

		if (dst & REG_MASK)
			/* tmp1 = base + tmp1 */
			FAIL_IF(push_inst(compiler, 0x41000000 /* la */ | R20A(tmp1) | R16A(tmp1) | R12A(gpr(dst & REG_MASK))));

		dstw = 0;
	}
	else
		dst_r = gpr(dst & REG_MASK);

	return push_inst(compiler, ins | ((sljit_ins)(srcw & 0xff) << 32) | R28A(dst_r) | disp_s20((sljit_s32)dstw));
}
1259
/* Opcode table for one logical operation in every encoding the emitters
   may choose from; a zero entry means that form does not exist. */
struct ins_forms {
	sljit_ins op_r;   /* 32-bit two-operand register form (emit_rr) */
	sljit_ins op_gr;  /* 64-bit two-operand register form (emit_rr) */
	sljit_ins op_rk;  /* 32-bit three-operand register form (emit_rrf) */
	sljit_ins op_grk; /* 64-bit three-operand register form (emit_rrf) */
	sljit_ins op;     /* 32-bit memory form, u12 displacement (RX_A) */
	sljit_ins op_y;   /* 32-bit memory form, s20 displacement (RXY_A) */
	sljit_ins op_g;   /* 64-bit memory form, s20 displacement (RXY_A) */
};
1269
/* Emit a commutative binary operation, picking the cheapest available
 * encoding from forms. When an operand is in memory a register-memory
 * form is preferred, and the operands may be swapped freely since the
 * operation is commutative. Falls back to register forms otherwise. */
static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 mode = compiler->mode;
	sljit_ins ins, ins_k;

	if ((src1 | src2) & SLJIT_MEM) {
		sljit_ins ins12, ins20;

		/* select the u12- and s20-displacement memory forms for this width */
		if (mode & SLJIT_32) {
			ins12 = forms->op;
			ins20 = forms->op_y;
		}
		else {
			ins12 = 0;
			ins20 = forms->op_g;
		}

		if (ins12 && ins20) {
			/* Extra instructions needed for address computation can be executed independently. */
			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : is_s20(src1w)))) {
				if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
					return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);

				return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
			}

			if (src1 & SLJIT_MEM) {
				/* swap the operands: the operation is commutative */
				if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w))
					return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A);

				return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A);
			}
		}
		else if (ins12 || ins20) {
			emit_rx_type rx_type;

			if (ins12) {
				rx_type = RX_A;
				ins = ins12;
			}
			else {
				rx_type = RXY_A;
				ins = ins20;
			}

			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w)))))
				return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type);

			if (src1 & SLJIT_MEM)
				return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type);
		}
	}

	/* register fallback: pick the 2-operand or 3-operand form */
	if (mode & SLJIT_32) {
		ins = forms->op_r;
		ins_k = forms->op_rk;
	}
	else {
		ins = forms->op_gr;
		ins_k = forms->op_grk;
	}

	SLJIT_ASSERT(ins != 0 || ins_k != 0);

	if (ins && FAST_IS_REG(dst)) {
		if (dst == src1)
			return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);

		if (dst == src2)
			return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w);
	}

	if (ins_k == 0)
		return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);

	return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w);
}
1352
/* Emit a non-commutative binary operation. Only src2 may use a memory
   form (the operand order cannot be swapped); otherwise the register
   2-operand or 3-operand form is selected from forms. */
static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 mode = compiler->mode;
	sljit_ins three_op;

	if (src2 & SLJIT_MEM) {
		/* memory forms for this operand width (0 when unavailable) */
		sljit_ins short_form = (mode & SLJIT_32) ? forms->op : 0;
		sljit_ins long_form = (mode & SLJIT_32) ? forms->op_y : forms->op_g;

		if (short_form != 0 && long_form != 0) {
			if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
				return emit_rx(compiler, short_form, dst, src1, src1w, src2, src2w, RX_A);
			return emit_rx(compiler, long_form, dst, src1, src1w, src2, src2w, RXY_A);
		}
		if (short_form != 0)
			return emit_rx(compiler, short_form, dst, src1, src1w, src2, src2w, RX_A);
		if (long_form != 0)
			return emit_rx(compiler, long_form, dst, src1, src1w, src2, src2w, RXY_A);
	}

	three_op = (mode & SLJIT_32) ? forms->op_rk : forms->op_grk;

	/* use the 2-operand form when no 3-operand form exists or when the
	   destination already holds src1 */
	if (three_op == 0 || (FAST_IS_REG(dst) && dst == src1))
		return emit_rr(compiler, (mode & SLJIT_32) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w);

	return emit_rrf(compiler, three_op, dst, src1, src1w, src2, src2w);
}
1392
/* Generate the final machine code from the instruction buffer.
 * Two passes: pass 1 measures code and literal-pool sizes and resolves
 * label offsets; pass 2 encodes instructions into the executable buffer,
 * patching constants, jumps and put_labels against the literal pool that
 * is placed (doubleword aligned) directly after the code. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_s390x_const *const_;
	struct sljit_put_label *put_label;
	sljit_sw executable_offset;
	sljit_uw ins_size = 0; /* instructions */
	sljit_uw pool_size = 0; /* literal pool */
	sljit_uw pad_size;
	sljit_uw i, j = 0;
	struct sljit_memory_fragment *buf;
	void *code, *code_ptr;
	sljit_uw *pool, *pool_ptr;
	sljit_sw source, offset; /* TODO(carenas): only need 32 bit */

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* branch handling */
	label = compiler->labels;
	jump = compiler->jumps;
	put_label = compiler->put_labels;

	/* TODO(carenas): compiler->executable_size could be calculated
	 *                before to avoid the following loop (except for
	 *                pool_size)
	 */
	/* pass 1: calculate the size of the code (j counts instructions,
	   ins_size counts bytes) */
	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
		sljit_uw len = buf->used_size / sizeof(sljit_ins);
		sljit_ins *ibuf = (sljit_ins *)buf->memory;
		for (i = 0; i < len; ++i, ++j) {
			sljit_ins ins = ibuf[i];

			/* TODO(carenas): instruction tag vs size/addr == j
			 * using instruction tags for const is creative
			 * but unlike all other architectures, and is not
			 * done consistently for all other objects.
			 * This might need reviewing later.
			 */
			if (ins & sljit_ins_const) {
				pool_size += sizeof(*pool);
				ins &= ~sljit_ins_const;
			}
			if (label && label->size == j) {
				/* temporarily store the byte offset in label->size;
				   converted to an address after allocation below */
				label->size = ins_size;
				label = label->next;
			}
			if (jump && jump->addr == j) {
				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
					/* encoded: */
					/*   brasl %r14, <rel_addr> (or brcl <mask>, <rel_addr>) */
					/* replace with: */
					/*   lgrl %r1, <pool_addr> */
					/*   bras %r14, %r1 (or bcr <mask>, %r1) */
					pool_size += sizeof(*pool);
					ins_size += 2;
				}
				jump = jump->next;
			}
			if (put_label && put_label->addr == j) {
				pool_size += sizeof(*pool);
				put_label = put_label->next;
			}
			ins_size += sizeof_ins(ins);
		}
	}

	/* emit trailing label */
	if (label && label->size == j) {
		label->size = ins_size;
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!put_label);

	/* pad code size to 8 bytes so is accessible with half word offsets */
	/* the literal pool needs to be doubleword aligned */
	pad_size = ((ins_size + 7UL) & ~7UL) - ins_size;
	SLJIT_ASSERT(pad_size < 8UL);

	/* allocate target buffer */
	code = SLJIT_MALLOC_EXEC(ins_size + pad_size + pool_size,
					compiler->exec_allocator_data);
	PTR_FAIL_WITH_EXEC_IF(code);
	code_ptr = code;
	executable_offset = SLJIT_EXEC_OFFSET(code);

	/* TODO(carenas): pool is optional, and the ABI recommends it to
	 *                be created before the function code, instead of
	 *                globally; if generated code is too big could
	 *                need offsets bigger than 32bit words and asser()
	 */
	pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size);
	pool_ptr = pool;
	const_ = (struct sljit_s390x_const *)compiler->consts;

	/* update label addresses */
	label = compiler->labels;
	while (label) {
		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(
			(sljit_uw)code_ptr + label->size, executable_offset);
		label = label->next;
	}

	/* reset jumps */
	jump = compiler->jumps;
	put_label = compiler->put_labels;

	/* pass 2: emit the code */
	j = 0;
	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
		sljit_uw len = buf->used_size / sizeof(sljit_ins);
		sljit_ins *ibuf = (sljit_ins *)buf->memory;
		for (i = 0; i < len; ++i, ++j) {
			sljit_ins ins = ibuf[i];
			if (ins & sljit_ins_const) {
				/* clear the const tag */
				ins &= ~sljit_ins_const;

				/* update instruction with relative address of constant */
				source = (sljit_sw)code_ptr;
				offset = (sljit_sw)pool_ptr - source;

				SLJIT_ASSERT(!(offset & 1));
				offset >>= 1; /* halfword (not byte) offset */
				SLJIT_ASSERT(is_s32(offset));

				ins |= (sljit_ins)offset & 0xffffffff;

				/* update address */
				const_->const_.addr = (sljit_uw)pool_ptr;

				/* store initial value into pool and update pool address */
				*(pool_ptr++) = (sljit_uw)const_->init_value;

				/* move to next constant */
				const_ = (struct sljit_s390x_const *)const_->const_.next;
			}
			if (jump && jump->addr == j) {
				sljit_sw target = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
					jump->addr = (sljit_uw)pool_ptr;

					/* load address into tmp1 */
					source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;

					SLJIT_ASSERT(!(offset & 1));
					offset >>= 1;
					SLJIT_ASSERT(is_s32(offset));

					encode_inst(&code_ptr, lgrl(tmp1, offset & 0xffffffff));

					/* store jump target into pool and update pool address */
					*(pool_ptr++) = (sljit_uw)target;

					/* branch to tmp1 */
					sljit_ins op = (ins >> 32) & 0xf;
					sljit_ins arg = (ins >> 36) & 0xf;
					switch (op) {
					case 4: /* brcl -> bcr */
						ins = bcr(arg, tmp1);
						break;
					case 5: /* brasl -> basr */
						ins = basr(arg, tmp1);
						break;
					default:
						abort();
					}
				}
				else {
					/* direct relative branch: patch the halfword offset */
					jump->addr = (sljit_uw)code_ptr + 2;
					source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					offset = target - source;

					/* offset must be halfword aligned */
					SLJIT_ASSERT(!(offset & 1));
					offset >>= 1;
					SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */

					/* patch jump target */
					ins |= (sljit_ins)offset & 0xffffffff;
				}
				jump = jump->next;
			}
			if (put_label && put_label->addr == j) {
				source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

				SLJIT_ASSERT(put_label->label);
				put_label->addr = (sljit_uw)code_ptr;

				/* store target into pool */
				*pool_ptr = put_label->label->addr;
				offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
				pool_ptr++;

				SLJIT_ASSERT(!(offset & 1));
				offset >>= 1;
				SLJIT_ASSERT(is_s32(offset));
				ins |= (sljit_ins)offset & 0xffffffff;

				put_label = put_label->next;
			}
			encode_inst(&code_ptr, ins);
		}
	}
	/* both passes must have produced exactly the measured sizes */
	SLJIT_ASSERT((sljit_u8 *)code + ins_size == code_ptr);
	SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr);

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = ins_size;
	code = SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
	SLJIT_CACHE_FLUSH(code, code_ptr);
	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
	return code;
}
1616
sljit_has_cpu_feature(sljit_s32 feature_type)1617 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
1618 {
1619 /* TODO(mundaym): implement all */
1620 switch (feature_type) {
1621 case SLJIT_HAS_CLZ:
1622 return have_eimm() ? 1 : 0; /* FLOGR instruction */
1623 case SLJIT_HAS_CMOV:
1624 return have_lscond1() ? 1 : 0;
1625 case SLJIT_HAS_FPU:
1626 return 1;
1627 }
1628 return 0;
1629 }
1630
1631 /* --------------------------------------------------------------------- */
1632 /* Entry, exit */
1633 /* --------------------------------------------------------------------- */
1634
/* Emit the function prologue: save callee-saved GPRs and FPRs into the
 * caller-allocated save area above the stack pointer, allocate the local
 * stack frame, and copy register arguments into their saved registers. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 word_arg_count = 0;
	sljit_s32 offset, i, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Saved registers are stored in callee allocated save area. */
	SLJIT_ASSERT(gpr(SLJIT_FIRST_SAVED_REG) == r6 && gpr(SLJIT_S0) == r13);

	offset = 2 * SSIZE_OF(sw);
	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
		/* every register r6..r14 needs saving: one block store */
		FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15))); /* save registers TODO(MGM): optimize */
		offset += 9 * SSIZE_OF(sw);
	} else {
		/* save only the saved scratch registers (from r6 upward) ... */
		if (scratches == SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15)));
			offset += SSIZE_OF(sw);
		} else if (scratches > SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, stmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
		}

		/* ... and the saved registers ending at r13, plus r14 (link) */
		if (saveds == 0) {
			FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
			offset += SSIZE_OF(sw);
		} else {
			FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
			offset += (saveds + 1) * SSIZE_OF(sw);
		}
	}

	/* save the callee-saved floating point registers */
	tmp = SLJIT_FS0 - fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	/* round the frame (locals + fixed area) up to 16 bytes */
	local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
	compiler->local_size = local_size;

	/* allocate the frame: r15 -= local_size */
	FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));

	/* copy word arguments destined for saved registers (S0, S1, ...) */
	arg_types >>= SLJIT_ARG_SHIFT;
	tmp = 0;
	while (arg_types > 0) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - tmp), gpr(SLJIT_R0 + word_arg_count))));
				tmp++;
			}
			word_arg_count++;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
1703
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1704 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1705 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1706 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1707 {
1708 CHECK_ERROR();
1709 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1710 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1711
1712 compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
1713 return SLJIT_SUCCESS;
1714 }
1715
/* Emit the function epilogue (without the return branch): free the local
 * stack frame and restore the callee-saved GPRs and FPRs, mirroring the
 * save sequence in sljit_emit_enter. */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
{
	sljit_s32 offset, i, tmp;
	sljit_s32 local_size = compiler->local_size;
	sljit_s32 saveds = compiler->saveds;
	sljit_s32 scratches = compiler->scratches;

	/* free the frame: r15 += local_size (la when it fits u12, lay otherwise) */
	if (is_u12(local_size))
		FAIL_IF(push_inst(compiler, 0x41000000 /* la */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size));
	else
		FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size)));

	offset = 2 * SSIZE_OF(sw);
	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
		/* every register r6..r14 was saved: one block load */
		FAIL_IF(push_inst(compiler, lmg(r6, r14, offset, r15))); /* save registers TODO(MGM): optimize */
		offset += 9 * SSIZE_OF(sw);
	} else {
		/* restore the saved scratch registers (from r6 upward) ... */
		if (scratches == SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15)));
			offset += SSIZE_OF(sw);
		} else if (scratches > SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, lmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
		}

		/* ... and the saved registers ending at r13, plus r14 (link) */
		if (saveds == 0) {
			FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
			offset += SSIZE_OF(sw);
		} else {
			FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
			offset += (saveds + 1) * SSIZE_OF(sw);
		}
	}

	/* restore the callee-saved floating point registers */
	tmp = SLJIT_FS0 - compiler->fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	return SLJIT_SUCCESS;
}
1763
/* Emit a void return: release the stack frame and branch to the
 * saved return address in r14. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	FAIL_IF(emit_stack_frame_release(compiler));
	return push_inst(compiler, br(r14)); /* return */
}
1772
1773 /* --------------------------------------------------------------------- */
1774 /* Operators */
1775 /* --------------------------------------------------------------------- */
1776
/* Emit a zero-operand (implicit R0/R1 operand) operation: breakpoint,
 * nop, wide multiply, and the divide/remainder family. The mul/div ops
 * read their operands from SLJIT_R0/SLJIT_R1 and leave results there.
 * May clobber tmp0 and tmp1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_gpr arg0 = gpr(SLJIT_R0);
	sljit_gpr arg1 = gpr(SLJIT_R1);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op) | (op & SLJIT_32);
	switch (op) {
	case SLJIT_BREAKPOINT:
		/* The following invalid instruction is emitted by gdb. */
		return push_inst(compiler, 0x0001 /* 2-byte trap */);
	case SLJIT_NOP:
		return push_inst(compiler, 0x0700 /* 2-byte nop */);
	case SLJIT_LMUL_UW:
		/* mlgr produces the 128-bit product in an even/odd register pair;
		   the pair order is swapped into R0/R1 at the bottom */
		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
		break;
	case SLJIT_LMUL_SW:
		/* signed multiplication from: */
		/* Hacker's Delight, Second Edition: Chapter 8-3. */
		FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0)));
		FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0)));
		FAIL_IF(push_inst(compiler, ngr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, ngr(tmp1, arg0)));

		/* unsigned multiplication */
		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));

		/* correct the high half of the product for the sign bits */
		FAIL_IF(push_inst(compiler, sgr(arg0, tmp0)));
		FAIL_IF(push_inst(compiler, sgr(arg0, tmp1)));
		break;
	case SLJIT_DIV_U32:
	case SLJIT_DIVMOD_U32:
		/* dlr divides the 64-bit value in the tmp0/tmp1 pair by arg1 */
		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dlr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_U32)
			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_S32:
	case SLJIT_DIVMOD_S32:
		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_S32)
			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_UW:
	case SLJIT_DIVMOD_UW:
		FAIL_IF(push_inst(compiler, lghi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_UW)
			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_SW:
	case SLJIT_DIVMOD_SW:
		/* dsgr sign-extends internally: no explicit high-half clear needed */
		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_SW)
			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_ENDBR:
		return SLJIT_SUCCESS;
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		return SLJIT_SUCCESS;
	default:
		SLJIT_UNREACHABLE();
	}
	/* swap result registers */
	FAIL_IF(push_inst(compiler, lgr(tmp0, arg0)));
	FAIL_IF(push_inst(compiler, lgr(arg0, arg1)));
	return push_inst(compiler, lgr(arg1, tmp0));
}
1860
1861 /* LEVAL will be defined later with different parameters as needed */
1862 #define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2)
1863
sljit_emit_op1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1864 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1865 sljit_s32 dst, sljit_sw dstw,
1866 sljit_s32 src, sljit_sw srcw)
1867 {
1868 sljit_ins ins;
1869 struct addr mem;
1870 sljit_gpr dst_r;
1871 sljit_gpr src_r;
1872 sljit_s32 opcode = GET_OPCODE(op);
1873
1874 CHECK_ERROR();
1875 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1876 ADJUST_LOCAL_OFFSET(dst, dstw);
1877 ADJUST_LOCAL_OFFSET(src, srcw);
1878
1879 if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) {
1880 /* LOAD REGISTER */
1881 if (FAST_IS_REG(dst) && FAST_IS_REG(src)) {
1882 dst_r = gpr(dst);
1883 src_r = gpr(src);
1884 switch (opcode | (op & SLJIT_32)) {
1885 /* 32-bit */
1886 case SLJIT_MOV32_U8:
1887 ins = llcr(dst_r, src_r);
1888 break;
1889 case SLJIT_MOV32_S8:
1890 ins = lbr(dst_r, src_r);
1891 break;
1892 case SLJIT_MOV32_U16:
1893 ins = llhr(dst_r, src_r);
1894 break;
1895 case SLJIT_MOV32_S16:
1896 ins = lhr(dst_r, src_r);
1897 break;
1898 case SLJIT_MOV32:
1899 if (dst_r == src_r)
1900 return SLJIT_SUCCESS;
1901 ins = lr(dst_r, src_r);
1902 break;
1903 /* 64-bit */
1904 case SLJIT_MOV_U8:
1905 ins = llgcr(dst_r, src_r);
1906 break;
1907 case SLJIT_MOV_S8:
1908 ins = lgbr(dst_r, src_r);
1909 break;
1910 case SLJIT_MOV_U16:
1911 ins = llghr(dst_r, src_r);
1912 break;
1913 case SLJIT_MOV_S16:
1914 ins = lghr(dst_r, src_r);
1915 break;
1916 case SLJIT_MOV_U32:
1917 ins = llgfr(dst_r, src_r);
1918 break;
1919 case SLJIT_MOV_S32:
1920 ins = lgfr(dst_r, src_r);
1921 break;
1922 case SLJIT_MOV:
1923 case SLJIT_MOV_P:
1924 if (dst_r == src_r)
1925 return SLJIT_SUCCESS;
1926 ins = lgr(dst_r, src_r);
1927 break;
1928 default:
1929 ins = 0;
1930 SLJIT_UNREACHABLE();
1931 break;
1932 }
1933 FAIL_IF(push_inst(compiler, ins));
1934 return SLJIT_SUCCESS;
1935 }
1936 /* LOAD IMMEDIATE */
1937 if (FAST_IS_REG(dst) && (src & SLJIT_IMM)) {
1938 switch (opcode) {
1939 case SLJIT_MOV_U8:
1940 srcw = (sljit_sw)((sljit_u8)(srcw));
1941 break;
1942 case SLJIT_MOV_S8:
1943 srcw = (sljit_sw)((sljit_s8)(srcw));
1944 break;
1945 case SLJIT_MOV_U16:
1946 srcw = (sljit_sw)((sljit_u16)(srcw));
1947 break;
1948 case SLJIT_MOV_S16:
1949 srcw = (sljit_sw)((sljit_s16)(srcw));
1950 break;
1951 case SLJIT_MOV_U32:
1952 srcw = (sljit_sw)((sljit_u32)(srcw));
1953 break;
1954 case SLJIT_MOV_S32:
1955 case SLJIT_MOV32:
1956 srcw = (sljit_sw)((sljit_s32)(srcw));
1957 break;
1958 }
1959 return push_load_imm_inst(compiler, gpr(dst), srcw);
1960 }
1961 /* LOAD */
1962 /* TODO(carenas): avoid reg being defined later */
1963 #define LEVAL(i) EVAL(i, reg, mem)
1964 if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) {
1965 sljit_gpr reg = gpr(dst);
1966
1967 FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
1968 /* TODO(carenas): convert all calls below to LEVAL */
1969 switch (opcode | (op & SLJIT_32)) {
1970 case SLJIT_MOV32_U8:
1971 ins = llc(reg, mem.offset, mem.index, mem.base);
1972 break;
1973 case SLJIT_MOV32_S8:
1974 ins = lb(reg, mem.offset, mem.index, mem.base);
1975 break;
1976 case SLJIT_MOV32_U16:
1977 ins = llh(reg, mem.offset, mem.index, mem.base);
1978 break;
1979 case SLJIT_MOV32_S16:
1980 ins = WHEN2(is_u12(mem.offset), lh, lhy);
1981 break;
1982 case SLJIT_MOV32:
1983 ins = WHEN2(is_u12(mem.offset), l, ly);
1984 break;
1985 case SLJIT_MOV_U8:
1986 ins = LEVAL(llgc);
1987 break;
1988 case SLJIT_MOV_S8:
1989 ins = lgb(reg, mem.offset, mem.index, mem.base);
1990 break;
1991 case SLJIT_MOV_U16:
1992 ins = LEVAL(llgh);
1993 break;
1994 case SLJIT_MOV_S16:
1995 ins = lgh(reg, mem.offset, mem.index, mem.base);
1996 break;
1997 case SLJIT_MOV_U32:
1998 ins = LEVAL(llgf);
1999 break;
2000 case SLJIT_MOV_S32:
2001 ins = lgf(reg, mem.offset, mem.index, mem.base);
2002 break;
2003 case SLJIT_MOV_P:
2004 case SLJIT_MOV:
2005 ins = lg(reg, mem.offset, mem.index, mem.base);
2006 break;
2007 default:
2008 ins = 0;
2009 SLJIT_UNREACHABLE();
2010 break;
2011 }
2012 FAIL_IF(push_inst(compiler, ins));
2013 return SLJIT_SUCCESS;
2014 }
2015 /* STORE and STORE IMMEDIATE */
2016 if ((dst & SLJIT_MEM)
2017 && (FAST_IS_REG(src) || (src & SLJIT_IMM))) {
2018 sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;
2019 if (src & SLJIT_IMM) {
2020 /* TODO(mundaym): MOVE IMMEDIATE? */
2021 FAIL_IF(push_load_imm_inst(compiler, reg, srcw));
2022 }
2023 struct addr mem;
2024 FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2025 switch (opcode) {
2026 case SLJIT_MOV_U8:
2027 case SLJIT_MOV_S8:
2028 return push_inst(compiler,
2029 WHEN2(is_u12(mem.offset), stc, stcy));
2030 case SLJIT_MOV_U16:
2031 case SLJIT_MOV_S16:
2032 return push_inst(compiler,
2033 WHEN2(is_u12(mem.offset), sth, sthy));
2034 case SLJIT_MOV_U32:
2035 case SLJIT_MOV_S32:
2036 case SLJIT_MOV32:
2037 return push_inst(compiler,
2038 WHEN2(is_u12(mem.offset), st, sty));
2039 case SLJIT_MOV_P:
2040 case SLJIT_MOV:
2041 FAIL_IF(push_inst(compiler, LEVAL(stg)));
2042 return SLJIT_SUCCESS;
2043 default:
2044 SLJIT_UNREACHABLE();
2045 }
2046 }
2047 #undef LEVAL
2048 /* MOVE CHARACTERS */
2049 if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) {
2050 struct addr mem;
2051 FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
2052 switch (opcode) {
2053 case SLJIT_MOV_U8:
2054 case SLJIT_MOV_S8:
2055 FAIL_IF(push_inst(compiler,
2056 EVAL(llgc, tmp0, mem)));
2057 FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2058 return push_inst(compiler,
2059 EVAL(stcy, tmp0, mem));
2060 case SLJIT_MOV_U16:
2061 case SLJIT_MOV_S16:
2062 FAIL_IF(push_inst(compiler,
2063 EVAL(llgh, tmp0, mem)));
2064 FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2065 return push_inst(compiler,
2066 EVAL(sthy, tmp0, mem));
2067 case SLJIT_MOV_U32:
2068 case SLJIT_MOV_S32:
2069 case SLJIT_MOV32:
2070 FAIL_IF(push_inst(compiler,
2071 EVAL(ly, tmp0, mem)));
2072 FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2073 return push_inst(compiler,
2074 EVAL(sty, tmp0, mem));
2075 case SLJIT_MOV_P:
2076 case SLJIT_MOV:
2077 FAIL_IF(push_inst(compiler,
2078 EVAL(lg, tmp0, mem)));
2079 FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2080 FAIL_IF(push_inst(compiler,
2081 EVAL(stg, tmp0, mem)));
2082 return SLJIT_SUCCESS;
2083 default:
2084 SLJIT_UNREACHABLE();
2085 }
2086 }
2087 SLJIT_UNREACHABLE();
2088 }
2089
2090 SLJIT_ASSERT((src & SLJIT_IMM) == 0); /* no immediates */
2091
2092 dst_r = FAST_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0;
2093 src_r = FAST_IS_REG(src) ? gpr(REG_MASK & src) : tmp0;
2094 if (src & SLJIT_MEM)
2095 FAIL_IF(load_word(compiler, src_r, src, srcw, src & SLJIT_32));
2096
2097 compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2098
2099 /* TODO(mundaym): optimize loads and stores */
2100 switch (opcode | (op & SLJIT_32)) {
2101 case SLJIT_NOT:
2102 /* emulate ~x with x^-1 */
2103 FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
2104 if (src_r != dst_r)
2105 FAIL_IF(push_inst(compiler, lgr(dst_r, src_r)));
2106
2107 FAIL_IF(push_inst(compiler, xgr(dst_r, tmp1)));
2108 break;
2109 case SLJIT_NOT32:
2110 /* emulate ~x with x^-1 */
2111 if (have_eimm())
2112 FAIL_IF(push_inst(compiler, xilf(dst_r, 0xffffffff)));
2113 else {
2114 FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
2115 if (src_r != dst_r)
2116 FAIL_IF(push_inst(compiler, lr(dst_r, src_r)));
2117
2118 FAIL_IF(push_inst(compiler, xr(dst_r, tmp1)));
2119 }
2120 break;
2121 case SLJIT_CLZ:
2122 if (have_eimm()) {
2123 FAIL_IF(push_inst(compiler, flogr(tmp0, src_r))); /* clobbers tmp1 */
2124 if (dst_r != tmp0)
2125 FAIL_IF(push_inst(compiler, lgr(dst_r, tmp0)));
2126 } else {
2127 abort(); /* TODO(mundaym): no eimm (?) */
2128 }
2129 break;
2130 case SLJIT_CLZ32:
2131 if (have_eimm()) {
2132 FAIL_IF(push_inst(compiler, sllg(tmp1, src_r, 32, 0)));
2133 FAIL_IF(push_inst(compiler, iilf(tmp1, 0xffffffff)));
2134 FAIL_IF(push_inst(compiler, flogr(tmp0, tmp1))); /* clobbers tmp1 */
2135 if (dst_r != tmp0)
2136 FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
2137 } else {
2138 abort(); /* TODO(mundaym): no eimm (?) */
2139 }
2140 break;
2141 default:
2142 SLJIT_UNREACHABLE();
2143 }
2144
2145 if ((op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW))
2146 FAIL_IF(update_zero_overflow(compiler, op, dst_r));
2147
2148 /* TODO(carenas): doesn't need FAIL_IF */
2149 if (dst & SLJIT_MEM)
2150 FAIL_IF(store_word(compiler, dst_r, dst, dstw, op & SLJIT_32));
2151
2152 return SLJIT_SUCCESS;
2153 }
2154
is_commutative(sljit_s32 op)2155 static SLJIT_INLINE int is_commutative(sljit_s32 op)
2156 {
2157 switch (GET_OPCODE(op)) {
2158 case SLJIT_ADD:
2159 case SLJIT_ADDC:
2160 case SLJIT_MUL:
2161 case SLJIT_AND:
2162 case SLJIT_OR:
2163 case SLJIT_XOR:
2164 return 1;
2165 }
2166 return 0;
2167 }
2168
is_shift(sljit_s32 op)2169 static SLJIT_INLINE int is_shift(sljit_s32 op) {
2170 sljit_s32 v = GET_OPCODE(op);
2171 return (v == SLJIT_SHL || v == SLJIT_ASHR || v == SLJIT_LSHR) ? 1 : 0;
2172 }
2173
/* Signed add encodings: ar/agr (register-register), ark/agrk
   (three-operand), a/ay/ag (register-memory). Used by sljit_emit_add
   when the signed overflow flag is requested. */
static const struct ins_forms add_forms = {
	0x1a00, /* ar */
	0xb9080000, /* agr */
	0xb9f80000, /* ark */
	0xb9e80000, /* agrk */
	0x5a000000, /* a */
	0xe3000000005a, /* ay */
	0xe30000000008, /* ag */
};

/* Logical (unsigned/carry-setting) add encodings; used by
   sljit_emit_add when signed overflow is not requested. */
static const struct ins_forms logical_add_forms = {
	0x1e00, /* alr */
	0xb90a0000, /* algr */
	0xb9fa0000, /* alrk */
	0xb9ea0000, /* algrk */
	0x5e000000, /* al */
	0xe3000000005e, /* aly */
	0xe3000000000a, /* alg */
};
2193
sljit_emit_add(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2194 static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op,
2195 sljit_s32 dst, sljit_sw dstw,
2196 sljit_s32 src1, sljit_sw src1w,
2197 sljit_s32 src2, sljit_sw src2w)
2198 {
2199 int sets_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW;
2200 int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
2201 const struct ins_forms *forms;
2202 sljit_ins ins;
2203
2204 if (src2 & SLJIT_IMM) {
2205 if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
2206 if (sets_overflow)
2207 ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
2208 else
2209 ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
2210 return emit_siy(compiler, ins, dst, dstw, src2w);
2211 }
2212
2213 if (is_s16(src2w)) {
2214 if (sets_overflow)
2215 ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
2216 else
2217 ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
2218 FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w));
2219 goto done;
2220 }
2221
2222 if (!sets_overflow) {
2223 if ((op & SLJIT_32) || is_u32(src2w)) {
2224 ins = (op & SLJIT_32) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */;
2225 FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2226 goto done;
2227 }
2228 if (is_u32(-src2w)) {
2229 FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A));
2230 goto done;
2231 }
2232 }
2233 else if ((op & SLJIT_32) || is_s32(src2w)) {
2234 ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
2235 FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2236 goto done;
2237 }
2238 }
2239
2240 forms = sets_overflow ? &add_forms : &logical_add_forms;
2241 FAIL_IF(emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w));
2242
2243 done:
2244 if (sets_zero_overflow)
2245 FAIL_IF(update_zero_overflow(compiler, op, FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0));
2246
2247 if (dst & SLJIT_MEM)
2248 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2249
2250 return SLJIT_SUCCESS;
2251 }
2252
/* Signed subtract encodings; used by sljit_emit_sub when signed flags
   are requested. */
static const struct ins_forms sub_forms = {
	0x1b00, /* sr */
	0xb9090000, /* sgr */
	0xb9f90000, /* srk */
	0xb9e90000, /* sgrk */
	0x5b000000, /* s */
	0xe3000000005b, /* sy */
	0xe30000000009, /* sg */
};

/* Logical (unsigned/borrow-setting) subtract encodings. */
static const struct ins_forms logical_sub_forms = {
	0x1f00, /* slr */
	0xb90b0000, /* slgr */
	0xb9fb0000, /* slrk */
	0xb9eb0000, /* slgrk */
	0x5f000000, /* sl */
	0xe3000000005f, /* sly */
	0xe3000000000b, /* slg */
};
2272
sljit_emit_sub(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2273 static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,
2274 sljit_s32 dst, sljit_sw dstw,
2275 sljit_s32 src1, sljit_sw src1w,
2276 sljit_s32 src2, sljit_sw src2w)
2277 {
2278 sljit_s32 flag_type = GET_FLAG_TYPE(op);
2279 int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW);
2280 int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
2281 const struct ins_forms *forms;
2282 sljit_ins ins;
2283
2284 if (dst == (sljit_s32)tmp0 && flag_type <= SLJIT_SIG_LESS_EQUAL) {
2285 int compare_signed = flag_type >= SLJIT_SIG_LESS;
2286
2287 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE;
2288
2289 if (src2 & SLJIT_IMM) {
2290 if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w)))
2291 {
2292 if ((op & SLJIT_32) || is_s32(src2w)) {
2293 ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;
2294 return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
2295 }
2296 }
2297 else {
2298 if ((op & SLJIT_32) || is_u32(src2w)) {
2299 ins = (op & SLJIT_32) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */;
2300 return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
2301 }
2302 if (is_s16(src2w))
2303 return emit_rie_d(compiler, 0xec00000000db /* alghsik */, (sljit_s32)tmp0, src1, src1w, src2w);
2304 }
2305 }
2306 else if (src2 & SLJIT_MEM) {
2307 if ((op & SLJIT_32) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) {
2308 ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */;
2309 return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A);
2310 }
2311
2312 if (compare_signed)
2313 ins = (op & SLJIT_32) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */;
2314 else
2315 ins = (op & SLJIT_32) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */;
2316 return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A);
2317 }
2318
2319 if (compare_signed)
2320 ins = (op & SLJIT_32) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */;
2321 else
2322 ins = (op & SLJIT_32) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */;
2323 return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w);
2324 }
2325
2326 if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) {
2327 ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
2328 FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w));
2329 goto done;
2330 }
2331
2332 if (src2 & SLJIT_IMM) {
2333 sljit_sw neg_src2w = -src2w;
2334
2335 if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) {
2336 if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
2337 if (sets_signed)
2338 ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
2339 else
2340 ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
2341 return emit_siy(compiler, ins, dst, dstw, neg_src2w);
2342 }
2343
2344 if (is_s16(neg_src2w)) {
2345 if (sets_signed)
2346 ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
2347 else
2348 ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
2349 FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w));
2350 goto done;
2351 }
2352 }
2353
2354 if (!sets_signed) {
2355 if ((op & SLJIT_32) || is_u32(src2w)) {
2356 ins = (op & SLJIT_32) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */;
2357 FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2358 goto done;
2359 }
2360 if (is_u32(neg_src2w)) {
2361 FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A));
2362 goto done;
2363 }
2364 }
2365 else if ((op & SLJIT_32) || is_s32(neg_src2w)) {
2366 ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
2367 FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A));
2368 goto done;
2369 }
2370 }
2371
2372 forms = sets_signed ? &sub_forms : &logical_sub_forms;
2373 FAIL_IF(emit_non_commutative(compiler, forms, dst, src1, src1w, src2, src2w));
2374
2375 done:
2376 if (sets_signed) {
2377 sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2378
2379 if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) {
2380 /* In case of overflow, the sign bit of the two source operands must be different, and
2381 - the first operand is greater if the sign bit of the result is set
2382 - the first operand is less if the sign bit of the result is not set
2383 The -result operation sets the corrent sign, because the result cannot be zero.
2384 The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */
2385 FAIL_IF(push_inst(compiler, brc(0xe, 2 + 2)));
2386 FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r)));
2387 }
2388 else if (op & SLJIT_SET_Z)
2389 FAIL_IF(update_zero_overflow(compiler, op, dst_r));
2390 }
2391
2392 if (dst & SLJIT_MEM)
2393 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2394
2395 return SLJIT_SUCCESS;
2396 }
2397
/* Multiply encodings for the no-flags case. */
static const struct ins_forms multiply_forms = {
	0xb2520000, /* msr */
	0xb90c0000, /* msgr */
	0xb9fd0000, /* msrkc */
	0xb9ed0000, /* msgrkc */
	0x71000000, /* ms */
	0xe30000000051, /* msy */
	0xe3000000000c, /* msg */
};

/* Overflow-setting multiply encodings; zero entries have no
   flag-setting equivalent. */
static const struct ins_forms multiply_overflow_forms = {
	0,
	0,
	0xb9fd0000, /* msrkc */
	0xb9ed0000, /* msgrkc */
	0,
	0xe30000000053, /* msc */
	0xe30000000083, /* msgc */
};
2417
sljit_emit_multiply(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2418 static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op,
2419 sljit_s32 dst,
2420 sljit_s32 src1, sljit_sw src1w,
2421 sljit_s32 src2, sljit_sw src2w)
2422 {
2423 sljit_ins ins;
2424
2425 if (HAS_FLAGS(op)) {
2426 /* if have_misc2 fails, this operation should be emulated. 32 bit emulation:
2427 FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r)));
2428 FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r)));
2429 if (dst_r != tmp0) {
2430 FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
2431 }
2432 FAIL_IF(push_inst(compiler, aih(tmp0, 1)));
2433 FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U)));
2434 FAIL_IF(push_inst(compiler, ipm(tmp1)));
2435 FAIL_IF(push_inst(compiler, oilh(tmp1, 0x2000))); */
2436
2437 return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w);
2438 }
2439
2440 if (src2 & SLJIT_IMM) {
2441 if (is_s16(src2w)) {
2442 ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */;
2443 return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A);
2444 }
2445
2446 if (is_s32(src2w)) {
2447 ins = (op & SLJIT_32) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */;
2448 return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A);
2449 }
2450 }
2451
2452 return emit_commutative(compiler, &multiply_forms, dst, src1, src1w, src2, src2w);
2453 }
2454
sljit_emit_bitwise_imm(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst,sljit_s32 src1,sljit_sw src1w,sljit_uw imm,sljit_s32 count16)2455 static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type,
2456 sljit_s32 dst,
2457 sljit_s32 src1, sljit_sw src1w,
2458 sljit_uw imm, sljit_s32 count16)
2459 {
2460 sljit_s32 mode = compiler->mode;
2461 sljit_gpr dst_r = tmp0;
2462 sljit_s32 needs_move = 1;
2463
2464 if (IS_GPR_REG(dst)) {
2465 dst_r = gpr(dst & REG_MASK);
2466 if (dst == src1)
2467 needs_move = 0;
2468 }
2469
2470 if (needs_move)
2471 FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
2472
2473 if (type == SLJIT_AND) {
2474 if (!(mode & SLJIT_32))
2475 FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | R36A(dst_r) | (imm >> 32)));
2476 return push_inst(compiler, 0xc00b00000000 /* nilf */ | R36A(dst_r) | (imm & 0xffffffff));
2477 }
2478 else if (type == SLJIT_OR) {
2479 if (count16 >= 3) {
2480 FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32)));
2481 return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
2482 }
2483
2484 if (count16 >= 2) {
2485 if ((imm & 0x00000000ffffffffull) == 0)
2486 return push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32));
2487 if ((imm & 0xffffffff00000000ull) == 0)
2488 return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
2489 }
2490
2491 if ((imm & 0xffff000000000000ull) != 0)
2492 FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | R20A(dst_r) | (imm >> 48)));
2493 if ((imm & 0x0000ffff00000000ull) != 0)
2494 FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | R20A(dst_r) | ((imm >> 32) & 0xffff)));
2495 if ((imm & 0x00000000ffff0000ull) != 0)
2496 FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | R20A(dst_r) | ((imm >> 16) & 0xffff)));
2497 if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2498 return push_inst(compiler, 0xa50b0000 /* oill */ | R20A(dst_r) | (imm & 0xffff));
2499 return SLJIT_SUCCESS;
2500 }
2501
2502 if ((imm & 0xffffffff00000000ull) != 0)
2503 FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | R36A(dst_r) | (imm >> 32)));
2504 if ((imm & 0x00000000ffffffffull) != 0 || imm == 0)
2505 return push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(dst_r) | (imm & 0xffffffff));
2506 return SLJIT_SUCCESS;
2507 }
2508
/* Bitwise AND encodings for the generic (non-immediate) path. */
static const struct ins_forms bitwise_and_forms = {
	0x1400, /* nr */
	0xb9800000, /* ngr */
	0xb9f40000, /* nrk */
	0xb9e40000, /* ngrk */
	0x54000000, /* n */
	0xe30000000054, /* ny */
	0xe30000000080, /* ng */
};

/* Bitwise OR encodings. */
static const struct ins_forms bitwise_or_forms = {
	0x1600, /* or */
	0xb9810000, /* ogr */
	0xb9f60000, /* ork */
	0xb9e60000, /* ogrk */
	0x56000000, /* o */
	0xe30000000056, /* oy */
	0xe30000000081, /* og */
};

/* Bitwise XOR encodings. */
static const struct ins_forms bitwise_xor_forms = {
	0x1700, /* xr */
	0xb9820000, /* xgr */
	0xb9f70000, /* xrk */
	0xb9e70000, /* xgrk */
	0x57000000, /* x */
	0xe30000000057, /* xy */
	0xe30000000082, /* xg */
};
2538
sljit_emit_bitwise(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2539 static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op,
2540 sljit_s32 dst,
2541 sljit_s32 src1, sljit_sw src1w,
2542 sljit_s32 src2, sljit_sw src2w)
2543 {
2544 sljit_s32 type = GET_OPCODE(op);
2545 const struct ins_forms *forms;
2546
2547 if ((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == (sljit_s32)tmp0))) {
2548 sljit_s32 count16 = 0;
2549 sljit_uw imm = (sljit_uw)src2w;
2550
2551 if (op & SLJIT_32)
2552 imm &= 0xffffffffull;
2553
2554 if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2555 count16++;
2556 if ((imm & 0x00000000ffff0000ull) != 0)
2557 count16++;
2558 if ((imm & 0x0000ffff00000000ull) != 0)
2559 count16++;
2560 if ((imm & 0xffff000000000000ull) != 0)
2561 count16++;
2562
2563 if (type == SLJIT_AND && dst == (sljit_s32)tmp0 && count16 == 1) {
2564 sljit_gpr src_r = tmp0;
2565
2566 if (FAST_IS_REG(src1))
2567 src_r = gpr(src1 & REG_MASK);
2568 else
2569 FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
2570
2571 if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2572 return push_inst(compiler, 0xa7010000 | R20A(src_r) | imm);
2573 if ((imm & 0x00000000ffff0000ull) != 0)
2574 return push_inst(compiler, 0xa7000000 | R20A(src_r) | (imm >> 16));
2575 if ((imm & 0x0000ffff00000000ull) != 0)
2576 return push_inst(compiler, 0xa7030000 | R20A(src_r) | (imm >> 32));
2577 return push_inst(compiler, 0xa7020000 | R20A(src_r) | (imm >> 48));
2578 }
2579
2580 if (!(op & SLJIT_SET_Z))
2581 return sljit_emit_bitwise_imm(compiler, type, dst, src1, src1w, imm, count16);
2582 }
2583
2584 if (type == SLJIT_AND)
2585 forms = &bitwise_and_forms;
2586 else if (type == SLJIT_OR)
2587 forms = &bitwise_or_forms;
2588 else
2589 forms = &bitwise_xor_forms;
2590
2591 return emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w);
2592 }
2593
sljit_emit_shift(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2594 static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,
2595 sljit_s32 dst,
2596 sljit_s32 src1, sljit_sw src1w,
2597 sljit_s32 src2, sljit_sw src2w)
2598 {
2599 sljit_s32 type = GET_OPCODE(op);
2600 sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2601 sljit_gpr src_r = tmp0;
2602 sljit_gpr base_r = tmp0;
2603 sljit_ins imm = 0;
2604 sljit_ins ins;
2605
2606 if (FAST_IS_REG(src1))
2607 src_r = gpr(src1 & REG_MASK);
2608 else
2609 FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
2610
2611 if (src2 & SLJIT_IMM)
2612 imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
2613 else if (FAST_IS_REG(src2))
2614 base_r = gpr(src2 & REG_MASK);
2615 else {
2616 FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
2617 base_r = tmp1;
2618 }
2619
2620 if ((op & SLJIT_32) && dst_r == src_r) {
2621 if (type == SLJIT_SHL)
2622 ins = 0x89000000 /* sll */;
2623 else if (type == SLJIT_LSHR)
2624 ins = 0x88000000 /* srl */;
2625 else
2626 ins = 0x8a000000 /* sra */;
2627
2628 FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm));
2629 }
2630 else {
2631 if (type == SLJIT_SHL)
2632 ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */;
2633 else if (type == SLJIT_LSHR)
2634 ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */;
2635 else
2636 ins = (op & SLJIT_32) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */;
2637
2638 FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16)));
2639 }
2640
2641 if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR)
2642 return push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r));
2643
2644 return SLJIT_SUCCESS;
2645 }
2646
/* Add-with-carry encodings; only register-register and long-displacement
   memory forms exist. */
static const struct ins_forms addc_forms = {
	0xb9980000, /* alcr */
	0xb9880000, /* alcgr */
	0,
	0,
	0,
	0xe30000000098, /* alc */
	0xe30000000088, /* alcg */
};

/* Subtract-with-borrow encodings. */
static const struct ins_forms subc_forms = {
	0xb9990000, /* slbr */
	0xb9890000, /* slbgr */
	0,
	0,
	0,
	0xe30000000099, /* slb */
	0xe30000000089, /* slbg */
};
2666
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2667 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2668 sljit_s32 dst, sljit_sw dstw,
2669 sljit_s32 src1, sljit_sw src1w,
2670 sljit_s32 src2, sljit_sw src2w)
2671 {
2672 CHECK_ERROR();
2673 CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2674 ADJUST_LOCAL_OFFSET(dst, dstw);
2675 ADJUST_LOCAL_OFFSET(src1, src1w);
2676 ADJUST_LOCAL_OFFSET(src2, src2w);
2677
2678 compiler->mode = op & SLJIT_32;
2679 compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2680
2681 if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) {
2682 src1 ^= src2;
2683 src2 ^= src1;
2684 src1 ^= src2;
2685
2686 src1w ^= src2w;
2687 src2w ^= src1w;
2688 src1w ^= src2w;
2689 }
2690
2691 switch (GET_OPCODE(op)) {
2692 case SLJIT_ADD:
2693 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2694 return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2695 case SLJIT_ADDC:
2696 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2697 FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w));
2698 if (dst & SLJIT_MEM)
2699 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2700 return SLJIT_SUCCESS;
2701 case SLJIT_SUB:
2702 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2703 return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2704 case SLJIT_SUBC:
2705 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2706 FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w));
2707 if (dst & SLJIT_MEM)
2708 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2709 return SLJIT_SUCCESS;
2710 case SLJIT_MUL:
2711 FAIL_IF(sljit_emit_multiply(compiler, op, dst, src1, src1w, src2, src2w));
2712 break;
2713 case SLJIT_AND:
2714 case SLJIT_OR:
2715 case SLJIT_XOR:
2716 FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w));
2717 break;
2718 case SLJIT_SHL:
2719 case SLJIT_LSHR:
2720 case SLJIT_ASHR:
2721 FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w));
2722 break;
2723 }
2724
2725 if (dst & SLJIT_MEM)
2726 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2727 return SLJIT_SUCCESS;
2728 }
2729
sljit_emit_op2u(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2730 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2731 sljit_s32 src1, sljit_sw src1w,
2732 sljit_s32 src2, sljit_sw src2w)
2733 {
2734 CHECK_ERROR();
2735 CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2736
2737 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2738 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2739 compiler->skip_checks = 1;
2740 #endif
2741 return sljit_emit_op2(compiler, op, (sljit_s32)tmp0, 0, src1, src1w, src2, src2w);
2742 }
2743
sljit_emit_op_src(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)2744 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(
2745 struct sljit_compiler *compiler,
2746 sljit_s32 op, sljit_s32 src, sljit_sw srcw)
2747 {
2748 sljit_gpr src_r;
2749
2750 CHECK_ERROR();
2751 CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2752 ADJUST_LOCAL_OFFSET(src, srcw);
2753
2754 switch (op) {
2755 case SLJIT_FAST_RETURN:
2756 src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
2757 if (src & SLJIT_MEM)
2758 FAIL_IF(load_word(compiler, tmp1, src, srcw, 0));
2759
2760 return push_inst(compiler, br(src_r));
2761 case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2762 /* TODO(carenas): implement? */
2763 return SLJIT_SUCCESS;
2764 case SLJIT_PREFETCH_L1:
2765 case SLJIT_PREFETCH_L2:
2766 case SLJIT_PREFETCH_L3:
2767 case SLJIT_PREFETCH_ONCE:
2768 /* TODO(carenas): implement */
2769 return SLJIT_SUCCESS;
2770 default:
2771 /* TODO(carenas): probably should not success by default */
2772 return SLJIT_SUCCESS;
2773 }
2774
2775 return SLJIT_SUCCESS;
2776 }
2777
sljit_get_register_index(sljit_s32 reg)2778 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
2779 {
2780 CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2781 return (sljit_s32)gpr(reg);
2782 }
2783
sljit_get_float_register_index(sljit_s32 reg)2784 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
2785 {
2786 CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
2787 return (sljit_s32)fgpr(reg);
2788 }
2789
sljit_emit_op_custom(struct sljit_compiler * compiler,void * instruction,sljit_u32 size)2790 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2791 void *instruction, sljit_u32 size)
2792 {
2793 sljit_ins ins = 0;
2794
2795 CHECK_ERROR();
2796 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2797
2798 memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, size);
2799 return push_inst(compiler, ins);
2800 }
2801
2802 /* --------------------------------------------------------------------- */
2803 /* Floating point operators */
2804 /* --------------------------------------------------------------------- */
2805
2806 #define FLOAT_LOAD 0
2807 #define FLOAT_STORE 1
2808
float_mem(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 reg,sljit_s32 mem,sljit_sw memw)2809 static sljit_s32 float_mem(struct sljit_compiler *compiler, sljit_s32 op,
2810 sljit_s32 reg,
2811 sljit_s32 mem, sljit_sw memw)
2812 {
2813 struct addr addr;
2814 sljit_ins ins;
2815
2816 SLJIT_ASSERT(mem & SLJIT_MEM);
2817
2818 if ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw)) {
2819 FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));
2820
2821 if (op & FLOAT_STORE)
2822 ins = (op & SLJIT_32) ? 0x70000000 /* ste */ : 0x60000000 /* std */;
2823 else
2824 ins = (op & SLJIT_32) ? 0x78000000 /* le */ : 0x68000000 /* ld */;
2825
2826 return push_inst(compiler, ins | F20(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
2827 }
2828
2829 FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));
2830
2831 if (op & FLOAT_STORE)
2832 ins = (op & SLJIT_32) ? 0xed0000000066 /* stey */ : 0xed0000000067 /* stdy */;
2833 else
2834 ins = (op & SLJIT_32) ? 0xed0000000064 /* ley */ : 0xed0000000065 /* ldy */;
2835
2836 return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
2837 }
2838
emit_float(struct sljit_compiler * compiler,sljit_ins ins_r,sljit_ins ins,sljit_s32 reg,sljit_s32 src,sljit_sw srcw)2839 static sljit_s32 emit_float(struct sljit_compiler *compiler, sljit_ins ins_r, sljit_ins ins,
2840 sljit_s32 reg,
2841 sljit_s32 src, sljit_sw srcw)
2842 {
2843 struct addr addr;
2844
2845 if (!(src & SLJIT_MEM))
2846 return push_inst(compiler, ins_r | F4(reg) | F0(src));
2847
2848 FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
2849 return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | ((sljit_ins)addr.offset << 16));
2850 }
2851
/* Converts a floating point value to a signed integer
   (SLJIT_CONV_SW_FROM_F64 / SLJIT_CONV_S32_FROM_F64 and their
   single-precision variants). */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_ins dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
	sljit_ins ins;

	/* A memory source is first loaded into the scratch FPU register. */
	if (src & SLJIT_MEM) {
		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	/* M3 is set to 5 */
	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
		ins = (op & SLJIT_32) ? 0xb3a85000 /* cgebr */ : 0xb3a95000 /* cgdbr */;
	else
		ins = (op & SLJIT_32) ? 0xb3985000 /* cfebr */ : 0xb3995000 /* cfdbr */;

	FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | F0(src)));

	/* Spill the integer result; the *_S32 conversions store 32 bits. */
	if (dst & SLJIT_MEM)
		return store_word(compiler, dst_r, dst, dstw, GET_OPCODE(op) >= SLJIT_CONV_S32_FROM_F64);

	return SLJIT_SUCCESS;
}
2877
/* Converts a signed integer (word or 32-bit) to a floating point value
   (SLJIT_CONV_F64_FROM_SW / SLJIT_CONV_F64_FROM_S32 and their
   single-precision variants). */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
	sljit_ins ins;

	/* Immediate and memory sources are materialized into the scratch GPR. */
	if (src & SLJIT_IMM) {
		FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
		src = (sljit_s32)tmp0;
	}
	else if (src & SLJIT_MEM) {
		/* The *_FROM_S32 variants perform a 32-bit load. */
		FAIL_IF(load_word(compiler, tmp0, src, srcw, GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_S32));
		src = (sljit_s32)tmp0;
	}

	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
		ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */;
	else
		ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */;

	FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src)));

	/* A memory destination implies dst_r == TMP_FREG1; store it back. */
	if (dst & SLJIT_MEM)
		return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);

	return SLJIT_SUCCESS;
}
2906
sljit_emit_fop1_cmp(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2907 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2908 sljit_s32 src1, sljit_sw src1w,
2909 sljit_s32 src2, sljit_sw src2w)
2910 {
2911 sljit_ins ins_r, ins;
2912
2913 if (src1 & SLJIT_MEM) {
2914 FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src1, src1w));
2915 src1 = TMP_FREG1;
2916 }
2917
2918 if (op & SLJIT_32) {
2919 ins_r = 0xb3090000 /* cebr */;
2920 ins = 0xed0000000009 /* ceb */;
2921 } else {
2922 ins_r = 0xb3190000 /* cdbr */;
2923 ins = 0xed0000000019 /* cdb */;
2924 }
2925
2926 return emit_float(compiler, ins_r, ins, src1, src2, src2w);
2927 }
2928
/* Emits a single source operand floating point operation: move,
   precision conversion, negate or absolute value. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;
	sljit_ins ins;

	CHECK_ERROR();

	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	/* Widening conversion can read its operand from memory directly. */
	if (op == SLJIT_CONV_F64_FROM_F32)
		FAIL_IF(emit_float(compiler, 0xb3040000 /* ldebr */, 0xed0000000004 /* ldeb */, dst_r, src, srcw));
	else {
		if (src & SLJIT_MEM) {
			/* For SLJIT_CONV_F32_FROM_F64 the source is double
			   precision, so it is loaded as 64-bit even though the
			   SLJIT_32 flag is set on the operation. */
			FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op == SLJIT_CONV_F32_FROM_F64 ? 0 : (op & SLJIT_32)), dst_r, src, srcw));
			src = dst_r;
		}

		switch (GET_OPCODE(op)) {
		case SLJIT_MOV_F64:
			if (FAST_IS_REG(dst)) {
				if (dst == src)
					return SLJIT_SUCCESS;

				ins = (op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
				break;
			}
			/* Register (or freshly loaded) source stored to memory. */
			return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), src, dst, dstw);
		case SLJIT_CONV_F64_FROM_F32:
			/* Only SLJIT_CONV_F32_FROM_F64. */
			ins = 0xb3440000 /* ledbr */;
			break;
		case SLJIT_NEG_F64:
			ins = (op & SLJIT_32) ? 0xb3030000 /* lcebr */ : 0xb3130000 /* lcdbr */;
			break;
		default:
			SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_ABS_F64);
			ins = (op & SLJIT_32) ? 0xb3000000 /* lpebr */ : 0xb3100000 /* lpdbr */;
			break;
		}

		FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src)));
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	SLJIT_ASSERT(dst_r == TMP_FREG1);

	/* Spill the result held in the scratch FPU register. */
	return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
}
2983
2984 #define FLOAT_MOV(op, dst_r, src_r) \
2985 (((op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */) | F4(dst_r) | F0(src_r))
2986
/* Emits a two source operand floating point operation (add, sub, mul,
   div).  The first operand is arranged into dst_r, then src2 is applied
   to it in place. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r = TMP_FREG1;
	sljit_ins ins_r, ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	do {
		if (FAST_IS_REG(dst)) {
			dst_r = dst;

			/* dst already contains src1: operate in place. */
			if (dst == src1)
				break;

			if (dst == src2) {
				/* Commutative operations: simply swap the operands. */
				if (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64) {
					src2 = src1;
					src2w = src1w;
					src1 = dst;
					break;
				}

				/* Non-commutative: preserve src2 before dst is
				   overwritten by the src1 copy below. */
				FAIL_IF(push_inst(compiler, FLOAT_MOV(op, TMP_FREG1, src2)));
				src2 = TMP_FREG1;
			}
		}

		/* Move src1 into the destination register. */
		if (src1 & SLJIT_MEM)
			FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), dst_r, src1, src1w));
		else
			FAIL_IF(push_inst(compiler, FLOAT_MOV(op, dst_r, src1)));
	} while (0);

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		ins_r = (op & SLJIT_32) ? 0xb30a0000 /* aebr */ : 0xb31a0000 /* adbr */;
		ins = (op & SLJIT_32) ? 0xed000000000a /* aeb */ : 0xed000000001a /* adb */;
		break;
	case SLJIT_SUB_F64:
		ins_r = (op & SLJIT_32) ? 0xb30b0000 /* sebr */ : 0xb31b0000 /* sdbr */;
		ins = (op & SLJIT_32) ? 0xed000000000b /* seb */ : 0xed000000001b /* sdb */;
		break;
	case SLJIT_MUL_F64:
		ins_r = (op & SLJIT_32) ? 0xb3170000 /* meebr */ : 0xb31c0000 /* mdbr */;
		ins = (op & SLJIT_32) ? 0xed0000000017 /* meeb */ : 0xed000000001c /* mdb */;
		break;
	default:
		SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_DIV_F64);
		ins_r = (op & SLJIT_32) ? 0xb30d0000 /* debr */ : 0xb31d0000 /* ddbr */;
		ins = (op & SLJIT_32) ? 0xed000000000d /* deb */ : 0xed000000001d /* ddb */;
		break;
	}

	FAIL_IF(emit_float(compiler, ins_r, ins, dst_r, src2, src2w));

	/* A memory destination implies dst_r == TMP_FREG1; store it back. */
	if (dst & SLJIT_MEM)
		return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);

	SLJIT_ASSERT(dst_r != TMP_FREG1);
	return SLJIT_SUCCESS;
}
3055
3056 /* --------------------------------------------------------------------- */
3057 /* Other instructions */
3058 /* --------------------------------------------------------------------- */
3059
sljit_emit_fast_enter(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)3060 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
3061 {
3062 CHECK_ERROR();
3063 CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
3064 ADJUST_LOCAL_OFFSET(dst, dstw);
3065
3066 if (FAST_IS_REG(dst))
3067 return push_inst(compiler, lgr(gpr(dst), fast_link_r));
3068
3069 /* memory */
3070 return store_word(compiler, fast_link_r, dst, dstw, 0);
3071 }
3072
3073 /* --------------------------------------------------------------------- */
3074 /* Conditional instructions */
3075 /* --------------------------------------------------------------------- */
3076
sljit_emit_label(struct sljit_compiler * compiler)3077 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
3078 {
3079 struct sljit_label *label;
3080
3081 CHECK_ERROR_PTR();
3082 CHECK_PTR(check_sljit_emit_label(compiler));
3083
3084 if (compiler->last_label && compiler->last_label->size == compiler->size)
3085 return compiler->last_label;
3086
3087 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
3088 PTR_FAIL_IF(!label);
3089 set_label(label, compiler);
3090 return label;
3091 }
3092
sljit_emit_jump(struct sljit_compiler * compiler,sljit_s32 type)3093 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
3094 {
3095 sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf;
3096
3097 CHECK_ERROR_PTR();
3098 CHECK_PTR(check_sljit_emit_jump(compiler, type));
3099
3100 /* record jump */
3101 struct sljit_jump *jump = (struct sljit_jump *)
3102 ensure_abuf(compiler, sizeof(struct sljit_jump));
3103 PTR_FAIL_IF(!jump);
3104 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
3105 jump->addr = compiler->size;
3106
3107 /* emit jump instruction */
3108 type &= 0xff;
3109 if (type >= SLJIT_FAST_CALL)
3110 PTR_FAIL_IF(push_inst(compiler, brasl(type == SLJIT_FAST_CALL ? fast_link_r : link_r, 0)));
3111 else
3112 PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));
3113
3114 return jump;
3115 }
3116
/* Emits a call with typed arguments.  For tail calls (SLJIT_CALL_RETURN)
   the stack frame is released first and the call becomes a plain jump. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	if (type & SLJIT_CALL_RETURN) {
		PTR_FAIL_IF(emit_stack_frame_release(compiler));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
	|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	/* The nested sljit_emit_jump must not re-verify the arguments. */
	compiler->skip_checks = 1;
#endif

	return sljit_emit_jump(compiler, type);
}
3135
sljit_emit_ijump(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src,sljit_sw srcw)3136 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
3137 {
3138 sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
3139
3140 CHECK_ERROR();
3141 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
3142
3143 if (src & SLJIT_IMM) {
3144 SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */
3145 FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
3146 }
3147 else if (src & SLJIT_MEM) {
3148 ADJUST_LOCAL_OFFSET(src, srcw);
3149 FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */));
3150 }
3151
3152 /* emit jump instruction */
3153 if (type >= SLJIT_FAST_CALL)
3154 return push_inst(compiler, basr(type == SLJIT_FAST_CALL ? fast_link_r : link_r, src_r));
3155
3156 return push_inst(compiler, br(src_r));
3157 }
3158
/* Emits an indirect call (or tail call) with typed arguments. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	SLJIT_ASSERT(gpr(TMP_REG2) == tmp1);

	if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
		src = TMP_REG2;
	}

	if (type & SLJIT_CALL_RETURN) {
		/* The frame release below restores the saved registers, so a
		   target kept in one of them must be copied to tmp1 first. */
		if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) {
			FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
			src = TMP_REG2;
		}

		FAIL_IF(emit_stack_frame_release(compiler));
		type = SLJIT_JUMP;
	}

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
	|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	/* The nested sljit_emit_ijump must not re-verify the arguments. */
	compiler->skip_checks = 1;
#endif

	return sljit_emit_ijump(compiler, type, src, srcw);
}
3191
sljit_emit_op_flags(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 type)3192 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
3193 sljit_s32 dst, sljit_sw dstw,
3194 sljit_s32 type)
3195 {
3196 sljit_u8 mask = get_cc(compiler, type & 0xff);
3197
3198 CHECK_ERROR();
3199 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
3200
3201 sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
3202 sljit_gpr loc_r = tmp1;
3203 switch (GET_OPCODE(op)) {
3204 case SLJIT_AND:
3205 case SLJIT_OR:
3206 case SLJIT_XOR:
3207 compiler->status_flags_state = op & SLJIT_SET_Z;
3208
3209 /* dst is also source operand */
3210 if (dst & SLJIT_MEM)
3211 FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_32));
3212
3213 break;
3214 case SLJIT_MOV32:
3215 op |= SLJIT_32;
3216 /* fallthrough */
3217 case SLJIT_MOV:
3218 /* can write straight into destination */
3219 loc_r = dst_r;
3220 break;
3221 default:
3222 SLJIT_UNREACHABLE();
3223 }
3224
3225 /* TODO(mundaym): fold into cmov helper function? */
3226 #define LEVAL(i) i(loc_r, 1, mask)
3227 if (have_lscond2()) {
3228 FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
3229 FAIL_IF(push_inst(compiler,
3230 WHEN2(op & SLJIT_32, lochi, locghi)));
3231 } else {
3232 /* TODO(mundaym): no load/store-on-condition 2 facility (ipm? branch-and-set?) */
3233 abort();
3234 }
3235 #undef LEVAL
3236
3237 /* apply bitwise op and set condition codes */
3238 switch (GET_OPCODE(op)) {
3239 #define LEVAL(i) i(dst_r, loc_r)
3240 case SLJIT_AND:
3241 FAIL_IF(push_inst(compiler,
3242 WHEN2(op & SLJIT_32, nr, ngr)));
3243 break;
3244 case SLJIT_OR:
3245 FAIL_IF(push_inst(compiler,
3246 WHEN2(op & SLJIT_32, or, ogr)));
3247 break;
3248 case SLJIT_XOR:
3249 FAIL_IF(push_inst(compiler,
3250 WHEN2(op & SLJIT_32, xr, xgr)));
3251 break;
3252 #undef LEVAL
3253 }
3254
3255 /* store result to memory if required */
3256 if (dst & SLJIT_MEM)
3257 return store_word(compiler, dst_r, dst, dstw, (op & SLJIT_32));
3258
3259 return SLJIT_SUCCESS;
3260 }
3261
sljit_emit_cmov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_reg,sljit_s32 src,sljit_sw srcw)3262 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
3263 sljit_s32 dst_reg,
3264 sljit_s32 src, sljit_sw srcw)
3265 {
3266 sljit_u8 mask = get_cc(compiler, type & 0xff);
3267 sljit_gpr dst_r = gpr(dst_reg & ~SLJIT_32);
3268 sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;
3269
3270 CHECK_ERROR();
3271 CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
3272
3273 if (src & SLJIT_IMM) {
3274 /* TODO(mundaym): fast path with lscond2 */
3275 FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
3276 }
3277
3278 #define LEVAL(i) i(dst_r, src_r, mask)
3279 if (have_lscond1())
3280 return push_inst(compiler,
3281 WHEN2(dst_reg & SLJIT_32, locr, locgr));
3282
3283 #undef LEVAL
3284
3285 /* TODO(mundaym): implement */
3286 return SLJIT_ERR_UNSUPPORTED;
3287 }
3288
/* --------------------------------------------------------------------- */
/*  Constants and labels                                                 */
/* --------------------------------------------------------------------- */
3292
3293 /* On s390x we build a literal pool to hold constants. This has two main
3294 advantages:
3295
3296 1. we only need one instruction in the instruction stream (LGRL)
3297 2. we can store 64 bit addresses and use 32 bit offsets
3298
3299 To retrofit the extra information needed to build the literal pool we
3300 add a new sljit_s390x_const struct that contains the initial value but
3301 can still be cast to a sljit_const. */
3302
/* Emits a load of a patchable 64-bit constant kept in the literal pool.
   The emitted instruction is tagged with sljit_ins_const so the final
   code generator can fill in the pool offset. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_s390x_const *const_;
	sljit_gpr dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));

	/* The s390x-specific struct carries the initial value but can still
	   be cast to a plain sljit_const (see the comment above). */
	const_ = (struct sljit_s390x_const*)ensure_abuf(compiler,
		sizeof(struct sljit_s390x_const));
	PTR_FAIL_IF(!const_);
	set_const((struct sljit_const*)const_, compiler);
	const_->init_value = init_value;

	dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	if (have_genext())
		/* Single pc-relative load (general-instructions extension). */
		PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | lgrl(dst_r, 0)));
	else {
		/* Fallback: compute the pool address, then load through it. */
		PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | larl(tmp1, 0)));
		PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
	}

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0 /* always 64-bit */));

	return (struct sljit_const*)const_;
}
3330
sljit_set_jump_addr(sljit_uw addr,sljit_uw new_target,sljit_sw executable_offset)3331 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
3332 {
3333 /* Update the constant pool. */
3334 sljit_uw *ptr = (sljit_uw *)addr;
3335 SLJIT_UNUSED_ARG(executable_offset);
3336
3337 SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
3338 *ptr = new_target;
3339 SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
3340 SLJIT_CACHE_FLUSH(ptr, ptr + 1);
3341 }
3342
/* Patches a constant emitted by sljit_emit_const.  Constants live in the
   literal pool as full 64-bit words, so updating one is identical to
   updating a jump target. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
}
3347
/* Emits a load of a label address that is resolved at code generation
   time; the value is placed in the literal pool like a constant. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label(
	struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_put_label *put_label;
	sljit_gpr dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
	PTR_FAIL_IF(!put_label);
	set_put_label(put_label, compiler, 0);

	dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;

	if (have_genext())
		/* Single pc-relative load (general-instructions extension). */
		PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));
	else {
		/* Fallback: compute the pool address, then load through it. */
		PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));
		PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
	}

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0));

	return put_label;
}
3377
3378 /* TODO(carenas): EVAL probably should move up or be refactored */
3379 #undef WHEN2
3380 #undef EVAL
3381
3382 #undef tmp1
3383 #undef tmp0
3384
3385 /* TODO(carenas): undef other macros that spill like is_u12? */
3386