1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include <sys/auxv.h>
28
/* Static facility detection is only possible when the target architecture
   level (__ARCH__) is fixed at compile time. */
#ifdef __ARCH__
#define ENABLE_STATIC_FACILITY_DETECTION 1
#else
#define ENABLE_STATIC_FACILITY_DETECTION 0
#endif
/* Runtime detection via the stfle instruction (see have_facility_dynamic). */
#define ENABLE_DYNAMIC_FACILITY_DETECTION 1
35
sljit_get_platform_name(void)36 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
37 {
38 return "s390x" SLJIT_CPUINFO;
39 }
40
/* Instructions. */
typedef sljit_uw sljit_ins;

/* Instruction tags (most significant halfword). */
static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48;

/* Scratch registers beyond the SLJIT-visible set; they map to r0/r1 below. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)

/* Maps SLJIT register numbers to s390x hardware register numbers. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1
};

/* there are also a[2-15] available, but they are slower to access and
 * their use is limited as mundaym explained:
 * https://github.com/zherczeg/sljit/pull/91#discussion_r486895689
 */
58
/* General Purpose Registers [0-15]. */
typedef sljit_uw sljit_gpr;

/*
 * WARNING
 * the following code is non standard and should be improved for
 * consistency, but doesn't use SLJIT_NUMBER_OF_REGISTERS based
 * registers because r0 and r1 are the ABI recommended volatiles.
 * there is a gpr() function that maps sljit to physical register numbers
 * that should be used instead of the usual index into reg_map[] and
 * will be retired ASAP (TODO: carenas)
 */

/* NOTE: these are hardware register numbers, not reg_map indices. */
static const sljit_gpr r0 = 0;		/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */
static const sljit_gpr r1 = 1;		/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */
static const sljit_gpr r2 = 2;		/* reg_map[1]: 1st argument */
static const sljit_gpr r3 = 3;		/* reg_map[2]: 2nd argument */
static const sljit_gpr r4 = 4;		/* reg_map[3]: 3rd argument */
static const sljit_gpr r5 = 5;		/* reg_map[4]: 4th argument */
static const sljit_gpr r6 = 6;		/* reg_map[5]: 5th argument; 1st saved register */
static const sljit_gpr r7 = 7;		/* reg_map[6] */
static const sljit_gpr r8 = 8;		/* reg_map[7] */
static const sljit_gpr r9 = 9;		/* reg_map[8] */
static const sljit_gpr r10 = 10;	/* reg_map[9] */
static const sljit_gpr r11 = 11;	/* reg_map[10] */
static const sljit_gpr r12 = 12;	/* reg_map[11]: GOT */
static const sljit_gpr r13 = 13;	/* reg_map[12]: Literal Pool pointer */
static const sljit_gpr r14 = 14;	/* reg_map[0]: return address and flag register */
static const sljit_gpr r15 = 15;	/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */

/* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */
/* TODO(carenas): r12 might conflict in PIC code, reserve? */
/* TODO(carenas): r13 is usually pointed to "pool" per ABI, using a tmp
 *                like we do know might be faster though, reserve?
 */

/* TODO(carenas): should be named TMP_REG[1-2] for consistency */
#define tmp0 r0
#define tmp1 r1

/* TODO(carenas): flags should move to a different register so that
 *                link register doesn't need to change
 */
102
/* When reg cannot be unused.
   Macro hygiene fix: the first use of the argument was unparenthesized
   (`reg > 0`), which mis-binds for expression arguments such as `a + b`. */
#define IS_GPR_REG(reg) (((reg) > 0) && (reg) <= SLJIT_SP)
105
/* Link register. */
static const sljit_gpr link_r = 14; /* r14 */

/* Scratch floating point register (maps to f0 via freg_map). */
#define TMP_FREG1 (0)

/* Maps SLJIT float register numbers to s390x FPR numbers. */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
	1, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8,
};

/* RnA(r): place a 4-bit register field at bit offset n (counted from the
   least significant end) of an instruction word. */
#define R0A(r) (r)
#define R4A(r) ((r) << 4)
#define R8A(r) ((r) << 8)
#define R12A(r) ((r) << 12)
#define R16A(r) ((r) << 16)
#define R20A(r) ((r) << 20)
#define R28A(r) ((r) << 28)
#define R32A(r) ((r) << 32)
#define R36A(r) ((r) << 36)

/* Encode a mapped general purpose register at bit offset 0. */
#define R0(r) ((sljit_ins)reg_map[r])

/* Fn(r): encode a mapped floating point register at bit offset n. */
#define F0(r) ((sljit_ins)freg_map[r])
#define F4(r) (R4A((sljit_ins)freg_map[r]))
#define F20(r) (R20A((sljit_ins)freg_map[r]))
#define F36(r) (R36A((sljit_ins)freg_map[r]))

struct sljit_s390x_const {
	struct sljit_const const_; /* must be first */
	sljit_sw init_value; /* required to build literal pool */
};
136
/* Convert SLJIT register to hardware register.
   r must be a valid index into reg_map[] (asserted). */
static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)
{
	SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0])));
	return reg_map[r];
}
143
/* Convert SLJIT floating point register to hardware FPR number.
   r must be a valid index into freg_map[] (asserted). */
static SLJIT_INLINE sljit_gpr fgpr(sljit_s32 r)
{
	SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(freg_map) / sizeof(freg_map[0])));
	return freg_map[r];
}
149
150 /* Size of instruction in bytes. Tags must already be cleared. */
sizeof_ins(sljit_ins ins)151 static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins)
152 {
153 /* keep faulting instructions */
154 if (ins == 0)
155 return 2;
156
157 if ((ins & 0x00000000ffffL) == ins)
158 return 2;
159 if ((ins & 0x0000ffffffffL) == ins)
160 return 4;
161 if ((ins & 0xffffffffffffL) == ins)
162 return 6;
163
164 SLJIT_UNREACHABLE();
165 return (sljit_uw)-1;
166 }
167
/* Append one (possibly tagged) instruction word to the compiler's
   instruction buffer. Fails via FAIL_IF when the buffer cannot grow. */
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
{
	sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ibuf);
	*ibuf = ins;
	compiler->size++;
	return SLJIT_SUCCESS;
}
176
encode_inst(void ** ptr,sljit_ins ins)177 static sljit_s32 encode_inst(void **ptr, sljit_ins ins)
178 {
179 sljit_u16 *ibuf = (sljit_u16 *)*ptr;
180 sljit_uw size = sizeof_ins(ins);
181
182 SLJIT_ASSERT((size & 6) == size);
183 switch (size) {
184 case 6:
185 *ibuf++ = (sljit_u16)(ins >> 32);
186 /* fallthrough */
187 case 4:
188 *ibuf++ = (sljit_u16)(ins >> 16);
189 /* fallthrough */
190 case 2:
191 *ibuf++ = (sljit_u16)(ins);
192 }
193 *ptr = (void*)ibuf;
194 return SLJIT_SUCCESS;
195 }
196
/* True when the current flags came from an add/sub that was not a compare. */
#define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \
	(((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \
		&& !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE))

/* Map the given type to a 4-bit condition code mask. */
static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) {
	const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */
	const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */
	const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */
	const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */

	switch (type) {
	case SLJIT_EQUAL:
		if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
			/* add/sub set the cc relative to zero, not to an operand */
			sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
			if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
				return cc0;
			if (type == SLJIT_OVERFLOW)
				return (cc0 | cc3);
			return (cc0 | cc2);
		}
		/* fallthrough */

	case SLJIT_F_EQUAL:
	case SLJIT_ORDERED_EQUAL:
		return cc0;

	case SLJIT_NOT_EQUAL:
		if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
			sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
			if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
				return (cc1 | cc2 | cc3);
			if (type == SLJIT_OVERFLOW)
				return (cc1 | cc2);
			return (cc1 | cc3);
		}
		/* fallthrough */

	case SLJIT_UNORDERED_OR_NOT_EQUAL:
		return (cc1 | cc2 | cc3);

	case SLJIT_LESS:
		return cc1;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
		return (cc0 | cc2 | cc3);

	case SLJIT_GREATER:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
			return cc2;
		return cc3;

	case SLJIT_LESS_EQUAL:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
			return (cc0 | cc1);
		return (cc0 | cc1 | cc2);

	case SLJIT_SIG_LESS:
	case SLJIT_F_LESS:
	case SLJIT_ORDERED_LESS:
		return cc1;

	case SLJIT_NOT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
			return (cc2 | cc3);
		/* fallthrough */

	case SLJIT_SIG_LESS_EQUAL:
	case SLJIT_F_LESS_EQUAL:
	case SLJIT_ORDERED_LESS_EQUAL:
		return (cc0 | cc1);

	case SLJIT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
			return (cc0 | cc1);
		/* fallthrough */

	case SLJIT_SIG_GREATER:
	case SLJIT_UNORDERED_OR_GREATER:
		/* Overflow is considered greater, see SLJIT_SUB. */
		return cc2 | cc3;

	case SLJIT_SIG_GREATER_EQUAL:
		return (cc0 | cc2 | cc3);

	case SLJIT_OVERFLOW:
		if (compiler->status_flags_state & SLJIT_SET_Z)
			return (cc2 | cc3);
		/* fallthrough */

	case SLJIT_UNORDERED:
		return cc3;

	case SLJIT_NOT_OVERFLOW:
		if (compiler->status_flags_state & SLJIT_SET_Z)
			return (cc0 | cc1);
		/* fallthrough */

	case SLJIT_ORDERED:
		return (cc0 | cc1 | cc2);

	case SLJIT_F_NOT_EQUAL:
	case SLJIT_ORDERED_NOT_EQUAL:
		return (cc1 | cc2);

	case SLJIT_F_GREATER:
	case SLJIT_ORDERED_GREATER:
		return cc2;

	case SLJIT_F_GREATER_EQUAL:
	case SLJIT_ORDERED_GREATER_EQUAL:
		return (cc0 | cc2);

	case SLJIT_UNORDERED_OR_LESS_EQUAL:
		return (cc0 | cc1 | cc3);

	case SLJIT_UNORDERED_OR_EQUAL:
		return (cc0 | cc3);

	case SLJIT_UNORDERED_OR_LESS:
		return (cc1 | cc3);
	}

	SLJIT_UNREACHABLE();
	return (sljit_u8)-1;
}
324
/* Facility to bit index mappings.
   Note: some facilities share the same bit index.
   NOTE(review): numbers presumably follow the facility-indication list of the
   z/Architecture Principles of Operation — confirm against that document. */
typedef sljit_uw facility_bit;
#define STORE_FACILITY_LIST_EXTENDED_FACILITY 7
#define FAST_LONG_DISPLACEMENT_FACILITY 19
#define EXTENDED_IMMEDIATE_FACILITY 21
#define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34
#define DISTINCT_OPERAND_FACILITY 45
#define HIGH_WORD_FACILITY 45
#define POPULATION_COUNT_FACILITY 45
#define LOAD_STORE_ON_CONDITION_1_FACILITY 45
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49
#define LOAD_STORE_ON_CONDITION_2_FACILITY 53
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58
#define VECTOR_FACILITY 129
#define VECTOR_ENHANCEMENTS_1_FACILITY 135
341
/* Report whether a facility is known to be present due to the compiler
   settings. This function should always be compiled to a constant
   value given a constant argument. */
static SLJIT_INLINE int have_facility_static(facility_bit x)
{
#if ENABLE_STATIC_FACILITY_DETECTION
	switch (x) {
	case FAST_LONG_DISPLACEMENT_FACILITY:
		return (__ARCH__ >= 6 /* z990 */);
	case EXTENDED_IMMEDIATE_FACILITY:
	case STORE_FACILITY_LIST_EXTENDED_FACILITY:
		return (__ARCH__ >= 7 /* z9-109 */);
	case GENERAL_INSTRUCTION_EXTENSION_FACILITY:
		return (__ARCH__ >= 8 /* z10 */);
	case DISTINCT_OPERAND_FACILITY:
		return (__ARCH__ >= 9 /* z196 */);
	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY:
		return (__ARCH__ >= 10 /* zEC12 */);
	case LOAD_STORE_ON_CONDITION_2_FACILITY:
	case VECTOR_FACILITY:
		return (__ARCH__ >= 11 /* z13 */);
	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY:
	case VECTOR_ENHANCEMENTS_1_FACILITY:
		return (__ARCH__ >= 12 /* z14 */);
	default:
		SLJIT_UNREACHABLE();
	}
#endif
	/* Unknown at compile time; callers fall back to dynamic detection. */
	return 0;
}
372
get_hwcap()373 static SLJIT_INLINE unsigned long get_hwcap()
374 {
375 static unsigned long hwcap = 0;
376 if (SLJIT_UNLIKELY(!hwcap)) {
377 hwcap = getauxval(AT_HWCAP);
378 SLJIT_ASSERT(hwcap != 0);
379 }
380 return hwcap;
381 }
382
have_stfle()383 static SLJIT_INLINE int have_stfle()
384 {
385 if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY))
386 return 1;
387
388 return (get_hwcap() & HWCAP_S390_STFLE);
389 }
390
/* Report whether the given facility is available. This function always
   performs a runtime check. */
static int have_facility_dynamic(facility_bit x)
{
#if ENABLE_DYNAMIC_FACILITY_DETECTION
	/* Cached copy of the stfle facility list (4 doublewords = 256 bits). */
	static struct {
		sljit_uw bits[4];
	} cpu_features;
	size_t size = sizeof(cpu_features);
	const sljit_uw word_index = x >> 6;
	/* Facility bits are numbered from the most significant bit downwards. */
	const sljit_uw bit_index = ((1UL << 63) >> (x & 63));

	SLJIT_ASSERT(x < size * 8);
	if (SLJIT_UNLIKELY(!have_stfle()))
		return 0;

	/* A zero first word means the list has not been fetched yet; the
	   asserts document the assumption that a real list never has an
	   all-zero first doubleword. */
	if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) {
		/* stfle takes the doubleword count minus one in r0. */
		__asm__ __volatile__ (
			"lgr %%r0, %0;"
			"stfle 0(%1);"
			/* outputs */:
			/* inputs */: "d" ((size / 8) - 1), "a" (&cpu_features)
			/* clobbers */: "r0", "cc", "memory"
		);
		SLJIT_ASSERT(cpu_features.bits[0] != 0);
	}
	return (cpu_features.bits[word_index] & bit_index) != 0;
#else
	return 0;
#endif
}
422
/* Define a cached predicate `name` reporting availability of `bit`.
   The result is memoized in a function-local static after the first call. */
#define HAVE_FACILITY(name, bit) \
static SLJIT_INLINE int name() \
{ \
	static int have = -1; \
	/* Static check first. May allow the function to be optimized away. */ \
	if (have_facility_static(bit)) \
		have = 1; \
	else if (SLJIT_UNLIKELY(have < 0)) \
		have = have_facility_dynamic(bit) ? 1 : 0; \
	\
	return have; \
}

HAVE_FACILITY(have_eimm, EXTENDED_IMMEDIATE_FACILITY)
HAVE_FACILITY(have_ldisp, FAST_LONG_DISPLACEMENT_FACILITY)
HAVE_FACILITY(have_genext, GENERAL_INSTRUCTION_EXTENSION_FACILITY)
HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY)
HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY)
HAVE_FACILITY(have_misc2, MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY)
#undef HAVE_FACILITY
443
/* Unsigned displacement/immediate range checks. */
#define is_u12(d) (0 <= (d) && (d) <= 0x00000fffL)
#define is_u32(d) (0 <= (d) && (d) <= 0xffffffffL)

/* True when v fits in a signed two's-complement field of `bitlen` bits. */
#define CHECK_SIGNED(v, bitlen) \
	((v) >= -(1 << ((bitlen) - 1)) && (v) < (1 << ((bitlen) - 1)))

#define is_s8(d) CHECK_SIGNED((d), 8)
#define is_s16(d) CHECK_SIGNED((d), 16)
#define is_s20(d) CHECK_SIGNED((d), 20)
#define is_s32(d) ((d) == (sljit_s32)(d))
454
455 static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d)
456 {
457 SLJIT_ASSERT(is_s20(d));
458
459 sljit_uw dh = (d >> 12) & 0xff;
460 sljit_uw dl = (d << 8) & 0xfff00;
461 return (dh | dl) << 8;
462 }
463
/* TODO(carenas): variadic macro is not strictly needed */
#define SLJIT_S390X_INSTRUCTION(op, ...) \
static SLJIT_INLINE sljit_ins op(__VA_ARGS__)

/* RR form instructions: 2 bytes; R1 and R2 share the low byte. */
#define SLJIT_S390X_RR(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
{ \
	return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \
}

/* AND */
SLJIT_S390X_RR(nr, 0x1400)

/* BRANCH AND SAVE */
SLJIT_S390X_RR(basr, 0x0d00)

/* BRANCH ON CONDITION */
SLJIT_S390X_RR(bcr, 0x0700) /* TODO(mundaym): type for mask? */

/* DIVIDE */
SLJIT_S390X_RR(dr, 0x1d00)

/* EXCLUSIVE OR */
SLJIT_S390X_RR(xr, 0x1700)

/* LOAD */
SLJIT_S390X_RR(lr, 0x1800)

/* LOAD COMPLEMENT */
SLJIT_S390X_RR(lcr, 0x1300)

/* OR */
SLJIT_S390X_RR(or, 0x1600)

#undef SLJIT_S390X_RR
500
/* RRE form instructions: 4 bytes; R1 and R2 in the final byte. */
#define SLJIT_S390X_RRE(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
{ \
	return (pattern) | R4A(dst) | R0A(src); \
}

/* AND */
SLJIT_S390X_RRE(ngr, 0xb9800000)

/* DIVIDE LOGICAL */
SLJIT_S390X_RRE(dlr, 0xb9970000)
SLJIT_S390X_RRE(dlgr, 0xb9870000)

/* DIVIDE SINGLE */
SLJIT_S390X_RRE(dsgr, 0xb90d0000)

/* EXCLUSIVE OR */
SLJIT_S390X_RRE(xgr, 0xb9820000)

/* LOAD */
SLJIT_S390X_RRE(lgr, 0xb9040000)
SLJIT_S390X_RRE(lgfr, 0xb9140000)

/* LOAD BYTE */
SLJIT_S390X_RRE(lbr, 0xb9260000)
SLJIT_S390X_RRE(lgbr, 0xb9060000)

/* LOAD COMPLEMENT */
SLJIT_S390X_RRE(lcgr, 0xb9030000)

/* LOAD HALFWORD */
SLJIT_S390X_RRE(lhr, 0xb9270000)
SLJIT_S390X_RRE(lghr, 0xb9070000)

/* LOAD LOGICAL */
SLJIT_S390X_RRE(llgfr, 0xb9160000)

/* LOAD LOGICAL CHARACTER */
SLJIT_S390X_RRE(llcr, 0xb9940000)
SLJIT_S390X_RRE(llgcr, 0xb9840000)

/* LOAD LOGICAL HALFWORD */
SLJIT_S390X_RRE(llhr, 0xb9950000)
SLJIT_S390X_RRE(llghr, 0xb9850000)

/* MULTIPLY LOGICAL */
SLJIT_S390X_RRE(mlgr, 0xb9860000)

/* MULTIPLY SINGLE */
SLJIT_S390X_RRE(msgfr, 0xb91c0000)

/* OR */
SLJIT_S390X_RRE(ogr, 0xb9810000)

/* SUBTRACT */
SLJIT_S390X_RRE(sgr, 0xb9090000)

#undef SLJIT_S390X_RRE
560
/* RI-a form instructions: 4 bytes; R1 plus a 16-bit immediate. */
#define SLJIT_S390X_RIA(name, pattern, imm_type) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
{ \
	return (pattern) | R20A(reg) | (imm & 0xffff); \
}

/* ADD HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(aghi, 0xa70b0000, sljit_s16)

/* LOAD HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(lhi, 0xa7080000, sljit_s16)
SLJIT_S390X_RIA(lghi, 0xa7090000, sljit_s16)

/* LOAD LOGICAL IMMEDIATE */
SLJIT_S390X_RIA(llihh, 0xa50c0000, sljit_u16)
SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16)
SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16)
SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16)

/* MULTIPLY HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(mhi, 0xa70c0000, sljit_s16)
SLJIT_S390X_RIA(mghi, 0xa70d0000, sljit_s16)

/* OR IMMEDIATE */
SLJIT_S390X_RIA(oilh, 0xa50a0000, sljit_u16)

#undef SLJIT_S390X_RIA
589
/* RIL-a form instructions: 6 bytes; R1 plus a 32-bit immediate
   (requires extended immediate facility). */
#define SLJIT_S390X_RILA(name, pattern, imm_type) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
{ \
	SLJIT_ASSERT(have_eimm()); \
	return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \
}

/* ADD IMMEDIATE */
SLJIT_S390X_RILA(agfi, 0xc20800000000, sljit_s32)

/* ADD IMMEDIATE HIGH */
SLJIT_S390X_RILA(aih, 0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */

/* AND IMMEDIATE */
SLJIT_S390X_RILA(nihf, 0xc00a00000000, sljit_u32)

/* EXCLUSIVE OR IMMEDIATE */
SLJIT_S390X_RILA(xilf, 0xc00700000000, sljit_u32)

/* INSERT IMMEDIATE */
SLJIT_S390X_RILA(iihf, 0xc00800000000, sljit_u32)
SLJIT_S390X_RILA(iilf, 0xc00900000000, sljit_u32)

/* LOAD IMMEDIATE */
SLJIT_S390X_RILA(lgfi, 0xc00100000000, sljit_s32)

/* LOAD LOGICAL IMMEDIATE */
SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32)
SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32)

/* SUBTRACT LOGICAL IMMEDIATE */
SLJIT_S390X_RILA(slfi, 0xc20500000000, sljit_u32)

#undef SLJIT_S390X_RILA
625
/* RX-a form instructions: 4 bytes; R1, index X2, base B2 and an
   unsigned 12-bit displacement. */
#define SLJIT_S390X_RXA(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
{ \
	SLJIT_ASSERT((d & 0xfff) == d); \
	\
	return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \
}

/* LOAD */
SLJIT_S390X_RXA(l, 0x58000000)

/* LOAD ADDRESS */
SLJIT_S390X_RXA(la, 0x41000000)

/* LOAD HALFWORD */
SLJIT_S390X_RXA(lh, 0x48000000)

/* MULTIPLY SINGLE */
SLJIT_S390X_RXA(ms, 0x71000000)

/* STORE */
SLJIT_S390X_RXA(st, 0x50000000)

/* STORE CHARACTER */
SLJIT_S390X_RXA(stc, 0x42000000)

/* STORE HALFWORD */
SLJIT_S390X_RXA(sth, 0x40000000)

#undef SLJIT_S390X_RXA
657
/* RXY-a instructions: 6 bytes; like RX-a but with a signed 20-bit
   displacement. `cond` asserts any facility the opcode requires. */
#define SLJIT_S390X_RXYA(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
{ \
	SLJIT_ASSERT(cond); \
	\
	return (pattern) | R36A(r) | R32A(x) | R28A(b) | disp_s20(d); \
}

/* LOAD */
SLJIT_S390X_RXYA(ly, 0xe30000000058, have_ldisp())
SLJIT_S390X_RXYA(lg, 0xe30000000004, 1)
SLJIT_S390X_RXYA(lgf, 0xe30000000014, 1)

/* LOAD BYTE */
SLJIT_S390X_RXYA(lb, 0xe30000000076, have_ldisp())
SLJIT_S390X_RXYA(lgb, 0xe30000000077, have_ldisp())

/* LOAD HALFWORD */
SLJIT_S390X_RXYA(lhy, 0xe30000000078, have_ldisp())
SLJIT_S390X_RXYA(lgh, 0xe30000000015, 1)

/* LOAD LOGICAL */
SLJIT_S390X_RXYA(llgf, 0xe30000000016, 1)

/* LOAD LOGICAL CHARACTER */
SLJIT_S390X_RXYA(llc, 0xe30000000094, have_eimm())
SLJIT_S390X_RXYA(llgc, 0xe30000000090, 1)

/* LOAD LOGICAL HALFWORD */
SLJIT_S390X_RXYA(llh, 0xe30000000095, have_eimm())
SLJIT_S390X_RXYA(llgh, 0xe30000000091, 1)

/* MULTIPLY SINGLE */
SLJIT_S390X_RXYA(msy, 0xe30000000051, have_ldisp())
SLJIT_S390X_RXYA(msg, 0xe3000000000c, 1)

/* STORE */
SLJIT_S390X_RXYA(sty, 0xe30000000050, have_ldisp())
SLJIT_S390X_RXYA(stg, 0xe30000000024, 1)

/* STORE CHARACTER */
SLJIT_S390X_RXYA(stcy, 0xe30000000072, have_ldisp())

/* STORE HALFWORD */
SLJIT_S390X_RXYA(sthy, 0xe30000000070, have_ldisp())

#undef SLJIT_S390X_RXYA
706
/* RSY-a instructions: 6 bytes; two registers, base B2 and a signed
   20-bit displacement. */
#define SLJIT_S390X_RSYA(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_s32 d, sljit_gpr b) \
{ \
	SLJIT_ASSERT(cond); \
	\
	return (pattern) | R36A(dst) | R32A(src) | R28A(b) | disp_s20(d); \
}

/* LOAD MULTIPLE */
SLJIT_S390X_RSYA(lmg, 0xeb0000000004, 1)

/* SHIFT LEFT LOGICAL */
SLJIT_S390X_RSYA(sllg, 0xeb000000000d, 1)

/* SHIFT RIGHT SINGLE */
SLJIT_S390X_RSYA(srag, 0xeb000000000a, 1)

/* STORE MULTIPLE */
SLJIT_S390X_RSYA(stmg, 0xeb0000000024, 1)

#undef SLJIT_S390X_RSYA
729
/* RIE-f instructions: rotate-then-operate-on-selected-bits family; i3/i4
   give the selected bit range, i5 the rotate amount
   (require general-instructions-extension facility). */
#define SLJIT_S390X_RIEF(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \
{ \
	sljit_ins i3, i4, i5; \
	\
	SLJIT_ASSERT(have_genext()); \
	i3 = (sljit_ins)start << 24; \
	i4 = (sljit_ins)end << 16; \
	i5 = (sljit_ins)rot << 8; \
	\
	return (pattern) | R36A(dst & 0xf) | R32A(src & 0xf) | i3 | i4 | i5; \
}

/* ROTATE THEN AND SELECTED BITS */
/* SLJIT_S390X_RIEF(rnsbg, 0xec0000000054) */

/* ROTATE THEN EXCLUSIVE OR SELECTED BITS */
/* SLJIT_S390X_RIEF(rxsbg, 0xec0000000057) */

/* ROTATE THEN OR SELECTED BITS */
SLJIT_S390X_RIEF(rosbg, 0xec0000000056)

/* ROTATE THEN INSERT SELECTED BITS */
/* SLJIT_S390X_RIEF(risbg, 0xec0000000055) */
/* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */

/* ROTATE THEN INSERT SELECTED BITS HIGH */
SLJIT_S390X_RIEF(risbhg, 0xec000000005d)

/* ROTATE THEN INSERT SELECTED BITS LOW */
/* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */

#undef SLJIT_S390X_RIEF
764
/* RRF-c instructions: RRE layout plus a 4-bit condition mask m3
   (require load/store-on-condition 1 facility). */
#define SLJIT_S390X_RRFC(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \
{ \
	sljit_ins m3; \
	\
	SLJIT_ASSERT(have_lscond1()); \
	m3 = (sljit_ins)(mask & 0xf) << 12; \
	\
	return (pattern) | m3 | R4A(dst) | R0A(src); \
}

/* LOAD HALFWORD IMMEDIATE ON CONDITION */
SLJIT_S390X_RRFC(locr, 0xb9f20000)
SLJIT_S390X_RRFC(locgr, 0xb9e20000)

#undef SLJIT_S390X_RRFC
782
/* RIE-g instructions: R1, 16-bit immediate and condition mask m3
   (require load/store-on-condition 2 facility). */
#define SLJIT_S390X_RIEG(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \
{ \
	sljit_ins m3, i2; \
	\
	SLJIT_ASSERT(have_lscond2()); \
	m3 = (sljit_ins)(mask & 0xf) << 32; \
	i2 = (sljit_ins)(imm & 0xffffL) << 16; \
	\
	return (pattern) | R36A(reg) | m3 | i2; \
}

/* LOAD HALFWORD IMMEDIATE ON CONDITION */
SLJIT_S390X_RIEG(lochi, 0xec0000000042)
SLJIT_S390X_RIEG(locghi, 0xec0000000046)

#undef SLJIT_S390X_RIEG
801
/* RIL-b instructions: R1 plus a 32-bit relative immediate ri.
   `cond` asserts any facility the opcode requires. */
#define SLJIT_S390X_RILB(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \
{ \
	SLJIT_ASSERT(cond); \
	\
	return (pattern) | R36A(reg) | (sljit_ins)(ri & 0xffffffff); \
}

/* BRANCH RELATIVE AND SAVE LONG */
SLJIT_S390X_RILB(brasl, 0xc00500000000, 1)

/* LOAD ADDRESS RELATIVE LONG */
SLJIT_S390X_RILB(larl, 0xc00000000000, 1)

/* LOAD RELATIVE LONG */
SLJIT_S390X_RILB(lgrl, 0xc40800000000, have_genext())

#undef SLJIT_S390X_RILB
820
/* Unconditional branch to the address in the target register (bcr 15,R). */
SLJIT_S390X_INSTRUCTION(br, sljit_gpr target)
{
	return 0x07f0 | target;
}

/* BRANCH RELATIVE ON CONDITION: 4-bit mask plus 16-bit relative offset. */
SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target)
{
	sljit_ins m1 = (sljit_ins)(mask & 0xf) << 20;
	sljit_ins ri2 = (sljit_ins)target & 0xffff;
	return 0xa7040000L | m1 | ri2;
}

/* BRANCH RELATIVE ON CONDITION LONG: 4-bit mask plus 32-bit relative offset. */
SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target)
{
	sljit_ins m1 = (sljit_ins)(mask & 0xf) << 36;
	sljit_ins ri2 = (sljit_ins)target & 0xffffffff;
	return 0xc00400000000L | m1 | ri2;
}

/* FIND LEFTMOST ONE (requires extended-immediate facility). */
SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src)
{
	SLJIT_ASSERT(have_eimm());
	return 0xb9830000 | R8A(dst) | R0A(src);
}

/* INSERT PROGRAM MASK */
SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst)
{
	return 0xb2220000 | R4A(dst);
}

/* SET PROGRAM MASK */
SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst)
{
	return 0x0400 | R4A(dst);
}

/* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */
SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot)
{
	/* Setting 0x8 in the end position requests zeroing of unselected bits. */
	return risbhg(dst, src, start, 0x8 | end, rot);
}

#undef SLJIT_S390X_INSTRUCTION
865
/* Patch the condition code after an add/sub so zero and overflow can be
   tested together: the cc is materialized with ipm, adjusted in tmp1, and
   written back with spm. Clobbers tmp1. */
static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r)
{
	/* Condition codes: bits 18 and 19.
	   Transformation:
	     0 (zero and no overflow) : unchanged
	     1 (non-zero and no overflow) : unchanged
	     2 (zero and overflow) : decreased by 1
	     3 (non-zero and overflow) : decreased by 1 if non-zero */
	/* The branch offsets are halfword counts over the fixup sequence;
	   mask 0xc (cc0|cc1) skips it entirely when there was no overflow. */
	FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_32) ? 1 : 2) + 2 + 3 + 1)));
	FAIL_IF(push_inst(compiler, ipm(tmp1)));
	FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)));
	FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3)));
	FAIL_IF(push_inst(compiler, slfi(tmp1, 0x10000000)));
	FAIL_IF(push_inst(compiler, spm(tmp1)));
	return SLJIT_SUCCESS;
}
882
/* load 64-bit immediate into register without clobbering flags */
static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v)
{
	/* 4 byte instructions */
	if (is_s16(v))
		return push_inst(compiler, lghi(target, (sljit_s16)v));

	/* Values with exactly one non-zero halfword: load-logical immediates. */
	if (((sljit_uw)v & ~(sljit_uw)0x000000000000ffff) == 0)
		return push_inst(compiler, llill(target, (sljit_u16)v));

	if (((sljit_uw)v & ~(sljit_uw)0x00000000ffff0000) == 0)
		return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16)));

	if (((sljit_uw)v & ~(sljit_uw)0x0000ffff00000000) == 0)
		return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32)));

	if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0)
		return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48)));

	/* 6 byte instructions (requires extended immediate facility) */
	if (have_eimm()) {
		if (is_s32(v))
			return push_inst(compiler, lgfi(target, (sljit_s32)v));

		if (((sljit_uw)v >> 32) == 0)
			return push_inst(compiler, llilf(target, (sljit_u32)v));

		if (((sljit_uw)v << 32) == 0)
			return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32)));

		/* General case: load the low word, then insert the high word. */
		FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));
		return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));
	}

	/* TODO(mundaym): instruction sequences that don't use extended immediates */
	abort();
}
920
/* Decomposed D(X,B) memory operand: offset(index, base). */
struct addr {
	sljit_gpr base;   /* base register; r0 means "no base" */
	sljit_gpr index;  /* index register; r0 means "no index" */
	sljit_s32 offset; /* displacement */
};
926
927 /* transform memory operand into D(X,B) form with a signed 20-bit offset */
make_addr_bxy(struct sljit_compiler * compiler,struct addr * addr,sljit_s32 mem,sljit_sw off,sljit_gpr tmp)928 static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler,
929 struct addr *addr, sljit_s32 mem, sljit_sw off,
930 sljit_gpr tmp /* clobbered, must not be r0 */)
931 {
932 sljit_gpr base = r0;
933 sljit_gpr index = r0;
934
935 SLJIT_ASSERT(tmp != r0);
936 if (mem & REG_MASK)
937 base = gpr(mem & REG_MASK);
938
939 if (mem & OFFS_REG_MASK) {
940 index = gpr(OFFS_REG(mem));
941 if (off != 0) {
942 /* shift and put the result into tmp */
943 SLJIT_ASSERT(0 <= off && off < 64);
944 FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
945 index = tmp;
946 off = 0; /* clear offset */
947 }
948 }
949 else if (!is_s20(off)) {
950 FAIL_IF(push_load_imm_inst(compiler, tmp, off));
951 index = tmp;
952 off = 0; /* clear offset */
953 }
954 addr->base = base;
955 addr->index = index;
956 addr->offset = (sljit_s32)off;
957 return SLJIT_SUCCESS;
958 }
959
960 /* transform memory operand into D(X,B) form with an unsigned 12-bit offset */
make_addr_bx(struct sljit_compiler * compiler,struct addr * addr,sljit_s32 mem,sljit_sw off,sljit_gpr tmp)961 static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,
962 struct addr *addr, sljit_s32 mem, sljit_sw off,
963 sljit_gpr tmp /* clobbered, must not be r0 */)
964 {
965 sljit_gpr base = r0;
966 sljit_gpr index = r0;
967
968 SLJIT_ASSERT(tmp != r0);
969 if (mem & REG_MASK)
970 base = gpr(mem & REG_MASK);
971
972 if (mem & OFFS_REG_MASK) {
973 index = gpr(OFFS_REG(mem));
974 if (off != 0) {
975 /* shift and put the result into tmp */
976 SLJIT_ASSERT(0 <= off && off < 64);
977 FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
978 index = tmp;
979 off = 0; /* clear offset */
980 }
981 }
982 else if (!is_u12(off)) {
983 FAIL_IF(push_load_imm_inst(compiler, tmp, off));
984 index = tmp;
985 off = 0; /* clear offset */
986 }
987 addr->base = base;
988 addr->index = index;
989 addr->offset = (sljit_s32)off;
990 return SLJIT_SUCCESS;
991 }
992
/* Expand an instruction helper with a decoded struct addr; WHEN picks
 * between two encodings. NOTE(review): not referenced in the code
 * below — WHEN is #undef'd later without use; likely leftovers. */
#define EVAL(op, r, addr) op(r, addr.offset, addr.index, addr.base)
#define WHEN(cond, r, i1, i2, addr) \
	(cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr)
996
997 /* May clobber tmp1. */
load_word(struct sljit_compiler * compiler,sljit_gpr dst_r,sljit_s32 src,sljit_sw srcw,sljit_s32 is_32bit)998 static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
999 sljit_s32 src, sljit_sw srcw,
1000 sljit_s32 is_32bit)
1001 {
1002 struct addr addr;
1003 sljit_ins ins;
1004
1005 SLJIT_ASSERT(src & SLJIT_MEM);
1006
1007 if (is_32bit && ((src & OFFS_REG_MASK) || is_u12(srcw) || !is_s20(srcw))) {
1008 FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
1009 return push_inst(compiler, 0x58000000 /* l */ | R20A(dst_r) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
1010 }
1011
1012 FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
1013
1014 ins = is_32bit ? 0xe30000000058 /* ly */ : 0xe30000000004 /* lg */;
1015 return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
1016 }
1017
1018 /* May clobber tmp1. */
load_unsigned_word(struct sljit_compiler * compiler,sljit_gpr dst_r,sljit_s32 src,sljit_sw srcw,sljit_s32 is_32bit)1019 static sljit_s32 load_unsigned_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
1020 sljit_s32 src, sljit_sw srcw,
1021 sljit_s32 is_32bit)
1022 {
1023 struct addr addr;
1024 sljit_ins ins;
1025
1026 SLJIT_ASSERT(src & SLJIT_MEM);
1027
1028 FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
1029
1030 ins = is_32bit ? 0xe30000000016 /* llgf */ : 0xe30000000004 /* lg */;
1031 return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
1032 }
1033
1034 /* May clobber tmp1. */
store_word(struct sljit_compiler * compiler,sljit_gpr src_r,sljit_s32 dst,sljit_sw dstw,sljit_s32 is_32bit)1035 static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r,
1036 sljit_s32 dst, sljit_sw dstw,
1037 sljit_s32 is_32bit)
1038 {
1039 struct addr addr;
1040 sljit_ins ins;
1041
1042 SLJIT_ASSERT(dst & SLJIT_MEM);
1043
1044 if (is_32bit && ((dst & OFFS_REG_MASK) || is_u12(dstw) || !is_s20(dstw))) {
1045 FAIL_IF(make_addr_bx(compiler, &addr, dst, dstw, tmp1));
1046 return push_inst(compiler, 0x50000000 /* st */ | R20A(src_r) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
1047 }
1048
1049 FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));
1050
1051 ins = is_32bit ? 0xe30000000050 /* sty */ : 0xe30000000024 /* stg */;
1052 return push_inst(compiler, ins | R36A(src_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
1053 }
1054
1055 #undef WHEN
1056
/* Move src (immediate, memory or register) into dst_r.
   dst_r must not alias a GPR source. */
static sljit_s32 emit_move(struct sljit_compiler *compiler,
	sljit_gpr dst_r,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 is_32 = (compiler->mode & SLJIT_32) != 0;

	SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK));

	if (src & SLJIT_IMM)
		return push_load_imm_inst(compiler, dst_r, srcw);

	if (src & SLJIT_MEM)
		return load_word(compiler, dst_r, src, srcw, is_32);

	return push_inst(compiler, is_32
		? lr(dst_r, gpr(src & REG_MASK))
		: lgr(dst_r, gpr(src & REG_MASK)));
}
1072
/* Emit a 2-operand register form instruction: dst = dst OP src2,
 * moving src1 into the destination first. tmp0/tmp1 may be clobbered. */
static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_gpr dst_r = tmp0;
	sljit_gpr src_r = tmp1;
	/* 0: dst already holds src1 (no move needed);
	 * 1: copy src1 into dst_r before the operation;
	 * 2: dst overlaps src2, so compute into tmp0 and copy the
	 *    result back to dst afterwards. */
	sljit_s32 needs_move = 1;

	if (FAST_IS_REG(dst)) {
		dst_r = gpr(dst);

		if (dst == src1)
			needs_move = 0;
		else if (dst == src2) {
			dst_r = tmp0;
			needs_move = 2;
		}
	}

	if (needs_move)
		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));

	if (FAST_IS_REG(src2))
		src_r = gpr(src2);
	else
		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));

	FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | R0A(src_r)));

	if (needs_move != 2)
		return SLJIT_SUCCESS;

	/* Copy the result from tmp0 back to the real destination. */
	dst_r = gpr(dst & REG_MASK);
	return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
}
1109
/* Emit a unary register form instruction: dst = OP src1.
   tmp0/tmp1 may be clobbered. */
static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w)
{
	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
	sljit_gpr src_r;

	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_move(compiler, tmp1, src1, src1w));
		src_r = tmp1;
	} else
		src_r = gpr(src1);

	return push_inst(compiler, ins | R4A(dst_r) | R0A(src_r));
}
1124
/* Emit a 3-operand register form instruction with a distinct
   destination field: dst = src1 OP src2. tmp0/tmp1 may be clobbered. */
static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr src1_r;
	sljit_gpr src2_r;

	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
		src1_r = tmp0;
	} else
		src1_r = gpr(src1);

	if (!FAST_IS_REG(src2)) {
		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
		src2_r = tmp1;
	} else
		src2_r = gpr(src2);

	return push_inst(compiler, ins | R4A(dst_r) | R0A(src1_r) | R12A(src2_r));
}
1146
/* Immediate operand formats used by emit_ri:
 * RI_A carries a 16-bit immediate, RIL_A a 32-bit immediate. */
typedef enum {
	RI_A,
	RIL_A,
} emit_ril_type;
1151
/* Emit a register-immediate instruction: dst = src1 OP src2w.
   type selects the 16-bit (RI) or 32-bit (RIL) immediate encoding.
   tmp0/tmp1 may be clobbered. */
static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_sw src2w,
	emit_ril_type type)
{
	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;

	/* The move is skipped only when dst already holds src1. */
	if (!(FAST_IS_REG(dst) && dst == src1))
		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));

	if (type == RIL_A)
		return push_inst(compiler, ins | R36A(dst_r) | (src2w & 0xffffffff));

	return push_inst(compiler, ins | R20A(dst_r) | (src2w & 0xffff));
}
1175
/* Emit an RIE-d form instruction: dst = src1 OP imm16 (src2w).
   tmp0 may be clobbered. */
static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_sw src2w)
{
	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
	sljit_gpr src_r = tmp0;

	if (FAST_IS_REG(src1))
		src_r = gpr(src1 & REG_MASK);
	else
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));

	return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | (sljit_ins)(src2w & 0xffff) << 16);
}
1191
/* Memory operand formats used by emit_rx:
 * RX_A has an unsigned 12-bit displacement, RXY_A a signed 20-bit one. */
typedef enum {
	RX_A,
	RXY_A,
} emit_rx_type;
1196
/* Emit an instruction with a memory source operand:
 * dst = src1 OP [src2 + src2w]. type selects RX (12-bit) or RXY
 * (20-bit) displacement encoding. tmp0/tmp1 may be clobbered. */
static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w,
	emit_rx_type type)
{
	sljit_gpr dst_r = tmp0;
	/* 0: dst holds src1; 1: move src1 into dst_r first;
	 * 2: dst is part of the src2 address, compute into tmp0 and
	 *    copy back afterwards. */
	sljit_s32 needs_move = 1;
	sljit_gpr base, index;

	SLJIT_ASSERT(src2 & SLJIT_MEM);

	if (FAST_IS_REG(dst)) {
		dst_r = gpr(dst);

		if (dst == src1)
			needs_move = 0;
		else if (dst == (src2 & REG_MASK) || (dst == OFFS_REG(src2))) {
			dst_r = tmp0;
			needs_move = 2;
		}
	}

	if (needs_move)
		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));

	base = gpr(src2 & REG_MASK);
	index = tmp0; /* tmp0 is r0: encodes "no index register" */

	if (src2 & OFFS_REG_MASK) {
		index = gpr(OFFS_REG(src2));

		/* Non-zero src2w encodes a left shift of the index register. */
		if (src2w != 0) {
			FAIL_IF(push_inst(compiler, sllg(tmp1, index, src2w & 0x3, 0)));
			src2w = 0;
			index = tmp1;
		}
	} else if ((type == RX_A && !is_u12(src2w)) || (type == RXY_A && !is_s20(src2w))) {
		/* Displacement out of range: materialize it in tmp1. */
		FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w));

		if (src2 & REG_MASK)
			index = tmp1;
		else
			base = tmp1;
		src2w = 0;
	}

	if (type == RX_A)
		ins |= R20A(dst_r) | R16A(index) | R12A(base) | (sljit_ins)src2w;
	else
		ins |= R36A(dst_r) | R32A(index) | R28A(base) | disp_s20((sljit_s32)src2w);

	FAIL_IF(push_inst(compiler, ins));

	if (needs_move != 2)
		return SLJIT_SUCCESS;

	/* Copy the result from tmp0 back to the real destination. */
	dst_r = gpr(dst);
	return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
}
1257
/* Emit an SIY-form instruction (8-bit immediate srcw stored to the
 * memory operand dst/dstw). SIY has no index register field, so any
 * base+index address must be collapsed into tmp1 first.
 * Clobbers tmp1.
 *
 * Fixes two address computation bugs:
 *  - sllg shifted tmp1 (uninitialized at that point) instead of the
 *    offset register gpr(OFFS_REG(dst));
 *  - both la computations used dst_r (== tmp1) instead of the base
 *    register gpr(dst & REG_MASK), so the base was never added. */
static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_sw srcw)
{
	SLJIT_ASSERT(dst & SLJIT_MEM);

	sljit_gpr dst_r = tmp1;

	if (dst & OFFS_REG_MASK) {
		sljit_gpr index = gpr(OFFS_REG(dst));

		if ((dstw & 0x3) != 0) {
			/* Non-zero dstw encodes a left shift of the index register. */
			FAIL_IF(push_inst(compiler, sllg(tmp1, index, dstw & 0x3, 0)));
			index = tmp1;
		}

		/* tmp1 = base + (possibly shifted) index. */
		FAIL_IF(push_inst(compiler, la(tmp1, 0, index, gpr(dst & REG_MASK))));
		dstw = 0;
	}
	else if (!is_s20(dstw)) {
		/* Displacement does not fit in 20 bits: materialize it. */
		FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw));

		if (dst & REG_MASK)
			FAIL_IF(push_inst(compiler, la(tmp1, 0, tmp1, gpr(dst & REG_MASK))));

		dstw = 0;
	}
	else
		dst_r = gpr(dst & REG_MASK);

	return push_inst(compiler, ins | ((sljit_ins)(srcw & 0xff) << 32) | R28A(dst_r) | disp_s20((sljit_s32)dstw));
}
1290
/* Alternative encodings of one logical operation; a zero entry means
 * that form is unavailable (see emit_commutative/emit_non_commutative):
 *   op_r / op_gr:   2-operand register form (32-bit / 64-bit)
 *   op_rk / op_grk: 3-operand register form with a distinct dst
 *   op / op_y:      32-bit memory forms (12-bit / 20-bit displacement)
 *   op_g:           64-bit memory form (20-bit displacement) */
struct ins_forms {
	sljit_ins op_r;
	sljit_ins op_gr;
	sljit_ins op_rk;
	sljit_ins op_grk;
	sljit_ins op;
	sljit_ins op_y;
	sljit_ins op_g;
};
1300
/* Emit a commutative binary operation, picking the best encoding from
 * *forms: a memory-operand form when one source is in memory (the
 * operands may be swapped since OP is commutative), otherwise the
 * 2-operand or 3-operand register form. */
static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 mode = compiler->mode;
	sljit_ins ins, ins_k;

	if ((src1 | src2) & SLJIT_MEM) {
		sljit_ins ins12, ins20;

		/* Memory forms: 12-bit (RX) and 20-bit (RXY) displacement. */
		if (mode & SLJIT_32) {
			ins12 = forms->op;
			ins20 = forms->op_y;
		}
		else {
			ins12 = 0;
			ins20 = forms->op_g;
		}

		if (ins12 && ins20) {
			/* Extra instructions needed for address computation can be executed independently. */
			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : is_s20(src1w)))) {
				if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
					return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);

				return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
			}

			/* Swap operands so the memory operand is src2. */
			if (src1 & SLJIT_MEM) {
				if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w))
					return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A);

				return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A);
			}
		}
		else if (ins12 || ins20) {
			emit_rx_type rx_type;

			if (ins12) {
				rx_type = RX_A;
				ins = ins12;
			}
			else {
				rx_type = RXY_A;
				ins = ins20;
			}

			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w)))))
				return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type);

			if (src1 & SLJIT_MEM)
				return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type);
		}
	}

	/* Register forms: 2-operand (ins) and 3-operand distinct-dst (ins_k). */
	if (mode & SLJIT_32) {
		ins = forms->op_r;
		ins_k = forms->op_rk;
	}
	else {
		ins = forms->op_gr;
		ins_k = forms->op_grk;
	}

	SLJIT_ASSERT(ins != 0 || ins_k != 0);

	/* Prefer the 2-operand form when dst already aliases an operand. */
	if (ins && FAST_IS_REG(dst)) {
		if (dst == src1)
			return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);

		if (dst == src2)
			return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w);
	}

	if (ins_k == 0)
		return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);

	return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w);
}
1383
/* Emit a non-commutative binary operation (dst = src1 OP src2).
 * Unlike emit_commutative, operands are never swapped and only src2
 * may come from memory. */
static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 mode = compiler->mode;
	sljit_ins ins;

	if (src2 & SLJIT_MEM) {
		sljit_ins ins12, ins20;

		/* Memory forms: 12-bit (RX) and 20-bit (RXY) displacement. */
		if (mode & SLJIT_32) {
			ins12 = forms->op;
			ins20 = forms->op_y;
		}
		else {
			ins12 = 0;
			ins20 = forms->op_g;
		}

		if (ins12 && ins20) {
			/* Prefer the short RX encoding when the offset fits. */
			if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
				return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);

			return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
		}
		else if (ins12)
			return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
		else if (ins20)
			return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
	}

	ins = (mode & SLJIT_32) ? forms->op_rk : forms->op_grk;

	/* Fall back to the 2-operand form when no 3-operand encoding
	 * exists or dst already holds src1. */
	if (ins == 0 || (FAST_IS_REG(dst) && dst == src1))
		return emit_rr(compiler, (mode & SLJIT_32) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w);

	return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w);
}
1423
/* Final code generation. Pass 1 walks the instruction buffers to
 * compute the code size and the literal pool size (constants,
 * rewritable/absolute jump targets and put_labels each take one pool
 * slot). The pool is placed after the doubleword-aligned code.
 * Pass 2 emits the instructions, patching relative offsets and
 * rewriting far jumps into lgrl+bcr/basr pairs through the pool. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_s390x_const *const_;
	struct sljit_put_label *put_label;
	sljit_sw executable_offset;
	sljit_uw ins_size = 0; /* instructions */
	sljit_uw pool_size = 0; /* literal pool */
	sljit_uw pad_size;
	sljit_uw i, j = 0;
	struct sljit_memory_fragment *buf;
	void *code, *code_ptr;
	sljit_uw *pool, *pool_ptr;
	sljit_sw source, offset; /* TODO(carenas): only need 32 bit */

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* branch handling */
	label = compiler->labels;
	jump = compiler->jumps;
	put_label = compiler->put_labels;

	/* TODO(carenas): compiler->executable_size could be calculated
	 *                before to avoid the following loop (except for
	 *                pool_size)
	 */
	/* calculate the size of the code */
	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
		sljit_uw len = buf->used_size / sizeof(sljit_ins);
		sljit_ins *ibuf = (sljit_ins *)buf->memory;
		for (i = 0; i < len; ++i, ++j) {
			sljit_ins ins = ibuf[i];

			/* TODO(carenas): instruction tag vs size/addr == j
			 * using instruction tags for const is creative
			 * but unlike all other architectures, and is not
			 * done consistently for all other objects.
			 * This might need reviewing later.
			 */
			if (ins & sljit_ins_const) {
				pool_size += sizeof(*pool);
				ins &= ~sljit_ins_const;
			}
			if (label && label->size == j) {
				/* label->size temporarily holds the byte offset. */
				label->size = ins_size;
				label = label->next;
			}
			if (jump && jump->addr == j) {
				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
					/* encoded: */
					/*   brasl %r14, <rel_addr> (or brcl <mask>, <rel_addr>) */
					/* replace with: */
					/*   lgrl %r1, <pool_addr> */
					/*   bras %r14, %r1 (or bcr <mask>, %r1) */
					pool_size += sizeof(*pool);
					ins_size += 2;
				}
				jump = jump->next;
			}
			if (put_label && put_label->addr == j) {
				pool_size += sizeof(*pool);
				put_label = put_label->next;
			}
			ins_size += sizeof_ins(ins);
		}
	}

	/* emit trailing label */
	if (label && label->size == j) {
		label->size = ins_size;
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!put_label);

	/* pad code size to 8 bytes so is accessible with half word offsets */
	/* the literal pool needs to be doubleword aligned */
	pad_size = ((ins_size + 7UL) & ~7UL) - ins_size;
	SLJIT_ASSERT(pad_size < 8UL);

	/* allocate target buffer */
	code = SLJIT_MALLOC_EXEC(ins_size + pad_size + pool_size,
					compiler->exec_allocator_data);
	PTR_FAIL_WITH_EXEC_IF(code);
	code_ptr = code;
	executable_offset = SLJIT_EXEC_OFFSET(code);

	/* TODO(carenas): pool is optional, and the ABI recommends it to
	 *                be created before the function code, instead of
	 *                globally; if generated code is too big could
	 *                need offsets bigger than 32bit words and assert()
	 */
	pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size);
	pool_ptr = pool;
	const_ = (struct sljit_s390x_const *)compiler->consts;

	/* update label addresses */
	label = compiler->labels;
	while (label) {
		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(
			(sljit_uw)code_ptr + label->size, executable_offset);
		label = label->next;
	}

	/* reset jumps */
	jump = compiler->jumps;
	put_label = compiler->put_labels;

	/* emit the code */
	j = 0;
	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
		sljit_uw len = buf->used_size / sizeof(sljit_ins);
		sljit_ins *ibuf = (sljit_ins *)buf->memory;
		for (i = 0; i < len; ++i, ++j) {
			sljit_ins ins = ibuf[i];
			if (ins & sljit_ins_const) {
				/* clear the const tag */
				ins &= ~sljit_ins_const;

				/* update instruction with relative address of constant */
				source = (sljit_sw)code_ptr;
				offset = (sljit_sw)pool_ptr - source;

				SLJIT_ASSERT(!(offset & 1));
				offset >>= 1; /* halfword (not byte) offset */
				SLJIT_ASSERT(is_s32(offset));

				ins |= (sljit_ins)offset & 0xffffffff;

				/* update address */
				const_->const_.addr = (sljit_uw)pool_ptr;

				/* store initial value into pool and update pool address */
				*(pool_ptr++) = (sljit_uw)const_->init_value;

				/* move to next constant */
				const_ = (struct sljit_s390x_const *)const_->const_.next;
			}
			if (jump && jump->addr == j) {
				sljit_sw target = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
					/* Far/rewritable jump: go through the pool. */
					jump->addr = (sljit_uw)pool_ptr;

					/* load address into tmp1 */
					source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;

					SLJIT_ASSERT(!(offset & 1));
					offset >>= 1;
					SLJIT_ASSERT(is_s32(offset));

					encode_inst(&code_ptr, lgrl(tmp1, offset & 0xffffffff));

					/* store jump target into pool and update pool address */
					*(pool_ptr++) = (sljit_uw)target;

					/* branch to tmp1 */
					sljit_ins op = (ins >> 32) & 0xf;
					sljit_ins arg = (ins >> 36) & 0xf;
					switch (op) {
					case 4: /* brcl -> bcr */
						ins = bcr(arg, tmp1);
						break;
					case 5: /* brasl -> basr */
						ins = basr(arg, tmp1);
						break;
					default:
						abort();
					}
				}
				else {
					jump->addr = (sljit_uw)code_ptr + 2;
					source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					offset = target - source;

					/* offset must be halfword aligned */
					SLJIT_ASSERT(!(offset & 1));
					offset >>= 1;
					SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */

					/* patch jump target */
					ins |= (sljit_ins)offset & 0xffffffff;
				}
				jump = jump->next;
			}
			if (put_label && put_label->addr == j) {
				source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

				SLJIT_ASSERT(put_label->label);
				put_label->addr = (sljit_uw)code_ptr;

				/* store target into pool */
				*pool_ptr = put_label->label->addr;
				offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
				pool_ptr++;

				SLJIT_ASSERT(!(offset & 1));
				offset >>= 1;
				SLJIT_ASSERT(is_s32(offset));
				ins |= (sljit_ins)offset & 0xffffffff;

				put_label = put_label->next;
			}
			encode_inst(&code_ptr, ins);
		}
	}
	SLJIT_ASSERT((sljit_u8 *)code + ins_size == code_ptr);
	SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr);

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = ins_size;
	code = SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
	SLJIT_CACHE_FLUSH(code, code_ptr);
	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
	return code;
}
1647
sljit_has_cpu_feature(sljit_s32 feature_type)1648 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
1649 {
1650 /* TODO(mundaym): implement all */
1651 switch (feature_type) {
1652 case SLJIT_HAS_FPU:
1653 case SLJIT_HAS_CLZ:
1654 case SLJIT_HAS_ROT:
1655 case SLJIT_HAS_PREFETCH:
1656 return 1;
1657 case SLJIT_HAS_CTZ:
1658 return 2;
1659 case SLJIT_HAS_CMOV:
1660 return have_lscond1() ? 1 : 0;
1661 }
1662 return 0;
1663 }
1664
sljit_cmp_info(sljit_s32 type)1665 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
1666 {
1667 return (type >= SLJIT_UNORDERED && type <= SLJIT_ORDERED_LESS_EQUAL);
1668 }
1669
1670 /* --------------------------------------------------------------------- */
1671 /* Entry, exit */
1672 /* --------------------------------------------------------------------- */
1673
/* Emit the function prologue: store callee-saved GPRs and the needed
 * FPRs into the caller-allocated register save area, allocate the
 * stack frame, then move non-scratch word arguments into their saved
 * registers. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
	sljit_s32 offset, i, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Saved registers are stored in callee allocated save area. */
	SLJIT_ASSERT(gpr(SLJIT_FIRST_SAVED_REG) == r6 && gpr(SLJIT_S0) == r13);

	offset = 2 * SSIZE_OF(sw);
	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
		/* All registers used: store r6..r14 (minus kept saveds) at once. */
		if (saved_arg_count == 0) {
			FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15)));
			offset += 9 * SSIZE_OF(sw);
		} else {
			FAIL_IF(push_inst(compiler, stmg(r6, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
			offset += (8 - saved_arg_count) * SSIZE_OF(sw);
		}
	} else {
		/* Store scratch registers that overlap the callee-saved range. */
		if (scratches == SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15)));
			offset += SSIZE_OF(sw);
		} else if (scratches > SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, stmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
		}

		/* Store the saved registers; r14 (return address) is included
		 * here only when there are no kept saveds. */
		if (saved_arg_count == 0) {
			if (saveds == 0) {
				FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
				offset += (saveds + 1) * SSIZE_OF(sw);
			}
		} else if (saveds > saved_arg_count) {
			if (saveds == saved_arg_count + 1) {
				FAIL_IF(push_inst(compiler, stg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
				offset += (saveds - saved_arg_count) * SSIZE_OF(sw);
			}
		}
	}

	/* With kept saveds, r14 is stored separately after them. */
	if (saved_arg_count > 0) {
		FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
		offset += SSIZE_OF(sw);
	}

	/* Store saved floating point registers, then saved FP scratches. */
	tmp = SLJIT_FS0 - fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	/* Allocate the frame: locals + fixed frame, 16-byte aligned. */
	local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
	compiler->local_size = local_size;

	FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));

	if (options & SLJIT_ENTER_REG_ARG)
		return SLJIT_SUCCESS;

	/* Copy non-scratch word arguments into saved registers. */
	arg_types >>= SLJIT_ARG_SHIFT;
	saved_arg_count = 0;
	tmp = 0;
	while (arg_types > 0) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - saved_arg_count), gpr(SLJIT_R0 + tmp))));
				saved_arg_count++;
			}
			tmp++;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
1766
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1767 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1768 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1769 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1770 {
1771 CHECK_ERROR();
1772 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1773 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1774
1775 compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
1776 return SLJIT_SUCCESS;
1777 }
1778
/* Emit the epilogue up to (but not including) the return branch:
 * free the stack frame and reload callee-saved GPRs and FPRs.
 * last_reg is the highest GPR to restore (r14 for a normal return,
 * r13 when r14 must stay untouched, e.g. sljit_emit_return_to). */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_gpr last_reg)
{
	sljit_s32 offset, i, tmp;
	sljit_s32 local_size = compiler->local_size;
	sljit_s32 saveds = compiler->saveds;
	sljit_s32 scratches = compiler->scratches;
	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);

	/* Free the frame: r15 += local_size (la for small, lay otherwise). */
	if (is_u12(local_size))
		FAIL_IF(push_inst(compiler, 0x41000000 /* la */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size));
	else
		FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size)));

	/* Mirror of sljit_emit_enter: reload from the same save-area offsets. */
	offset = 2 * SSIZE_OF(sw);
	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
		if (kept_saveds_count == 0) {
			FAIL_IF(push_inst(compiler, lmg(r6, last_reg, offset, r15)));
			offset += 9 * SSIZE_OF(sw);
		} else {
			FAIL_IF(push_inst(compiler, lmg(r6, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
			offset += (8 - kept_saveds_count) * SSIZE_OF(sw);
		}
	} else {
		if (scratches == SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15)));
			offset += SSIZE_OF(sw);
		} else if (scratches > SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, lmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
		}

		if (kept_saveds_count == 0) {
			if (saveds == 0) {
				/* Skip reloading r14 when the caller wants it untouched. */
				if (last_reg == r14)
					FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else if (saveds == 1 && last_reg == r13) {
				FAIL_IF(push_inst(compiler, lg(r13, offset, 0, r15)));
				offset += 2 * SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, last_reg, offset, r15)));
				offset += (saveds + 1) * SSIZE_OF(sw);
			}
		} else if (saveds > kept_saveds_count) {
			if (saveds == kept_saveds_count + 1) {
				FAIL_IF(push_inst(compiler, lg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
				offset += (saveds - kept_saveds_count) * SSIZE_OF(sw);
			}
		}
	}

	/* With kept saveds, r14 was stored separately after them. */
	if (kept_saveds_count > 0) {
		if (last_reg == r14)
			FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
		offset += SSIZE_OF(sw);
	}

	/* Reload saved floating point registers, then saved FP scratches. */
	tmp = SLJIT_FS0 - compiler->fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	return SLJIT_SUCCESS;
}
1852
sljit_emit_return_void(struct sljit_compiler * compiler)1853 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
1854 {
1855 CHECK_ERROR();
1856 CHECK(check_sljit_emit_return_void(compiler));
1857
1858 FAIL_IF(emit_stack_frame_release(compiler, r14));
1859 return push_inst(compiler, br(r14)); /* return */
1860 }
1861
/* Release the current frame and jump to src (tail-call style return).
 * The frame release below may overwrite src, so it is moved to a
 * temporary first when needed. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_to(compiler, src, srcw));

	if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		/* NOTE(review): relies on TMP_REG2 mapping to tmp1 — confirm
		 * against the register map at the top of this file. */
		FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
		src = TMP_REG2;
		srcw = 0;
	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
		/* src is a saved register restored below: keep a copy in tmp1. */
		FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
		src = TMP_REG2;
		srcw = 0;
	}

	/* r13 as last_reg keeps r14 intact for the indirect jump. */
	FAIL_IF(emit_stack_frame_release(compiler, r13));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}
1884
1885 /* --------------------------------------------------------------------- */
1886 /* Operators */
1887 /* --------------------------------------------------------------------- */
1888
/* Emits a zero-operand operation. The multiply/divide variants operate
   on the implicit SLJIT_R0/SLJIT_R1 register pair; tmp0/tmp1 are
   clobbered as scratch. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_gpr arg0 = gpr(SLJIT_R0);
	sljit_gpr arg1 = gpr(SLJIT_R1);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	/* Keep only the opcode and the 32-bit width flag. */
	op = GET_OPCODE(op) | (op & SLJIT_32);
	switch (op) {
	case SLJIT_BREAKPOINT:
		/* The following invalid instruction is emitted by gdb. */
		return push_inst(compiler, 0x0001 /* 2-byte trap */);
	case SLJIT_NOP:
		return push_inst(compiler, 0x0700 /* 2-byte nop */);
	case SLJIT_LMUL_UW:
		/* mlgr produces the 128-bit unsigned product in the arg0
		   even/odd pair; the halves are swapped into sljit's result
		   order after the switch. */
		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
		break;
	case SLJIT_LMUL_SW:
		/* signed multiplication from: */
		/* Hacker's Delight, Second Edition: Chapter 8-3. */
		FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0)));
		FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0)));
		FAIL_IF(push_inst(compiler, ngr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, ngr(tmp1, arg0)));

		/* unsigned multiplication */
		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));

		/* Correct the high half of the product for the signs. */
		FAIL_IF(push_inst(compiler, sgr(arg0, tmp0)));
		FAIL_IF(push_inst(compiler, sgr(arg0, tmp1)));
		break;
	case SLJIT_DIV_U32:
	case SLJIT_DIVMOD_U32:
		/* dlr divides the value held in the tmp0:tmp1 pair;
		   tmp0 is zeroed to form the 64-bit dividend. */
		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dlr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_U32)
			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_S32:
	case SLJIT_DIVMOD_S32:
		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_S32)
			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_UW:
	case SLJIT_DIVMOD_UW:
		FAIL_IF(push_inst(compiler, lghi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_UW)
			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_SW:
	case SLJIT_DIVMOD_SW:
		/* dsgr takes its 64-bit dividend from the odd register of
		   the pair, so tmp0 needs no pre-clearing here. */
		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_SW)
			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_ENDBR:
		/* No control-flow-integrity marker on this architecture. */
		return SLJIT_SUCCESS;
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		return SLJIT_SUCCESS;
	default:
		SLJIT_UNREACHABLE();
	}
	/* swap result registers */
	FAIL_IF(push_inst(compiler, lgr(tmp0, arg0)));
	FAIL_IF(push_inst(compiler, lgr(arg0, arg1)));
	return push_inst(compiler, lgr(arg1, tmp0));
}
1972
/* Emits a count-leading-zeros or count-trailing-zeros sequence built
   around flogr (find leftmost one). Writes the count to dst_r;
   clobbers tmp0 and tmp1. */
static sljit_s32 sljit_emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r, sljit_gpr src_r)
{
	sljit_s32 is_ctz = (GET_OPCODE(op) == SLJIT_CTZ);

	if ((op & SLJIT_32) && src_r != tmp0) {
		/* Zero-extend the 32-bit operand; flogr scans 64 bits. */
		FAIL_IF(push_inst(compiler, 0xb9160000 /* llgfr */ | R4A(tmp0) | R0A(src_r)));
		src_r = tmp0;
	}

	if (is_ctz) {
		/* Isolate the lowest set bit via x & -x so that flogr can
		   find it from the left. */
		FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */) | R4A(tmp1) | R0A(src_r)));

		if (src_r == tmp0)
			FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1400 /* nr */ : 0xb9800000 /* ngr */) | R4A(tmp0) | R0A(tmp1)));
		else
			FAIL_IF(push_inst(compiler, 0xb9e40000 /* ngrk */ | R12A(tmp1) | R4A(tmp0) | R0A(src_r)));

		src_r = tmp0;
	}

	/* flogr leaves the leading-zero count in tmp0 (the even register
	   of the tmp0:tmp1 pair). */
	FAIL_IF(push_inst(compiler, 0xb9830000 /* flogr */ | R4A(tmp0) | R0A(src_r)));

	if (is_ctz)
		/* tmp1 = count - 64 (aghik with a -64 immediate). */
		FAIL_IF(push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(tmp1) | R32A(tmp0) | ((sljit_ins)(-64 & 0xffff) << 16)));

	if (op & SLJIT_32) {
		/* Convert the 64-bit count to a 32-bit count (subtract 32). */
		if (!is_ctz && dst_r != tmp0)
			return push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(dst_r) | R32A(tmp0) | ((sljit_ins)(-32 & 0xffff) << 16));

		FAIL_IF(push_inst(compiler, 0xc20800000000 /* agfi */ | R36A(tmp0) | (sljit_u32)-32));
	}

	if (is_ctz)
		/* Fold tmp1 into tmp0 with rxsbg to turn the clz-based count
		   into the trailing-zero count. NOTE(review): relies on
		   z/Arch rxsbg bit-selection semantics — verify against the
		   Principles of Operation if modifying. */
		FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp0) | R32A(tmp1) | ((sljit_ins)((op & SLJIT_32) ? 59 : 58) << 24) | (63 << 16) | ((sljit_ins)((op & SLJIT_32) ? 5 : 6) << 8)));

	if (dst_r == tmp0)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ((op & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(tmp0));
}
2013
2014 /* LEVAL will be defined later with different parameters as needed */
2015 #define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2)
2016
/* Emits a single-operand operation (the MOV family, NOT, CLZ, CTZ).
   For MOV, the four addressing combinations are handled separately:
   register<-register, register<-immediate, register<-memory,
   memory<-register/immediate, and memory<-memory. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_ins ins;
	struct addr mem;
	sljit_gpr dst_r;
	sljit_gpr src_r;
	sljit_s32 opcode = GET_OPCODE(op);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) {
		/* LOAD REGISTER */
		if (FAST_IS_REG(dst) && FAST_IS_REG(src)) {
			dst_r = gpr(dst);
			src_r = gpr(src);
			/* Pick the sign/zero-extending register-to-register move
			   matching the operand width. */
			switch (opcode | (op & SLJIT_32)) {
			/* 32-bit */
			case SLJIT_MOV32_U8:
				ins = llcr(dst_r, src_r);
				break;
			case SLJIT_MOV32_S8:
				ins = lbr(dst_r, src_r);
				break;
			case SLJIT_MOV32_U16:
				ins = llhr(dst_r, src_r);
				break;
			case SLJIT_MOV32_S16:
				ins = lhr(dst_r, src_r);
				break;
			case SLJIT_MOV32:
				if (dst_r == src_r)
					return SLJIT_SUCCESS;
				ins = lr(dst_r, src_r);
				break;
			/* 64-bit */
			case SLJIT_MOV_U8:
				ins = llgcr(dst_r, src_r);
				break;
			case SLJIT_MOV_S8:
				ins = lgbr(dst_r, src_r);
				break;
			case SLJIT_MOV_U16:
				ins = llghr(dst_r, src_r);
				break;
			case SLJIT_MOV_S16:
				ins = lghr(dst_r, src_r);
				break;
			case SLJIT_MOV_U32:
				ins = llgfr(dst_r, src_r);
				break;
			case SLJIT_MOV_S32:
				ins = lgfr(dst_r, src_r);
				break;
			case SLJIT_MOV:
			case SLJIT_MOV_P:
				if (dst_r == src_r)
					return SLJIT_SUCCESS;
				ins = lgr(dst_r, src_r);
				break;
			default:
				ins = 0;
				SLJIT_UNREACHABLE();
				break;
			}
			FAIL_IF(push_inst(compiler, ins));
			return SLJIT_SUCCESS;
		}
		/* LOAD IMMEDIATE */
		if (FAST_IS_REG(dst) && (src & SLJIT_IMM)) {
			/* Normalize the immediate to the requested width before
			   loading it. */
			switch (opcode) {
			case SLJIT_MOV_U8:
				srcw = (sljit_sw)((sljit_u8)(srcw));
				break;
			case SLJIT_MOV_S8:
				srcw = (sljit_sw)((sljit_s8)(srcw));
				break;
			case SLJIT_MOV_U16:
				srcw = (sljit_sw)((sljit_u16)(srcw));
				break;
			case SLJIT_MOV_S16:
				srcw = (sljit_sw)((sljit_s16)(srcw));
				break;
			case SLJIT_MOV_U32:
				srcw = (sljit_sw)((sljit_u32)(srcw));
				break;
			case SLJIT_MOV_S32:
			case SLJIT_MOV32:
				srcw = (sljit_sw)((sljit_s32)(srcw));
				break;
			}
			return push_load_imm_inst(compiler, gpr(dst), srcw);
		}
		/* LOAD */
		/* TODO(carenas): avoid reg being defined later */
#define LEVAL(i) EVAL(i, reg, mem)
		if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) {
			sljit_gpr reg = gpr(dst);

			FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
			/* TODO(carenas): convert all calls below to LEVAL */
			switch (opcode | (op & SLJIT_32)) {
			case SLJIT_MOV32_U8:
				ins = llc(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV32_S8:
				ins = lb(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV32_U16:
				ins = llh(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV32_S16:
				/* Short (12-bit) displacement form when possible. */
				ins = WHEN2(is_u12(mem.offset), lh, lhy);
				break;
			case SLJIT_MOV32:
				ins = WHEN2(is_u12(mem.offset), l, ly);
				break;
			case SLJIT_MOV_U8:
				ins = LEVAL(llgc);
				break;
			case SLJIT_MOV_S8:
				ins = lgb(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV_U16:
				ins = LEVAL(llgh);
				break;
			case SLJIT_MOV_S16:
				ins = lgh(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV_U32:
				ins = LEVAL(llgf);
				break;
			case SLJIT_MOV_S32:
				ins = lgf(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV_P:
			case SLJIT_MOV:
				ins = lg(reg, mem.offset, mem.index, mem.base);
				break;
			default:
				ins = 0;
				SLJIT_UNREACHABLE();
				break;
			}
			FAIL_IF(push_inst(compiler, ins));
			return SLJIT_SUCCESS;
		}
		/* STORE and STORE IMMEDIATE */
		if ((dst & SLJIT_MEM)
			&& (FAST_IS_REG(src) || (src & SLJIT_IMM))) {
			/* Immediates are materialized in tmp0, then stored. */
			sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;
			if (src & SLJIT_IMM) {
				/* TODO(mundaym): MOVE IMMEDIATE? */
				FAIL_IF(push_load_imm_inst(compiler, reg, srcw));
			}
			struct addr mem;
			FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
			switch (opcode) {
			case SLJIT_MOV_U8:
			case SLJIT_MOV_S8:
				return push_inst(compiler,
					WHEN2(is_u12(mem.offset), stc, stcy));
			case SLJIT_MOV_U16:
			case SLJIT_MOV_S16:
				return push_inst(compiler,
					WHEN2(is_u12(mem.offset), sth, sthy));
			case SLJIT_MOV_U32:
			case SLJIT_MOV_S32:
			case SLJIT_MOV32:
				return push_inst(compiler,
					WHEN2(is_u12(mem.offset), st, sty));
			case SLJIT_MOV_P:
			case SLJIT_MOV:
				FAIL_IF(push_inst(compiler, LEVAL(stg)));
				return SLJIT_SUCCESS;
			default:
				SLJIT_UNREACHABLE();
			}
		}
#undef LEVAL
		/* MOVE CHARACTERS */
		if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) {
			/* Memory-to-memory move: load into tmp0, then store.
			   tmp1 may be reused when building the second address. */
			struct addr mem;
			FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
			switch (opcode) {
			case SLJIT_MOV_U8:
			case SLJIT_MOV_S8:
				FAIL_IF(push_inst(compiler,
					EVAL(llgc, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				return push_inst(compiler,
					EVAL(stcy, tmp0, mem));
			case SLJIT_MOV_U16:
			case SLJIT_MOV_S16:
				FAIL_IF(push_inst(compiler,
					EVAL(llgh, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				return push_inst(compiler,
					EVAL(sthy, tmp0, mem));
			case SLJIT_MOV_U32:
			case SLJIT_MOV_S32:
			case SLJIT_MOV32:
				FAIL_IF(push_inst(compiler,
					EVAL(ly, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				return push_inst(compiler,
					EVAL(sty, tmp0, mem));
			case SLJIT_MOV_P:
			case SLJIT_MOV:
				FAIL_IF(push_inst(compiler,
					EVAL(lg, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				FAIL_IF(push_inst(compiler,
					EVAL(stg, tmp0, mem)));
				return SLJIT_SUCCESS;
			default:
				SLJIT_UNREACHABLE();
			}
		}
		SLJIT_UNREACHABLE();
	}

	SLJIT_ASSERT((src & SLJIT_IMM) == 0); /* no immediates */

	dst_r = FAST_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0;
	src_r = FAST_IS_REG(src) ? gpr(REG_MASK & src) : tmp0;

	compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);

	/* TODO(mundaym): optimize loads and stores */
	switch (opcode) {
	case SLJIT_NOT:
		if (src & SLJIT_MEM)
			FAIL_IF(load_word(compiler, src_r, src, srcw, op & SLJIT_32));

		/* emulate ~x with x^-1 */
		if (!(op & SLJIT_32)) {
			FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
			if (src_r != dst_r)
				FAIL_IF(push_inst(compiler, lgr(dst_r, src_r)));

			FAIL_IF(push_inst(compiler, xgr(dst_r, tmp1)));
			break;
		}

		/* 32-bit: use an immediate xor when the extended-immediate
		   facility is available, otherwise xor with a loaded -1. */
		if (have_eimm())
			FAIL_IF(push_inst(compiler, xilf(dst_r, 0xffffffff)));
		else {
			FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
			if (src_r != dst_r)
				FAIL_IF(push_inst(compiler, lr(dst_r, src_r)));

			FAIL_IF(push_inst(compiler, xr(dst_r, tmp1)));
		}
		break;
	case SLJIT_CLZ:
	case SLJIT_CTZ:
		if (src & SLJIT_MEM)
			FAIL_IF(load_unsigned_word(compiler, src_r, src, srcw, op & SLJIT_32));

		FAIL_IF(sljit_emit_clz_ctz(compiler, op, dst_r, src_r));
		break;
	default:
		SLJIT_UNREACHABLE();
	}

	if ((op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW))
		FAIL_IF(update_zero_overflow(compiler, op, dst_r));

	if (dst & SLJIT_MEM)
		return store_word(compiler, dst_r, dst, dstw, op & SLJIT_32);

	return SLJIT_SUCCESS;
}
2295
is_commutative(sljit_s32 op)2296 static SLJIT_INLINE int is_commutative(sljit_s32 op)
2297 {
2298 switch (GET_OPCODE(op)) {
2299 case SLJIT_ADD:
2300 case SLJIT_ADDC:
2301 case SLJIT_MUL:
2302 case SLJIT_AND:
2303 case SLJIT_OR:
2304 case SLJIT_XOR:
2305 return 1;
2306 }
2307 return 0;
2308 }
2309
/* Encodings for signed addition; chosen by sljit_emit_add when the
   overflow flag is requested. */
static const struct ins_forms add_forms = {
	0x1a00, /* ar */
	0xb9080000, /* agr */
	0xb9f80000, /* ark */
	0xb9e80000, /* agrk */
	0x5a000000, /* a */
	0xe3000000005a, /* ay */
	0xe30000000008, /* ag */
};
2319
/* Encodings for logical (unsigned) addition; the default form set
   used by sljit_emit_add. */
static const struct ins_forms logical_add_forms = {
	0x1e00, /* alr */
	0xb90a0000, /* algr */
	0xb9fa0000, /* alrk */
	0xb9ea0000, /* algrk */
	0x5e000000, /* al */
	0xe3000000005e, /* aly */
	0xe3000000000a, /* alg */
};
2329
sljit_emit_add(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2330 static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op,
2331 sljit_s32 dst, sljit_sw dstw,
2332 sljit_s32 src1, sljit_sw src1w,
2333 sljit_s32 src2, sljit_sw src2w)
2334 {
2335 int sets_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW;
2336 int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
2337 const struct ins_forms *forms;
2338 sljit_ins ins;
2339
2340 if (src2 & SLJIT_IMM) {
2341 if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
2342 if (sets_overflow)
2343 ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
2344 else
2345 ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
2346 return emit_siy(compiler, ins, dst, dstw, src2w);
2347 }
2348
2349 if (is_s16(src2w)) {
2350 if (sets_overflow)
2351 ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
2352 else
2353 ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
2354 FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w));
2355 goto done;
2356 }
2357
2358 if (!sets_overflow) {
2359 if ((op & SLJIT_32) || is_u32(src2w)) {
2360 ins = (op & SLJIT_32) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */;
2361 FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2362 goto done;
2363 }
2364 if (is_u32(-src2w)) {
2365 FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A));
2366 goto done;
2367 }
2368 }
2369 else if ((op & SLJIT_32) || is_s32(src2w)) {
2370 ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
2371 FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2372 goto done;
2373 }
2374 }
2375
2376 forms = sets_overflow ? &add_forms : &logical_add_forms;
2377 FAIL_IF(emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w));
2378
2379 done:
2380 if (sets_zero_overflow)
2381 FAIL_IF(update_zero_overflow(compiler, op, FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0));
2382
2383 if (dst & SLJIT_MEM)
2384 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2385
2386 return SLJIT_SUCCESS;
2387 }
2388
/* Encodings for signed subtraction; chosen by sljit_emit_sub when
   signed flags are requested. */
static const struct ins_forms sub_forms = {
	0x1b00, /* sr */
	0xb9090000, /* sgr */
	0xb9f90000, /* srk */
	0xb9e90000, /* sgrk */
	0x5b000000, /* s */
	0xe3000000005b, /* sy */
	0xe30000000009, /* sg */
};
2398
/* Encodings for logical (unsigned) subtraction; the default form set
   used by sljit_emit_sub. */
static const struct ins_forms logical_sub_forms = {
	0x1f00, /* slr */
	0xb90b0000, /* slgr */
	0xb9fb0000, /* slrk */
	0xb9eb0000, /* slgrk */
	0x5f000000, /* sl */
	0xe3000000005f, /* sly */
	0xe3000000000b, /* slg */
};
2408
sljit_emit_sub(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2409 static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,
2410 sljit_s32 dst, sljit_sw dstw,
2411 sljit_s32 src1, sljit_sw src1w,
2412 sljit_s32 src2, sljit_sw src2w)
2413 {
2414 sljit_s32 flag_type = GET_FLAG_TYPE(op);
2415 int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW);
2416 int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
2417 const struct ins_forms *forms;
2418 sljit_ins ins;
2419
2420 if (dst == (sljit_s32)tmp0 && flag_type <= SLJIT_SIG_LESS_EQUAL) {
2421 int compare_signed = flag_type >= SLJIT_SIG_LESS;
2422
2423 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE;
2424
2425 if (src2 & SLJIT_IMM) {
2426 if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w)))
2427 {
2428 if ((op & SLJIT_32) || is_s32(src2w)) {
2429 ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;
2430 return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
2431 }
2432 }
2433 else {
2434 if ((op & SLJIT_32) || is_u32(src2w)) {
2435 ins = (op & SLJIT_32) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */;
2436 return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
2437 }
2438 if (is_s16(src2w))
2439 return emit_rie_d(compiler, 0xec00000000db /* alghsik */, (sljit_s32)tmp0, src1, src1w, src2w);
2440 }
2441 }
2442 else if (src2 & SLJIT_MEM) {
2443 if ((op & SLJIT_32) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) {
2444 ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */;
2445 return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A);
2446 }
2447
2448 if (compare_signed)
2449 ins = (op & SLJIT_32) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */;
2450 else
2451 ins = (op & SLJIT_32) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */;
2452 return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A);
2453 }
2454
2455 if (compare_signed)
2456 ins = (op & SLJIT_32) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */;
2457 else
2458 ins = (op & SLJIT_32) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */;
2459 return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w);
2460 }
2461
2462 if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) {
2463 ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
2464 FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w));
2465 goto done;
2466 }
2467
2468 if (src2 & SLJIT_IMM) {
2469 sljit_sw neg_src2w = -src2w;
2470
2471 if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) {
2472 if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
2473 if (sets_signed)
2474 ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
2475 else
2476 ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
2477 return emit_siy(compiler, ins, dst, dstw, neg_src2w);
2478 }
2479
2480 if (is_s16(neg_src2w)) {
2481 if (sets_signed)
2482 ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
2483 else
2484 ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
2485 FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w));
2486 goto done;
2487 }
2488 }
2489
2490 if (!sets_signed) {
2491 if ((op & SLJIT_32) || is_u32(src2w)) {
2492 ins = (op & SLJIT_32) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */;
2493 FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2494 goto done;
2495 }
2496 if (is_u32(neg_src2w)) {
2497 FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A));
2498 goto done;
2499 }
2500 }
2501 else if ((op & SLJIT_32) || is_s32(neg_src2w)) {
2502 ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
2503 FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A));
2504 goto done;
2505 }
2506 }
2507
2508 forms = sets_signed ? &sub_forms : &logical_sub_forms;
2509 FAIL_IF(emit_non_commutative(compiler, forms, dst, src1, src1w, src2, src2w));
2510
2511 done:
2512 if (sets_signed) {
2513 sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2514
2515 if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) {
2516 /* In case of overflow, the sign bit of the two source operands must be different, and
2517 - the first operand is greater if the sign bit of the result is set
2518 - the first operand is less if the sign bit of the result is not set
2519 The -result operation sets the corrent sign, because the result cannot be zero.
2520 The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */
2521 FAIL_IF(push_inst(compiler, brc(0xe, 2 + 2)));
2522 FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r)));
2523 }
2524 else if (op & SLJIT_SET_Z)
2525 FAIL_IF(update_zero_overflow(compiler, op, dst_r));
2526 }
2527
2528 if (dst & SLJIT_MEM)
2529 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2530
2531 return SLJIT_SUCCESS;
2532 }
2533
/* Encodings for multiplication without flag computation. */
static const struct ins_forms multiply_forms = {
	0xb2520000, /* msr */
	0xb90c0000, /* msgr */
	0xb9fd0000, /* msrkc */
	0xb9ed0000, /* msgrkc */
	0x71000000, /* ms */
	0xe30000000051, /* msy */
	0xe3000000000c, /* msg */
};
2543
/* Encodings for overflow-detecting multiplication; only the condition-
   code-setting forms (msrkc/msgrkc/msc/msgc) are available, the rest
   are zero. */
static const struct ins_forms multiply_overflow_forms = {
	0,
	0,
	0xb9fd0000, /* msrkc */
	0xb9ed0000, /* msgrkc */
	0,
	0xe30000000053, /* msc */
	0xe30000000083, /* msgc */
};
2553
sljit_emit_multiply(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2554 static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op,
2555 sljit_s32 dst,
2556 sljit_s32 src1, sljit_sw src1w,
2557 sljit_s32 src2, sljit_sw src2w)
2558 {
2559 sljit_ins ins;
2560
2561 if (HAS_FLAGS(op)) {
2562 /* if have_misc2 fails, this operation should be emulated. 32 bit emulation:
2563 FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r)));
2564 FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r)));
2565 if (dst_r != tmp0) {
2566 FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
2567 }
2568 FAIL_IF(push_inst(compiler, aih(tmp0, 1)));
2569 FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U)));
2570 FAIL_IF(push_inst(compiler, ipm(tmp1)));
2571 FAIL_IF(push_inst(compiler, oilh(tmp1, 0x2000))); */
2572
2573 return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w);
2574 }
2575
2576 if (src2 & SLJIT_IMM) {
2577 if (is_s16(src2w)) {
2578 ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */;
2579 return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A);
2580 }
2581
2582 if (is_s32(src2w)) {
2583 ins = (op & SLJIT_32) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */;
2584 return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A);
2585 }
2586 }
2587
2588 return emit_commutative(compiler, &multiply_forms, dst, src1, src1w, src2, src2w);
2589 }
2590
sljit_emit_bitwise_imm(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst,sljit_s32 src1,sljit_sw src1w,sljit_uw imm,sljit_s32 count16)2591 static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type,
2592 sljit_s32 dst,
2593 sljit_s32 src1, sljit_sw src1w,
2594 sljit_uw imm, sljit_s32 count16)
2595 {
2596 sljit_s32 mode = compiler->mode;
2597 sljit_gpr dst_r = tmp0;
2598 sljit_s32 needs_move = 1;
2599
2600 if (IS_GPR_REG(dst)) {
2601 dst_r = gpr(dst & REG_MASK);
2602 if (dst == src1)
2603 needs_move = 0;
2604 }
2605
2606 if (needs_move)
2607 FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
2608
2609 if (type == SLJIT_AND) {
2610 if (!(mode & SLJIT_32))
2611 FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | R36A(dst_r) | (imm >> 32)));
2612 return push_inst(compiler, 0xc00b00000000 /* nilf */ | R36A(dst_r) | (imm & 0xffffffff));
2613 }
2614 else if (type == SLJIT_OR) {
2615 if (count16 >= 3) {
2616 FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32)));
2617 return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
2618 }
2619
2620 if (count16 >= 2) {
2621 if ((imm & 0x00000000ffffffffull) == 0)
2622 return push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32));
2623 if ((imm & 0xffffffff00000000ull) == 0)
2624 return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
2625 }
2626
2627 if ((imm & 0xffff000000000000ull) != 0)
2628 FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | R20A(dst_r) | (imm >> 48)));
2629 if ((imm & 0x0000ffff00000000ull) != 0)
2630 FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | R20A(dst_r) | ((imm >> 32) & 0xffff)));
2631 if ((imm & 0x00000000ffff0000ull) != 0)
2632 FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | R20A(dst_r) | ((imm >> 16) & 0xffff)));
2633 if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2634 return push_inst(compiler, 0xa50b0000 /* oill */ | R20A(dst_r) | (imm & 0xffff));
2635 return SLJIT_SUCCESS;
2636 }
2637
2638 if ((imm & 0xffffffff00000000ull) != 0)
2639 FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | R36A(dst_r) | (imm >> 32)));
2640 if ((imm & 0x00000000ffffffffull) != 0 || imm == 0)
2641 return push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(dst_r) | (imm & 0xffffffff));
2642 return SLJIT_SUCCESS;
2643 }
2644
/* Encodings for bitwise AND. */
static const struct ins_forms bitwise_and_forms = {
	0x1400, /* nr */
	0xb9800000, /* ngr */
	0xb9f40000, /* nrk */
	0xb9e40000, /* ngrk */
	0x54000000, /* n */
	0xe30000000054, /* ny */
	0xe30000000080, /* ng */
};
2654
/* Encodings for bitwise OR. */
static const struct ins_forms bitwise_or_forms = {
	0x1600, /* or */
	0xb9810000, /* ogr */
	0xb9f60000, /* ork */
	0xb9e60000, /* ogrk */
	0x56000000, /* o */
	0xe30000000056, /* oy */
	0xe30000000081, /* og */
};
2664
/* Encodings for bitwise XOR. */
static const struct ins_forms bitwise_xor_forms = {
	0x1700, /* xr */
	0xb9820000, /* xgr */
	0xb9f70000, /* xrk */
	0xb9e70000, /* xgrk */
	0x57000000, /* x */
	0xe30000000057, /* xy */
	0xe30000000082, /* xg */
};
2674
sljit_emit_bitwise(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2675 static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op,
2676 sljit_s32 dst,
2677 sljit_s32 src1, sljit_sw src1w,
2678 sljit_s32 src2, sljit_sw src2w)
2679 {
2680 sljit_s32 type = GET_OPCODE(op);
2681 const struct ins_forms *forms;
2682
2683 if ((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == (sljit_s32)tmp0))) {
2684 sljit_s32 count16 = 0;
2685 sljit_uw imm = (sljit_uw)src2w;
2686
2687 if (op & SLJIT_32)
2688 imm &= 0xffffffffull;
2689
2690 if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2691 count16++;
2692 if ((imm & 0x00000000ffff0000ull) != 0)
2693 count16++;
2694 if ((imm & 0x0000ffff00000000ull) != 0)
2695 count16++;
2696 if ((imm & 0xffff000000000000ull) != 0)
2697 count16++;
2698
2699 if (type == SLJIT_AND && dst == (sljit_s32)tmp0 && count16 == 1) {
2700 sljit_gpr src_r = tmp0;
2701
2702 if (FAST_IS_REG(src1))
2703 src_r = gpr(src1 & REG_MASK);
2704 else
2705 FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
2706
2707 if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2708 return push_inst(compiler, 0xa7010000 | R20A(src_r) | imm);
2709 if ((imm & 0x00000000ffff0000ull) != 0)
2710 return push_inst(compiler, 0xa7000000 | R20A(src_r) | (imm >> 16));
2711 if ((imm & 0x0000ffff00000000ull) != 0)
2712 return push_inst(compiler, 0xa7030000 | R20A(src_r) | (imm >> 32));
2713 return push_inst(compiler, 0xa7020000 | R20A(src_r) | (imm >> 48));
2714 }
2715
2716 if (!(op & SLJIT_SET_Z))
2717 return sljit_emit_bitwise_imm(compiler, type, dst, src1, src1w, imm, count16);
2718 }
2719
2720 if (type == SLJIT_AND)
2721 forms = &bitwise_and_forms;
2722 else if (type == SLJIT_OR)
2723 forms = &bitwise_or_forms;
2724 else
2725 forms = &bitwise_xor_forms;
2726
2727 return emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w);
2728 }
2729
sljit_emit_shift(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2730 static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,
2731 sljit_s32 dst,
2732 sljit_s32 src1, sljit_sw src1w,
2733 sljit_s32 src2, sljit_sw src2w)
2734 {
2735 sljit_s32 type = GET_OPCODE(op);
2736 sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2737 sljit_gpr src_r = tmp0;
2738 sljit_gpr base_r = tmp0;
2739 sljit_ins imm = 0;
2740 sljit_ins ins;
2741
2742 if (FAST_IS_REG(src1))
2743 src_r = gpr(src1);
2744 else
2745 FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
2746
2747 if (!(src2 & SLJIT_IMM)) {
2748 if (FAST_IS_REG(src2))
2749 base_r = gpr(src2);
2750 else {
2751 FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
2752 base_r = tmp1;
2753 }
2754
2755 if ((op & SLJIT_32) && (type == SLJIT_MSHL || type == SLJIT_MLSHR || type == SLJIT_MASHR)) {
2756 if (base_r != tmp1) {
2757 FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(base_r) | (59 << 24) | (1 << 23) | (63 << 16)));
2758 base_r = tmp1;
2759 } else
2760 FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
2761 }
2762 } else
2763 imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
2764
2765 if ((op & SLJIT_32) && dst_r == src_r) {
2766 if (type == SLJIT_SHL || type == SLJIT_MSHL)
2767 ins = 0x89000000 /* sll */;
2768 else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)
2769 ins = 0x88000000 /* srl */;
2770 else
2771 ins = 0x8a000000 /* sra */;
2772
2773 FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm));
2774 } else {
2775 if (type == SLJIT_SHL || type == SLJIT_MSHL)
2776 ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */;
2777 else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)
2778 ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */;
2779 else
2780 ins = (op & SLJIT_32) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */;
2781
2782 FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16)));
2783 }
2784
2785 if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR)
2786 return push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r));
2787
2788 return SLJIT_SUCCESS;
2789 }
2790
sljit_emit_rotate(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2791 static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op,
2792 sljit_s32 dst,
2793 sljit_s32 src1, sljit_sw src1w,
2794 sljit_s32 src2, sljit_sw src2w)
2795 {
2796 sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2797 sljit_gpr src_r = tmp0;
2798 sljit_gpr base_r = tmp0;
2799 sljit_ins imm = 0;
2800 sljit_ins ins;
2801
2802 if (FAST_IS_REG(src1))
2803 src_r = gpr(src1);
2804 else
2805 FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
2806
2807 if (!(src2 & SLJIT_IMM)) {
2808 if (FAST_IS_REG(src2))
2809 base_r = gpr(src2);
2810 else {
2811 FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
2812 base_r = tmp1;
2813 }
2814 }
2815
2816 if (GET_OPCODE(op) == SLJIT_ROTR) {
2817 if (!(src2 & SLJIT_IMM)) {
2818 ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
2819 FAIL_IF(push_inst(compiler, ins | R4A(tmp1) | R0A(base_r)));
2820 base_r = tmp1;
2821 } else
2822 src2w = -src2w;
2823 }
2824
2825 if (src2 & SLJIT_IMM)
2826 imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
2827
2828 ins = (op & SLJIT_32) ? 0xeb000000001d /* rll */ : 0xeb000000001c /* rllg */;
2829 return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16));
2830 }
2831
/* Instruction encodings for add-with-carry (SLJIT_ADDC). */
static const struct ins_forms addc_forms = {
	0xb9980000, /* alcr */
	0xb9880000, /* alcgr */
	0,
	0,
	0,
	0xe30000000098, /* alc */
	0xe30000000088, /* alcg */
};
2841
/* Instruction encodings for subtract-with-borrow (SLJIT_SUBC). */
static const struct ins_forms subc_forms = {
	0xb9990000, /* slbr */
	0xb9890000, /* slbgr */
	0,
	0,
	0,
	0xe30000000099, /* slb */
	0xe30000000089, /* slbg */
};
2851
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2852 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2853 sljit_s32 dst, sljit_sw dstw,
2854 sljit_s32 src1, sljit_sw src1w,
2855 sljit_s32 src2, sljit_sw src2w)
2856 {
2857 CHECK_ERROR();
2858 CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2859 ADJUST_LOCAL_OFFSET(dst, dstw);
2860 ADJUST_LOCAL_OFFSET(src1, src1w);
2861 ADJUST_LOCAL_OFFSET(src2, src2w);
2862
2863 compiler->mode = op & SLJIT_32;
2864 compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2865
2866 if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) {
2867 src1 ^= src2;
2868 src2 ^= src1;
2869 src1 ^= src2;
2870
2871 src1w ^= src2w;
2872 src2w ^= src1w;
2873 src1w ^= src2w;
2874 }
2875
2876 switch (GET_OPCODE(op)) {
2877 case SLJIT_ADD:
2878 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2879 return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2880 case SLJIT_ADDC:
2881 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2882 FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w));
2883 if (dst & SLJIT_MEM)
2884 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2885 return SLJIT_SUCCESS;
2886 case SLJIT_SUB:
2887 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2888 return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2889 case SLJIT_SUBC:
2890 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2891 FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w));
2892 if (dst & SLJIT_MEM)
2893 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2894 return SLJIT_SUCCESS;
2895 case SLJIT_MUL:
2896 FAIL_IF(sljit_emit_multiply(compiler, op, dst, src1, src1w, src2, src2w));
2897 break;
2898 case SLJIT_AND:
2899 case SLJIT_OR:
2900 case SLJIT_XOR:
2901 FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w));
2902 break;
2903 case SLJIT_SHL:
2904 case SLJIT_MSHL:
2905 case SLJIT_LSHR:
2906 case SLJIT_MLSHR:
2907 case SLJIT_ASHR:
2908 case SLJIT_MASHR:
2909 FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w));
2910 break;
2911 case SLJIT_ROTL:
2912 case SLJIT_ROTR:
2913 FAIL_IF(sljit_emit_rotate(compiler, op, dst, src1, src1w, src2, src2w));
2914 break;
2915 }
2916
2917 if (dst & SLJIT_MEM)
2918 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2919 return SLJIT_SUCCESS;
2920 }
2921
/* Like sljit_emit_op2, but the result is discarded (only the status
   flags matter): route the destination into the scratch register tmp0. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op2(compiler, op, (sljit_s32)tmp0, 0, src1, src1w, src2, src2w);
}
2932
/* Emit a funnel shift: src_dst is shifted left/right and the vacated
   bits are filled from src1 (shifted the opposite way), then the two
   halves are OR-ed together. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src_dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 is_right;
	sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
	sljit_gpr src_dst_r = gpr(src_dst);
	sljit_gpr src1_r = tmp0;
	sljit_gpr src2_r = tmp1;
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));

	is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR);

	/* Shifting a register into itself is simply a rotate. */
	if (src_dst == src1) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w);
	}

	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* Bring the fill value into a register if necessary. */
	if (src1 & SLJIT_MEM)
		FAIL_IF(load_word(compiler, tmp0, src1, src1w, op & SLJIT_32));
	else if (src1 & SLJIT_IMM)
		FAIL_IF(push_load_imm_inst(compiler, tmp0, src1w));
	else
		src1_r = gpr(src1);

	if (src2 & SLJIT_IMM) {
		/* Immediate amounts are reduced modulo the bit width. */
		src2w &= bit_length - 1;

		if (src2w == 0)
			return SLJIT_SUCCESS;
	} else if (!(src2 & SLJIT_MEM))
		src2_r = gpr(src2);
	else
		FAIL_IF(load_word(compiler, tmp1, src2, src2w, op & SLJIT_32));

	if (src2 & SLJIT_IMM) {
		/* Constant amount: shift src_dst, then merge the fill bits from
		   src1 in one rotate-then-insert-selected-bits (risbg). */
		if (op & SLJIT_32) {
			ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
			FAIL_IF(push_inst(compiler, ins | R20A(src_dst_r) | (sljit_ins)src2w));
		} else {
			ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
			FAIL_IF(push_inst(compiler, ins | R36A(src_dst_r) | R32A(src_dst_r) | ((sljit_ins)src2w << 16)));
		}

		ins = 0xec0000000055 /* risbg */;

		if (is_right) {
			src2w = bit_length - src2w;
			ins |= ((sljit_ins)(64 - bit_length) << 24) | ((sljit_ins)(63 - src2w) << 16) | ((sljit_ins)src2w << 8);
		} else
			ins |= ((sljit_ins)(64 - src2w) << 24) | ((sljit_ins)63 << 16) | ((sljit_ins)src2w << 8);

		return push_inst(compiler, ins | R36A(src_dst_r) | R32A(src1_r));
	}

	/* Variable amount, 32-bit operands. */
	if (op & SLJIT_32) {
		if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {
			/* Modulo variants: mask the amount to its low 5 bits first. */
			if (src2_r != tmp1) {
				FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(src2_r) | (59 << 24) | (1 << 23) | (63 << 16)));
				src2_r = tmp1;
			} else
				FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
		}

		ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
		FAIL_IF(push_inst(compiler, ins | R20A(src_dst_r) | R12A(src2_r)));

		/* tmp1 = amount ^ 0x1f, i.e. 31 - amount for amounts in 0..31. */
		if (src2_r != tmp1) {
			FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x1f));
			FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp1) | R0A(src2_r)));
		} else
			FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x1f));

		/* Shift src1 the opposite way by (31 - amount) + 1 (the extra +1
		   is the constant 0x1 in the displacement field). */
		if (src1_r == tmp0) {
			ins = is_right ? 0x89000000 /* sll */ : 0x88000000 /* srl */;
			FAIL_IF(push_inst(compiler, ins | R20A(tmp0) | R12A(tmp1) | 0x1));
		} else {
			ins = is_right ? 0xeb00000000df /* sllk */ : 0xeb00000000de /* srlk */;
			FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | R28A(tmp1) | (0x1 << 16)));
		}

		return push_inst(compiler, 0x1600 /* or */ | R4A(src_dst_r) | R0A(tmp0));
	}

	/* Variable amount, 64-bit operands. */
	ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
	FAIL_IF(push_inst(compiler, ins | R36A(src_dst_r) | R32A(src_dst_r) | R28A(src2_r)));

	ins = is_right ? 0xeb000000000d /* sllg */ : 0xeb000000000c /* srlg */;

	if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
		/* Amount may be zero: pre-shift the fill value by one and use
		   (63 - amount) so a zero amount contributes no fill bits. */
		if (src2_r != tmp1)
			FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x3f));

		FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | (0x1 << 16)));
		src1_r = tmp0;

		if (src2_r != tmp1)
			FAIL_IF(push_inst(compiler, 0xb9820000 /* xgr */ | R4A(tmp1) | R0A(src2_r)));
		else
			FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x3f));
	} else
		/* Amount known non-zero: the negated amount works directly. */
		FAIL_IF(push_inst(compiler, 0xb9030000 /* lcgr */ | R4A(tmp1) | R0A(src2_r)));

	FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | R28A(tmp1)));
	return push_inst(compiler, 0xb9810000 /* ogr */ | R4A(src_dst_r) | R0A(tmp0));
}
3046
sljit_emit_op_src(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)3047 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(
3048 struct sljit_compiler *compiler,
3049 sljit_s32 op, sljit_s32 src, sljit_sw srcw)
3050 {
3051 sljit_gpr src_r;
3052 struct addr addr;
3053
3054 CHECK_ERROR();
3055 CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
3056 ADJUST_LOCAL_OFFSET(src, srcw);
3057
3058 switch (op) {
3059 case SLJIT_FAST_RETURN:
3060 src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
3061 if (src & SLJIT_MEM)
3062 FAIL_IF(load_word(compiler, tmp1, src, srcw, 0));
3063
3064 return push_inst(compiler, br(src_r));
3065 case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
3066 return SLJIT_SUCCESS;
3067 case SLJIT_PREFETCH_L1:
3068 case SLJIT_PREFETCH_L2:
3069 case SLJIT_PREFETCH_L3:
3070 case SLJIT_PREFETCH_ONCE:
3071 FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
3072 return push_inst(compiler, 0xe31000000036 /* pfd */ | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
3073 default:
3074 return SLJIT_SUCCESS;
3075 }
3076
3077 return SLJIT_SUCCESS;
3078 }
3079
/* Map an abstract SLJIT register to its hardware GPR number. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
	return (sljit_s32)gpr(reg);
}
3085
/* Map an abstract SLJIT float register to its hardware FPR number. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	return (sljit_s32)fgpr(reg);
}
3091
/* Emit a raw machine instruction of 'size' bytes supplied by the caller. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_u32 size)
{
	sljit_ins ins = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	/* Right-align the instruction bytes inside the 64-bit container,
	   matching the convention used for opcodes throughout this file.
	   NOTE(review): offset arithmetic assumes big-endian byte order,
	   which holds on s390x. */
	memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, size);
	return push_inst(compiler, ins);
}
3103
3104 /* --------------------------------------------------------------------- */
3105 /* Floating point operators */
3106 /* --------------------------------------------------------------------- */
3107
3108 #define FLOAT_LOAD 0
3109 #define FLOAT_STORE 1
3110
/* Load (FLOAT_LOAD) or store (FLOAT_STORE) a float/double between
   register 'reg' and the memory operand mem+memw, choosing between the
   short- and long-displacement instruction forms. */
static sljit_s32 float_mem(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	struct addr addr;
	sljit_ins ins;

	SLJIT_ASSERT(mem & SLJIT_MEM);

	/* Use the 12-bit unsigned displacement forms when there is an index
	   register, the offset already fits 12 bits, or it does not fit the
	   signed 20-bit field either (presumably make_addr_bx then
	   materializes the offset in tmp1 — confirm against its definition). */
	if ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw)) {
		FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));

		if (op & FLOAT_STORE)
			ins = (op & SLJIT_32) ? 0x70000000 /* ste */ : 0x60000000 /* std */;
		else
			ins = (op & SLJIT_32) ? 0x78000000 /* le */ : 0x68000000 /* ld */;

		return push_inst(compiler, ins | F20(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
	}

	/* Otherwise use the 20-bit signed displacement forms. */
	FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));

	if (op & FLOAT_STORE)
		ins = (op & SLJIT_32) ? 0xed0000000066 /* stey */ : 0xed0000000067 /* stdy */;
	else
		ins = (op & SLJIT_32) ? 0xed0000000064 /* ley */ : 0xed0000000065 /* ldy */;

	return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
}
3140
/* Emit a float operation whose second operand may be a register
   (ins_r form) or a memory location (ins form). */
static sljit_s32 emit_float(struct sljit_compiler *compiler, sljit_ins ins_r, sljit_ins ins,
	sljit_s32 reg,
	sljit_s32 src, sljit_sw srcw)
{
	struct addr mem;

	if (src & SLJIT_MEM) {
		FAIL_IF(make_addr_bx(compiler, &mem, src, srcw, tmp1));
		return push_inst(compiler, ins | F36(reg) | R32A(mem.index) | R28A(mem.base) | ((sljit_ins)mem.offset << 16));
	}

	return push_inst(compiler, ins_r | F4(reg) | F0(src));
}
3153
/* Convert a float/double to a signed 32/64-bit integer. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_ins dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
	sljit_ins ins;

	/* Source must be in a float register. */
	if (src & SLJIT_MEM) {
		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	/* M3 is set to 5 (rounding-mode field; presumably round toward
	   zero for C-style conversion — confirm against the ISA manual). */
	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
		ins = (op & SLJIT_32) ? 0xb3a85000 /* cgebr */ : 0xb3a95000 /* cgdbr */;
	else
		ins = (op & SLJIT_32) ? 0xb3985000 /* cfebr */ : 0xb3995000 /* cfdbr */;

	FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | F0(src)));

	/* S32 conversions store a 32-bit result; SW stores 64 bits. */
	if (dst & SLJIT_MEM)
		return store_word(compiler, dst_r, dst, dstw, GET_OPCODE(op) >= SLJIT_CONV_S32_FROM_F64);

	return SLJIT_SUCCESS;
}
3179
/* Convert a signed 32/64-bit integer to a float/double. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
	sljit_ins ins;

	/* Bring the integer source into tmp0 when it is not a register. */
	if (src & SLJIT_IMM) {
		FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
		src = (sljit_s32)tmp0;
	}
	else if (src & SLJIT_MEM) {
		FAIL_IF(load_word(compiler, tmp0, src, srcw, GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_S32));
		src = (sljit_s32)tmp0;
	}

	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
		ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */;
	else
		ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */;

	FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src)));

	/* Flush the temporary float result to a memory destination. */
	if (dst & SLJIT_MEM)
		return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);

	return SLJIT_SUCCESS;
}
3208
sljit_emit_fop1_cmp(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)3209 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
3210 sljit_s32 src1, sljit_sw src1w,
3211 sljit_s32 src2, sljit_sw src2w)
3212 {
3213 sljit_ins ins_r, ins;
3214
3215 if (src1 & SLJIT_MEM) {
3216 FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src1, src1w));
3217 src1 = TMP_FREG1;
3218 }
3219
3220 if (op & SLJIT_32) {
3221 ins_r = 0xb3090000 /* cebr */;
3222 ins = 0xed0000000009 /* ceb */;
3223 } else {
3224 ins_r = 0xb3190000 /* cdbr */;
3225 ins = 0xed0000000019 /* cdb */;
3226 }
3227
3228 return emit_float(compiler, ins_r, ins, src1, src2, src2w);
3229 }
3230
/* Emit a single-operand float operation (move, convert, negate, abs). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;
	sljit_ins ins;

	CHECK_ERROR();

	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	/* The widening conversion can take its source from memory directly. */
	if (op == SLJIT_CONV_F64_FROM_F32)
		FAIL_IF(emit_float(compiler, 0xb3040000 /* ldebr */, 0xed0000000004 /* ldeb */, dst_r, src, srcw));
	else {
		if (src & SLJIT_MEM) {
			/* For F32_FROM_F64 load the full double (flag 0). */
			FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op == SLJIT_CONV_F32_FROM_F64 ? 0 : (op & SLJIT_32)), dst_r, src, srcw));
			src = dst_r;
		}

		switch (GET_OPCODE(op)) {
		case SLJIT_MOV_F64:
			if (FAST_IS_REG(dst)) {
				if (dst == src)
					return SLJIT_SUCCESS;

				ins = (op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
				break;
			}
			/* Register (or freshly loaded) value straight to memory. */
			return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), src, dst, dstw);
		case SLJIT_CONV_F64_FROM_F32:
			/* Only SLJIT_CONV_F32_FROM_F64. */
			ins = 0xb3440000 /* ledbr */;
			break;
		case SLJIT_NEG_F64:
			ins = (op & SLJIT_32) ? 0xb3030000 /* lcebr */ : 0xb3130000 /* lcdbr */;
			break;
		default:
			SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_ABS_F64);
			ins = (op & SLJIT_32) ? 0xb3000000 /* lpebr */ : 0xb3100000 /* lpdbr */;
			break;
		}

		FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src)));
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	SLJIT_ASSERT(dst_r == TMP_FREG1);

	/* Flush the temporary result to the memory destination. */
	return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
}
3285
/* Register-to-register float move (ler for 32-bit, ldr for 64-bit). */
#define FLOAT_MOV(op, dst_r, src_r) \
	(((op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */) | F4(dst_r) | F0(src_r))

/* Emit a two-operand float operation (add/sub/mul/div). The first
   operand is first moved/loaded into dst_r, then the operation is
   applied in place with src2 as the second operand. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r = TMP_FREG1;
	sljit_ins ins_r, ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* Place src1 into dst_r without clobbering src2. */
	do {
		if (FAST_IS_REG(dst)) {
			dst_r = dst;

			if (dst == src1)
				break;

			if (dst == src2) {
				/* Commutative ops can simply swap the operands... */
				if (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64) {
					src2 = src1;
					src2w = src1w;
					src1 = dst;
					break;
				}

				/* ...others must save src2 before it is overwritten. */
				FAIL_IF(push_inst(compiler, FLOAT_MOV(op, TMP_FREG1, src2)));
				src2 = TMP_FREG1;
			}
		}

		if (src1 & SLJIT_MEM)
			FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), dst_r, src1, src1w));
		else
			FAIL_IF(push_inst(compiler, FLOAT_MOV(op, dst_r, src1)));
	} while (0);

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		ins_r = (op & SLJIT_32) ? 0xb30a0000 /* aebr */ : 0xb31a0000 /* adbr */;
		ins = (op & SLJIT_32) ? 0xed000000000a /* aeb */ : 0xed000000001a /* adb */;
		break;
	case SLJIT_SUB_F64:
		ins_r = (op & SLJIT_32) ? 0xb30b0000 /* sebr */ : 0xb31b0000 /* sdbr */;
		ins = (op & SLJIT_32) ? 0xed000000000b /* seb */ : 0xed000000001b /* sdb */;
		break;
	case SLJIT_MUL_F64:
		ins_r = (op & SLJIT_32) ? 0xb3170000 /* meebr */ : 0xb31c0000 /* mdbr */;
		ins = (op & SLJIT_32) ? 0xed0000000017 /* meeb */ : 0xed000000001c /* mdb */;
		break;
	default:
		SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_DIV_F64);
		ins_r = (op & SLJIT_32) ? 0xb30d0000 /* debr */ : 0xb31d0000 /* ddbr */;
		ins = (op & SLJIT_32) ? 0xed000000000d /* deb */ : 0xed000000001d /* ddb */;
		break;
	}

	FAIL_IF(emit_float(compiler, ins_r, ins, dst_r, src2, src2w));

	if (dst & SLJIT_MEM)
		return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);

	SLJIT_ASSERT(dst_r != TMP_FREG1);
	return SLJIT_SUCCESS;
}
3357
3358 /* --------------------------------------------------------------------- */
3359 /* Other instructions */
3360 /* --------------------------------------------------------------------- */
3361
sljit_emit_fast_enter(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)3362 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
3363 {
3364 CHECK_ERROR();
3365 CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
3366 ADJUST_LOCAL_OFFSET(dst, dstw);
3367
3368 if (FAST_IS_REG(dst))
3369 return push_inst(compiler, lgr(gpr(dst), link_r));
3370
3371 /* memory */
3372 return store_word(compiler, link_r, dst, dstw, 0);
3373 }
3374
3375 /* --------------------------------------------------------------------- */
3376 /* Conditional instructions */
3377 /* --------------------------------------------------------------------- */
3378
/* Create a label at the current position in the instruction stream. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_label(compiler));

	/* Reuse the previous label if nothing was emitted since. */
	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);
	return label;
}
3394
/* Emit a (conditional) jump or call with a target patched in later. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	/* Conditional jumps use the condition-code mask for the type;
	   unconditional ones use 0xf (branch always). */
	sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	/* record jump */
	struct sljit_jump *jump = (struct sljit_jump *)
		ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	jump->addr = compiler->size;

	/* emit jump instruction */
	type &= 0xff;
	if (type >= SLJIT_FAST_CALL)
		PTR_FAIL_IF(push_inst(compiler, brasl(link_r, 0)));
	else
		PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));

	return jump;
}
3418
/* Emit a function call. arg_types is unused on this target (all
   arguments are already in their ABI locations). */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	SLJIT_UNUSED_ARG(arg_types);
	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	/* Tail call: release the frame and turn the call into a jump. */
	if (type & SLJIT_CALL_RETURN) {
		PTR_FAIL_IF(emit_stack_frame_release(compiler, r14));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, type);
}
3434
sljit_emit_ijump(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src,sljit_sw srcw)3435 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
3436 {
3437 sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
3438
3439 CHECK_ERROR();
3440 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
3441
3442 if (src & SLJIT_IMM) {
3443 SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */
3444 FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
3445 }
3446 else if (src & SLJIT_MEM) {
3447 ADJUST_LOCAL_OFFSET(src, srcw);
3448 FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */));
3449 }
3450
3451 /* emit jump instruction */
3452 if (type >= SLJIT_FAST_CALL)
3453 return push_inst(compiler, basr(link_r, src_r));
3454
3455 return push_inst(compiler, br(src_r));
3456 }
3457
/* Emit an indirect call, handling the tail-call case. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	SLJIT_ASSERT(gpr(TMP_REG2) == tmp1);

	/* Load a memory target into tmp1 before any frame teardown. */
	if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
		src = TMP_REG2;
		srcw = 0;
	}

	if (type & SLJIT_CALL_RETURN) {
		/* The target may live in a saved register that the epilogue
		   restores; move it to tmp1 first. */
		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
			FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
			src = TMP_REG2;
			srcw = 0;
		}

		FAIL_IF(emit_stack_frame_release(compiler, r14));
		type = SLJIT_JUMP;
	}

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}
3488
/* Materialize a condition flag as 0/1 into dst, optionally combining
   it with dst via AND/OR/XOR. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_u8 mask = get_cc(compiler, type);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));

	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr loc_r = tmp1;
	switch (GET_OPCODE(op)) {
	case SLJIT_AND:
	case SLJIT_OR:
	case SLJIT_XOR:
		compiler->status_flags_state = op & SLJIT_SET_Z;

		/* dst is also source operand */
		if (dst & SLJIT_MEM)
			FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_32));

		break;
	case SLJIT_MOV32:
		op |= SLJIT_32;
		/* fallthrough */
	case SLJIT_MOV:
		/* can write straight into destination */
		loc_r = dst_r;
		break;
	default:
		SLJIT_UNREACHABLE();
	}

	/* Produce the flag value: zero loc_r, then conditionally load 1
	   using load-on-condition-immediate. */
	/* TODO(mundaym): fold into cmov helper function? */
#define LEVAL(i) i(loc_r, 1, mask)
	if (have_lscond2()) {
		FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, lochi, locghi)));
	} else {
		/* TODO(mundaym): no load/store-on-condition 2 facility (ipm? branch-and-set?) */
		abort();
	}
#undef LEVAL

	/* apply bitwise op and set condition codes */
	switch (GET_OPCODE(op)) {
#define LEVAL(i) i(dst_r, loc_r)
	case SLJIT_AND:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, nr, ngr)));
		break;
	case SLJIT_OR:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, or, ogr)));
		break;
	case SLJIT_XOR:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, xr, xgr)));
		break;
#undef LEVAL
	}

	/* store result to memory if required */
	if (dst & SLJIT_MEM)
		return store_word(compiler, dst_r, dst, dstw, (op & SLJIT_32));

	return SLJIT_SUCCESS;
}
3558
/* Emit a conditional move: dst_reg = src if the condition 'type' holds. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_ins mask = get_cc(compiler, type & ~SLJIT_32);
	sljit_gpr src_r;
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));

	if (type & SLJIT_32)
		srcw = (sljit_s32)srcw;

	/* Small immediates: a single load-halfword-immediate-on-condition. */
	if (have_lscond2() && (src & SLJIT_IMM) && is_s16(srcw)) {
		ins = (type & SLJIT_32) ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */;
		return push_inst(compiler, ins | R36A(gpr(dst_reg)) | (mask << 32) | (sljit_ins)(srcw & 0xffff) << 16);
	}

	/* Otherwise materialize the source in a register. */
	if (src & SLJIT_IMM) {
		FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
		src_r = tmp0;
	} else
		src_r = gpr(src);

	/* Register form: load-on-condition. */
	if (have_lscond1()) {
		ins = (type & SLJIT_32) ? 0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */;
		return push_inst(compiler, ins | (mask << 12) | R4A(gpr(dst_reg)) | R0A(src_r));
	}

	/* No load-on-condition facility: fall back to a branch sequence. */
	return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
}
3591
sljit_emit_mem(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 reg,sljit_s32 mem,sljit_sw memw)3592 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
3593 sljit_s32 reg,
3594 sljit_s32 mem, sljit_sw memw)
3595 {
3596 sljit_ins ins, reg1, reg2, base, offs = 0;
3597
3598 CHECK_ERROR();
3599 CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
3600
3601 if (!(reg & REG_PAIR_MASK))
3602 return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
3603
3604 ADJUST_LOCAL_OFFSET(mem, memw);
3605
3606 base = gpr(mem & REG_MASK);
3607 reg1 = gpr(REG_PAIR_FIRST(reg));
3608 reg2 = gpr(REG_PAIR_SECOND(reg));
3609
3610 if (mem & OFFS_REG_MASK) {
3611 memw &= 0x3;
3612 offs = gpr(OFFS_REG(mem));
3613
3614 if (memw != 0) {
3615 FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(offs) | ((sljit_ins)memw << 16)));
3616 offs = tmp1;
3617 } else if (!(type & SLJIT_MEM_STORE) && (base == reg1 || base == reg2) && (offs == reg1 || offs == reg2)) {
3618 FAIL_IF(push_inst(compiler, 0xb9f80000 | R12A(tmp1) | R4A(base) | R0A(offs)));
3619 base = tmp1;
3620 offs = 0;
3621 }
3622
3623 memw = 0;
3624 } else if (memw < -0x80000 || memw > 0x7ffff - ((reg2 == reg1 + 1) ? 0 : SSIZE_OF(sw))) {
3625 FAIL_IF(push_load_imm_inst(compiler, tmp1, memw));
3626
3627 if (base == 0)
3628 base = tmp1;
3629 else
3630 offs = tmp1;
3631
3632 memw = 0;
3633 }
3634
3635 if (offs == 0 && reg2 == (reg1 + 1)) {
3636 ins = (type & SLJIT_MEM_STORE) ? 0xeb0000000024 /* stmg */ : 0xeb0000000004 /* lmg */;
3637 return push_inst(compiler, ins | R36A(reg1) | R32A(reg2) | R28A(base) | disp_s20((sljit_s32)memw));
3638 }
3639
3640 ins = ((type & SLJIT_MEM_STORE) ? 0xe30000000024 /* stg */ : 0xe30000000004 /* lg */) | R32A(offs) | R28A(base);
3641
3642 if (!(type & SLJIT_MEM_STORE) && base == reg1) {
3643 FAIL_IF(push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw))));
3644 return push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw));
3645 }
3646
3647 FAIL_IF(push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw)));
3648 return push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw)));
3649 }
3650
3651 /* --------------------------------------------------------------------- */
3652 /* Other instructions */
3653 /* --------------------------------------------------------------------- */
3654
3655 /* On s390x we build a literal pool to hold constants. This has two main
3656 advantages:
3657
3658 1. we only need one instruction in the instruction stream (LGRL)
3659 2. we can store 64 bit addresses and use 32 bit offsets
3660
3661 To retrofit the extra information needed to build the literal pool we
3662 add a new sljit_s390x_const struct that contains the initial value but
3663 can still be cast to a sljit_const. */
3664
/* Emit a patchable constant load backed by the literal pool (see the
   comment block above). */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_s390x_const *const_;
	sljit_gpr dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));

	/* The extended record keeps the initial value for pool generation. */
	const_ = (struct sljit_s390x_const*)ensure_abuf(compiler,
		sizeof(struct sljit_s390x_const));
	PTR_FAIL_IF(!const_);
	set_const((struct sljit_const*)const_, compiler);
	const_->init_value = init_value;

	dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	/* Load from the pool: directly via lgrl, or via its address
	   (larl + lg) when lgrl is unavailable. The sljit_ins_const tag
	   marks the instruction for later offset patching. */
	if (have_genext())
		PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | lgrl(dst_r, 0)));
	else {
		PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | larl(tmp1, 0)));
		PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
	}

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0 /* always 64-bit */));

	return (struct sljit_const*)const_;
}
3692
sljit_set_jump_addr(sljit_uw addr,sljit_uw new_target,sljit_sw executable_offset)3693 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
3694 {
3695 /* Update the constant pool. */
3696 sljit_uw *ptr = (sljit_uw *)addr;
3697 SLJIT_UNUSED_ARG(executable_offset);
3698
3699 SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
3700 *ptr = new_target;
3701 SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
3702 SLJIT_CACHE_FLUSH(ptr, ptr + 1);
3703 }
3704
sljit_set_const(sljit_uw addr,sljit_sw new_constant,sljit_sw executable_offset)3705 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
3706 {
3707 sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
3708 }
3709
sljit_emit_put_label(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)3710 SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label(
3711 struct sljit_compiler *compiler,
3712 sljit_s32 dst, sljit_sw dstw)
3713 {
3714 struct sljit_put_label *put_label;
3715 sljit_gpr dst_r;
3716
3717 CHECK_ERROR_PTR();
3718 CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
3719 ADJUST_LOCAL_OFFSET(dst, dstw);
3720
3721 put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
3722 PTR_FAIL_IF(!put_label);
3723 set_put_label(put_label, compiler, 0);
3724
3725 dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
3726
3727 if (have_genext())
3728 PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));
3729 else {
3730 PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));
3731 PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
3732 }
3733
3734 if (dst & SLJIT_MEM)
3735 PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0));
3736
3737 return put_label;
3738 }
3739
3740 /* TODO(carenas): EVAL probably should move up or be refactored */
3741 #undef WHEN2
3742 #undef EVAL
3743
3744 #undef tmp1
3745 #undef tmp0
3746
3747 /* TODO(carenas): undef other macros that spill like is_u12? */
3748