1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include <sys/auxv.h>
28
29 #ifdef __ARCH__
30 #define ENABLE_STATIC_FACILITY_DETECTION 1
31 #else
32 #define ENABLE_STATIC_FACILITY_DETECTION 0
33 #endif
34 #define ENABLE_DYNAMIC_FACILITY_DETECTION 1
35
/* Return a human-readable platform name; SLJIT_CPUINFO must expand to a
   string literal (it is concatenated with "s390x" at compile time). */
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
	return "s390x" SLJIT_CPUINFO;
}
40
/* Instructions. Wide enough to hold a 6-byte encoding plus tag bits. */
typedef sljit_uw sljit_ins;

/* Instruction tags (most significant halfword). */
const sljit_ins sljit_ins_const = (sljit_ins)1 << 48;

/* Maps SLJIT virtual register numbers to hardware GPR numbers.
   Entry 0 maps to r14 (return address / flag register); the trailing
   entries are the stack pointer (r15) and the reserved temporaries
   r0/r1 — see the per-register notes below. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	14, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1
};
50
/* there are also a[2-15] available, but they are slower to access and
 * their use is limited as mundaym explained:
 * https://github.com/zherczeg/sljit/pull/91#discussion_r486895689
 */

/* General Purpose Registers [0-15]. */
typedef sljit_uw sljit_gpr;

/*
 * WARNING
 * the following code is non standard and should be improved for
 * consistency, but doesn't use SLJIT_NUMBER_OF_REGISTERS based
 * registers because r0 and r1 are the ABI recommended volatiles.
 * there is a gpr() function that maps sljit to physical register numbers
 * that should be used instead of the usual index into reg_map[] and
 * will be retired ASAP (TODO: carenas)
 */

const sljit_gpr r0 = 0; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */
const sljit_gpr r1 = 1; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */
const sljit_gpr r2 = 2; /* reg_map[1]: 1st argument */
const sljit_gpr r3 = 3; /* reg_map[2]: 2nd argument */
const sljit_gpr r4 = 4; /* reg_map[3]: 3rd argument */
const sljit_gpr r5 = 5; /* reg_map[4]: 4th argument */
const sljit_gpr r6 = 6; /* reg_map[5]: 5th argument; 1st saved register */
const sljit_gpr r7 = 7; /* reg_map[6] */
const sljit_gpr r8 = 8; /* reg_map[7] */
const sljit_gpr r9 = 9; /* reg_map[8] */
const sljit_gpr r10 = 10; /* reg_map[9] */
const sljit_gpr r11 = 11; /* reg_map[10] */
const sljit_gpr r12 = 12; /* reg_map[11]: GOT */
const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */
const sljit_gpr r14 = 14; /* reg_map[0]: return address and flag register */
const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */

/* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */
/* TODO(carenas): r12 might conflict in PIC code, reserve? */
/* TODO(carenas): r13 is usually pointed to "pool" per ABI, using a tmp
 *                like we do know might be faster though, reserve?
 */

/* TODO(carenas): should be named TMP_REG[1-2] for consistency */
#define tmp0	r0
#define tmp1	r1

/* TODO(carenas): flags should move to a different register so that
 *                link register doesn't need to change
 */

/* Link registers. The normal link register is r14, but since
   we use that for flags we need to use r0 instead to do fast
   calls so that flags are preserved. */
const sljit_gpr link_r = 14;     /* r14 */
const sljit_gpr fast_link_r = 0; /* r0 */

/* Flag register layout:
   bits 0-31 cache the zero-flag value, bits 34-35 hold the saved
   condition code (C C), the rest is unused:

   0                32  33  34      36      64
   +---------------+---+---+-------+-------+
   |      ZERO     | 0 | 0 |  C C  |///////|
   +---------------+---+---+-------+-------+
*/
const sljit_gpr flag_r = 14; /* r14 */

struct sljit_s390x_const {
	struct sljit_const const_; /* must be first */
	sljit_sw init_value;       /* required to build literal pool */
};
119
/* Convert SLJIT register to hardware register. */
static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)
{
	/* r must be a valid index into reg_map[] (checked in debug builds). */
	SLJIT_ASSERT(r != SLJIT_UNUSED);
	SLJIT_ASSERT(r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0])));
	return reg_map[r];
}
127
128 /* Size of instruction in bytes. Tags must already be cleared. */
sizeof_ins(sljit_ins ins)129 static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins)
130 {
131 /* keep faulting instructions */
132 if (ins == 0)
133 return 2;
134
135 if ((ins & 0x00000000ffffL) == ins)
136 return 2;
137 if ((ins & 0x0000ffffffffL) == ins)
138 return 4;
139 if ((ins & 0xffffffffffffL) == ins)
140 return 6;
141
142 SLJIT_UNREACHABLE();
143 return (sljit_uw)-1;
144 }
145
/* Append one (possibly tagged) instruction to the compiler's buffer.
   compiler->size counts emitted instructions, not bytes. */
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
{
	sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ibuf);
	*ibuf = ins;
	compiler->size++;
	return SLJIT_SUCCESS;
}
154
encode_inst(void ** ptr,sljit_ins ins)155 static sljit_s32 encode_inst(void **ptr, sljit_ins ins)
156 {
157 sljit_u16 *ibuf = (sljit_u16 *)*ptr;
158 sljit_uw size = sizeof_ins(ins);
159
160 SLJIT_ASSERT((size & 6) == size);
161 switch (size) {
162 case 6:
163 *ibuf++ = (sljit_u16)(ins >> 32);
164 /* fallthrough */
165 case 4:
166 *ibuf++ = (sljit_u16)(ins >> 16);
167 /* fallthrough */
168 case 2:
169 *ibuf++ = (sljit_u16)(ins);
170 }
171 *ptr = (void*)ibuf;
172 return SLJIT_SUCCESS;
173 }
174
175 /* Map the given type to a 4-bit condition code mask. */
get_cc(sljit_s32 type)176 static SLJIT_INLINE sljit_u8 get_cc(sljit_s32 type) {
177 const sljit_u8 eq = 1 << 3; /* equal {,to zero} */
178 const sljit_u8 lt = 1 << 2; /* less than {,zero} */
179 const sljit_u8 gt = 1 << 1; /* greater than {,zero} */
180 const sljit_u8 ov = 1 << 0; /* {overflow,NaN} */
181
182 switch (type) {
183 case SLJIT_EQUAL:
184 case SLJIT_EQUAL_F64:
185 return eq;
186
187 case SLJIT_NOT_EQUAL:
188 case SLJIT_NOT_EQUAL_F64:
189 return ~eq;
190
191 case SLJIT_LESS:
192 case SLJIT_SIG_LESS:
193 case SLJIT_LESS_F64:
194 return lt;
195
196 case SLJIT_LESS_EQUAL:
197 case SLJIT_SIG_LESS_EQUAL:
198 case SLJIT_LESS_EQUAL_F64:
199 return (lt | eq);
200
201 case SLJIT_GREATER:
202 case SLJIT_SIG_GREATER:
203 case SLJIT_GREATER_F64:
204 return gt;
205
206 case SLJIT_GREATER_EQUAL:
207 case SLJIT_SIG_GREATER_EQUAL:
208 case SLJIT_GREATER_EQUAL_F64:
209 return (gt | eq);
210
211 case SLJIT_OVERFLOW:
212 case SLJIT_MUL_OVERFLOW:
213 case SLJIT_UNORDERED_F64:
214 return ov;
215
216 case SLJIT_NOT_OVERFLOW:
217 case SLJIT_MUL_NOT_OVERFLOW:
218 case SLJIT_ORDERED_F64:
219 return ~ov;
220 }
221
222 SLJIT_UNREACHABLE();
223 return (sljit_u8)-1;
224 }
225
/* Facility to bit index mappings (bit numbers as reported by STFLE).
   Note: some facilities share the same bit index. */
typedef sljit_uw facility_bit;
#define STORE_FACILITY_LIST_EXTENDED_FACILITY 7
#define FAST_LONG_DISPLACEMENT_FACILITY 19
#define EXTENDED_IMMEDIATE_FACILITY 21
#define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34
#define DISTINCT_OPERAND_FACILITY 45
#define HIGH_WORD_FACILITY 45
#define POPULATION_COUNT_FACILITY 45
#define LOAD_STORE_ON_CONDITION_1_FACILITY 45
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49
#define LOAD_STORE_ON_CONDITION_2_FACILITY 53
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58
#define VECTOR_FACILITY 129
#define VECTOR_ENHANCEMENTS_1_FACILITY 135
242
/* Report whether a facility is known to be present due to the compiler
   settings. This function should always be compiled to a constant
   value given a constant argument. */
static SLJIT_INLINE int have_facility_static(facility_bit x)
{
#if ENABLE_STATIC_FACILITY_DETECTION
	/* __ARCH__ is the compiler-provided -march level; each case lists
	   the first machine generation providing the facility. */
	switch (x) {
	case FAST_LONG_DISPLACEMENT_FACILITY:
		return (__ARCH__ >=  6 /* z990 */);
	case EXTENDED_IMMEDIATE_FACILITY:
	case STORE_FACILITY_LIST_EXTENDED_FACILITY:
		return (__ARCH__ >=  7 /* z9-109 */);
	case GENERAL_INSTRUCTION_EXTENSION_FACILITY:
		return (__ARCH__ >=  8 /* z10 */);
	case DISTINCT_OPERAND_FACILITY:
		return (__ARCH__ >=  9 /* z196 */);
	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY:
		return (__ARCH__ >= 10 /* zEC12 */);
	case LOAD_STORE_ON_CONDITION_2_FACILITY:
	case VECTOR_FACILITY:
		return (__ARCH__ >= 11 /* z13 */);
	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY:
	case VECTOR_ENHANCEMENTS_1_FACILITY:
		return (__ARCH__ >= 12 /* z14 */);
	default:
		SLJIT_UNREACHABLE();
	}
#endif
	return 0;
}
273
get_hwcap()274 static SLJIT_INLINE unsigned long get_hwcap()
275 {
276 static unsigned long hwcap = 0;
277 if (SLJIT_UNLIKELY(!hwcap)) {
278 hwcap = getauxval(AT_HWCAP);
279 SLJIT_ASSERT(hwcap != 0);
280 }
281 return hwcap;
282 }
283
have_stfle()284 static SLJIT_INLINE int have_stfle()
285 {
286 if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY))
287 return 1;
288
289 return (get_hwcap() & HWCAP_S390_STFLE);
290 }
291
/* Report whether the given facility is available. This function always
   performs a runtime check. */
static int have_facility_dynamic(facility_bit x)
{
#if ENABLE_DYNAMIC_FACILITY_DETECTION
	/* Facility list as stored by STFLE; facility bit 0 is the most
	   significant bit of bits[0]. The list is cached after the first
	   query — the cache-valid test relies on facility bit 0 being set
	   on real hardware (asserted after the store). */
	static struct {
		sljit_uw bits[4];
	} cpu_features;
	size_t size = sizeof(cpu_features);
	const sljit_uw word_index = x >> 6;
	const sljit_uw bit_index = ((1UL << 63) >> (x & 63));

	SLJIT_ASSERT(x < size * 8);
	if (SLJIT_UNLIKELY(!have_stfle()))
		return 0;

	if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) {
		/* r0 receives the number of doublewords to store minus one;
		   stfle then writes the facility list to the buffer. */
		__asm__ __volatile__ (
			"lgr   %%r0, %0;"
			"stfle 0(%1);"
			/* outputs  */ :
			/* inputs   */ : "d" ((size / 8) - 1), "a" (&cpu_features)
			/* clobbers */ : "r0", "cc", "memory"
		);
		SLJIT_ASSERT(cpu_features.bits[0] != 0);
	}
	return (cpu_features.bits[word_index] & bit_index) != 0;
#else
	return 0;
#endif
}
323
/* Define a cached facility predicate. The result is memoized in a
   function-local static; the generated functions take (void) — an
   empty parameter list would declare them without a prototype in C
   prior to C23.
   NOTE(review): the memoization is not thread-safe; assumed to match
   the rest of the compiler's single-threaded usage — confirm. */
#define HAVE_FACILITY(name, bit) \
static SLJIT_INLINE int name(void) \
{ \
	static int have = -1; \
	/* Static check first. May allow the function to be optimized away. */ \
	if (have_facility_static(bit)) \
		have = 1; \
	else if (SLJIT_UNLIKELY(have < 0)) \
		have = have_facility_dynamic(bit) ? 1 : 0; \
	\
	return have; \
}
336
/* Cached facility predicates used by the instruction emitters below. */
HAVE_FACILITY(have_eimm,    EXTENDED_IMMEDIATE_FACILITY)
HAVE_FACILITY(have_ldisp,   FAST_LONG_DISPLACEMENT_FACILITY)
HAVE_FACILITY(have_genext,  GENERAL_INSTRUCTION_EXTENSION_FACILITY)
HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY)
HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY)
HAVE_FACILITY(have_misc2,   MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY)
#undef HAVE_FACILITY
344
#define is_u12(d)	(0 <= (d) && (d) <= 0x00000fffL)
#define is_u32(d)	(0 <= (d) && (d) <= 0xffffffffL)

/* True iff v fits in a bitlen-bit two's-complement signed integer.
   The previous shift-based check ((v << k) >> k) left-shifted negative
   values, which is undefined behavior in C (C11 6.5.7); the explicit
   range comparison below is well defined and equivalent for all valid
   inputs. */
#define CHECK_SIGNED(v, bitlen) \
	((v) >= -(1LL << ((bitlen) - 1)) && (v) < (1LL << ((bitlen) - 1)))

#define is_s16(d)	CHECK_SIGNED((d), 16)
#define is_s20(d)	CHECK_SIGNED((d), 20)
#define is_s32(d)	CHECK_SIGNED((d), 32)
354
355 static SLJIT_INLINE sljit_uw disp_s20(sljit_s32 d)
356 {
357 sljit_uw dh = (d >> 12) & 0xff;
358 sljit_uw dl = (d << 8) & 0xfff00;
359
360 SLJIT_ASSERT(is_s20(d));
361 return dh | dl;
362 }
363
/* Common prototype for all instruction encoders below: each returns
   the (possibly tagged) sljit_ins encoding without emitting it. */
/* TODO(carenas): variadic macro is not strictly needed */
#define SLJIT_S390X_INSTRUCTION(op, ...) \
static SLJIT_INLINE sljit_ins op(__VA_ARGS__)
367
/* RR form instructions: 16-bit opcode, r1 in bits 8-11, r2 in bits 12-15. */
#define SLJIT_S390X_RR(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
{ \
	return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \
}

/* ADD */
SLJIT_S390X_RR(ar, 0x1a00)

/* ADD LOGICAL */
SLJIT_S390X_RR(alr, 0x1e00)

/* AND */
SLJIT_S390X_RR(nr, 0x1400)

/* BRANCH AND SAVE */
SLJIT_S390X_RR(basr, 0x0d00)

/* BRANCH ON CONDITION */
SLJIT_S390X_RR(bcr, 0x0700) /* TODO(mundaym): type for mask? */

/* COMPARE */
SLJIT_S390X_RR(cr, 0x1900)

/* COMPARE LOGICAL */
SLJIT_S390X_RR(clr, 0x1500)

/* DIVIDE */
SLJIT_S390X_RR(dr, 0x1d00)

/* EXCLUSIVE OR */
SLJIT_S390X_RR(xr, 0x1700)

/* LOAD */
SLJIT_S390X_RR(lr, 0x1800)

/* LOAD COMPLEMENT */
SLJIT_S390X_RR(lcr, 0x1300)

/* OR */
SLJIT_S390X_RR(or, 0x1600)

/* SUBTRACT */
SLJIT_S390X_RR(sr, 0x1b00)

/* SUBTRACT LOGICAL */
SLJIT_S390X_RR(slr, 0x1f00)

#undef SLJIT_S390X_RR
418
/* RRE form instructions: 32-bit encoding with a 16-bit opcode,
   r1 in bits 24-27 and r2 in bits 28-31. */
#define SLJIT_S390X_RRE(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
{ \
	return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \
}

/* ADD */
SLJIT_S390X_RRE(agr, 0xb9080000)

/* ADD LOGICAL */
SLJIT_S390X_RRE(algr, 0xb90a0000)

/* ADD LOGICAL WITH CARRY */
SLJIT_S390X_RRE(alcr, 0xb9980000)
SLJIT_S390X_RRE(alcgr, 0xb9880000)

/* AND */
SLJIT_S390X_RRE(ngr, 0xb9800000)

/* COMPARE */
SLJIT_S390X_RRE(cgr, 0xb9200000)

/* COMPARE LOGICAL */
SLJIT_S390X_RRE(clgr, 0xb9210000)

/* DIVIDE LOGICAL */
SLJIT_S390X_RRE(dlr, 0xb9970000)
SLJIT_S390X_RRE(dlgr, 0xb9870000)

/* DIVIDE SINGLE */
SLJIT_S390X_RRE(dsgr, 0xb90d0000)

/* EXCLUSIVE OR */
SLJIT_S390X_RRE(xgr, 0xb9820000)

/* LOAD */
SLJIT_S390X_RRE(lgr, 0xb9040000)
SLJIT_S390X_RRE(lgfr, 0xb9140000)

/* LOAD BYTE */
SLJIT_S390X_RRE(lbr, 0xb9260000)
SLJIT_S390X_RRE(lgbr, 0xb9060000)

/* LOAD COMPLEMENT */
SLJIT_S390X_RRE(lcgr, 0xb9030000)

/* LOAD HALFWORD */
SLJIT_S390X_RRE(lhr, 0xb9270000)
SLJIT_S390X_RRE(lghr, 0xb9070000)

/* LOAD LOGICAL */
SLJIT_S390X_RRE(llgfr, 0xb9160000)

/* LOAD LOGICAL CHARACTER */
SLJIT_S390X_RRE(llcr, 0xb9940000)
SLJIT_S390X_RRE(llgcr, 0xb9840000)

/* LOAD LOGICAL HALFWORD */
SLJIT_S390X_RRE(llhr, 0xb9950000)
SLJIT_S390X_RRE(llghr, 0xb9850000)

/* MULTIPLY LOGICAL */
SLJIT_S390X_RRE(mlgr, 0xb9860000)

/* MULTIPLY SINGLE */
SLJIT_S390X_RRE(msr, 0xb2520000)
SLJIT_S390X_RRE(msgr, 0xb90c0000)
SLJIT_S390X_RRE(msgfr, 0xb91c0000)

/* OR */
SLJIT_S390X_RRE(ogr, 0xb9810000)

/* SUBTRACT */
SLJIT_S390X_RRE(sgr, 0xb9090000)

/* SUBTRACT LOGICAL */
SLJIT_S390X_RRE(slgr, 0xb90b0000)

/* SUBTRACT LOGICAL WITH BORROW */
SLJIT_S390X_RRE(slbr, 0xb9990000)
SLJIT_S390X_RRE(slbgr, 0xb9890000)

#undef SLJIT_S390X_RRE
503
/* RI-a form instructions: r1 in bits 8-11, 16-bit immediate in bits 16-31.
   imm_type selects signed vs. unsigned immediates per instruction. */
#define SLJIT_S390X_RIA(name, pattern, imm_type) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
{ \
	return (pattern) | ((reg & 0xf) << 20) | (imm & 0xffff); \
}

/* ADD HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(ahi, 0xa70a0000, sljit_s16)
SLJIT_S390X_RIA(aghi, 0xa70b0000, sljit_s16)

/* COMPARE HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(chi, 0xa70e0000, sljit_s16)
SLJIT_S390X_RIA(cghi, 0xa70f0000, sljit_s16)

/* LOAD HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(lhi, 0xa7080000, sljit_s16)
SLJIT_S390X_RIA(lghi, 0xa7090000, sljit_s16)

/* LOAD LOGICAL IMMEDIATE */
SLJIT_S390X_RIA(llihh, 0xa50c0000, sljit_u16)
SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16)
SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16)
SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16)

/* MULTIPLY HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(mhi, 0xa70c0000, sljit_s16)
SLJIT_S390X_RIA(mghi, 0xa70d0000, sljit_s16)

/* OR IMMEDIATE */
SLJIT_S390X_RIA(oilh, 0xa50a0000, sljit_u16)

/* TEST UNDER MASK */
SLJIT_S390X_RIA(tmlh, 0xa7000000, sljit_u16)

#undef SLJIT_S390X_RIA
540
/* RIL-a form instructions (requires extended immediate facility):
   6-byte encoding with r1 in the high halfword and a 32-bit immediate. */
#define SLJIT_S390X_RILA(name, pattern, imm_type) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
{ \
	SLJIT_ASSERT(have_eimm()); \
	return (pattern) | ((sljit_ins)(reg & 0xf) << 36) | (imm & 0xffffffff); \
}

/* ADD IMMEDIATE */
SLJIT_S390X_RILA(afi, 0xc20900000000, sljit_s32)
SLJIT_S390X_RILA(agfi, 0xc20800000000, sljit_s32)

/* ADD IMMEDIATE HIGH */
SLJIT_S390X_RILA(aih, 0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */

/* ADD LOGICAL IMMEDIATE */
SLJIT_S390X_RILA(alfi, 0xc20b00000000, sljit_u32)
SLJIT_S390X_RILA(algfi, 0xc20a00000000, sljit_u32)

/* AND IMMEDIATE */
SLJIT_S390X_RILA(nihf, 0xc00a00000000, sljit_u32)
SLJIT_S390X_RILA(nilf, 0xc00b00000000, sljit_u32)

/* COMPARE IMMEDIATE */
SLJIT_S390X_RILA(cfi, 0xc20d00000000, sljit_s32)
SLJIT_S390X_RILA(cgfi, 0xc20c00000000, sljit_s32)

/* COMPARE IMMEDIATE HIGH */
SLJIT_S390X_RILA(cih, 0xcc0d00000000, sljit_s32) /* TODO(mundaym): high-word facility? */

/* COMPARE LOGICAL IMMEDIATE */
SLJIT_S390X_RILA(clfi, 0xc20f00000000, sljit_u32)
SLJIT_S390X_RILA(clgfi, 0xc20e00000000, sljit_u32)

/* EXCLUSIVE OR IMMEDIATE */
SLJIT_S390X_RILA(xilf, 0xc00700000000, sljit_u32)

/* INSERT IMMEDIATE */
SLJIT_S390X_RILA(iihf, 0xc00800000000, sljit_u32)
SLJIT_S390X_RILA(iilf, 0xc00900000000, sljit_u32)

/* LOAD IMMEDIATE */
SLJIT_S390X_RILA(lgfi, 0xc00100000000, sljit_s32)

/* LOAD LOGICAL IMMEDIATE */
SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32)
SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32)

/* OR IMMEDIATE */
SLJIT_S390X_RILA(oilf, 0xc00d00000000, sljit_u32)

#undef SLJIT_S390X_RILA
593
/* RX-a form instructions: r1, unsigned 12-bit displacement d,
   index register x and base register b (r0 means "no register"). */
#define SLJIT_S390X_RXA(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_u16 d, sljit_gpr x, sljit_gpr b) \
{ \
	sljit_ins ri, xi, bi, di; \
\
	SLJIT_ASSERT((d & 0xfff) == d); \
	ri = (sljit_ins)(r & 0xf) << 20; \
	xi = (sljit_ins)(x & 0xf) << 16; \
	bi = (sljit_ins)(b & 0xf) << 12; \
	di = (sljit_ins)(d & 0xfff); \
\
	return (pattern) | ri | xi | bi | di; \
}

/* ADD */
SLJIT_S390X_RXA(a, 0x5a000000)

/* ADD LOGICAL */
SLJIT_S390X_RXA(al, 0x5e000000)

/* AND */
SLJIT_S390X_RXA(n, 0x54000000)

/* EXCLUSIVE OR */
SLJIT_S390X_RXA(x, 0x57000000)

/* LOAD */
SLJIT_S390X_RXA(l, 0x58000000)

/* LOAD ADDRESS */
SLJIT_S390X_RXA(la, 0x41000000)

/* LOAD HALFWORD */
SLJIT_S390X_RXA(lh, 0x48000000)

/* MULTIPLY SINGLE */
SLJIT_S390X_RXA(ms, 0x71000000)

/* OR */
SLJIT_S390X_RXA(o, 0x56000000)

/* STORE */
SLJIT_S390X_RXA(st, 0x50000000)

/* STORE CHARACTER */
SLJIT_S390X_RXA(stc, 0x42000000)

/* STORE HALFWORD */
SLJIT_S390X_RXA(sth, 0x40000000)

/* SUBTRACT */
SLJIT_S390X_RXA(s, 0x5b000000)

/* SUBTRACT LOGICAL */
SLJIT_S390X_RXA(sl, 0x5f000000)

#undef SLJIT_S390X_RXA
652
/* RXY-a instructions: like RX-a but 6 bytes long with a signed 20-bit
   displacement; cond asserts the facility the instruction requires. */
#define SLJIT_S390X_RXYA(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
{ \
	sljit_ins ri, xi, bi, di; \
\
	SLJIT_ASSERT(cond); \
	ri = (sljit_ins)(r & 0xf) << 36; \
	xi = (sljit_ins)(x & 0xf) << 32; \
	bi = (sljit_ins)(b & 0xf) << 28; \
	di = (sljit_ins)disp_s20(d) << 8; \
\
	return (pattern) | ri | xi | bi | di; \
}

/* ADD */
SLJIT_S390X_RXYA(ay, 0xe3000000005a, have_ldisp())
SLJIT_S390X_RXYA(ag, 0xe30000000008, 1)

/* ADD LOGICAL */
SLJIT_S390X_RXYA(aly, 0xe3000000005e, have_ldisp())
SLJIT_S390X_RXYA(alg, 0xe3000000000a, 1)

/* ADD LOGICAL WITH CARRY */
SLJIT_S390X_RXYA(alc, 0xe30000000098, 1)
SLJIT_S390X_RXYA(alcg, 0xe30000000088, 1)

/* AND */
SLJIT_S390X_RXYA(ny, 0xe30000000054, have_ldisp())
SLJIT_S390X_RXYA(ng, 0xe30000000080, 1)

/* EXCLUSIVE OR */
SLJIT_S390X_RXYA(xy, 0xe30000000057, have_ldisp())
SLJIT_S390X_RXYA(xg, 0xe30000000082, 1)

/* LOAD */
SLJIT_S390X_RXYA(ly, 0xe30000000058, have_ldisp())
SLJIT_S390X_RXYA(lg, 0xe30000000004, 1)
SLJIT_S390X_RXYA(lgf, 0xe30000000014, 1)

/* LOAD BYTE */
SLJIT_S390X_RXYA(lb, 0xe30000000076, have_ldisp())
SLJIT_S390X_RXYA(lgb, 0xe30000000077, have_ldisp())

/* LOAD HALFWORD */
SLJIT_S390X_RXYA(lhy, 0xe30000000078, have_ldisp())
SLJIT_S390X_RXYA(lgh, 0xe30000000015, 1)

/* LOAD LOGICAL */
SLJIT_S390X_RXYA(llgf, 0xe30000000016, 1)

/* LOAD LOGICAL CHARACTER */
SLJIT_S390X_RXYA(llc, 0xe30000000094, have_eimm())
SLJIT_S390X_RXYA(llgc, 0xe30000000090, 1)

/* LOAD LOGICAL HALFWORD */
SLJIT_S390X_RXYA(llh, 0xe30000000095, have_eimm())
SLJIT_S390X_RXYA(llgh, 0xe30000000091, 1)

/* MULTIPLY SINGLE */
SLJIT_S390X_RXYA(msy, 0xe30000000051, have_ldisp())
SLJIT_S390X_RXYA(msg, 0xe3000000000c, 1)

/* OR */
SLJIT_S390X_RXYA(oy, 0xe30000000056, have_ldisp())
SLJIT_S390X_RXYA(og, 0xe30000000081, 1)

/* STORE */
SLJIT_S390X_RXYA(sty, 0xe30000000050, have_ldisp())
SLJIT_S390X_RXYA(stg, 0xe30000000024, 1)

/* STORE CHARACTER */
SLJIT_S390X_RXYA(stcy, 0xe30000000072, have_ldisp())

/* STORE HALFWORD */
SLJIT_S390X_RXYA(sthy, 0xe30000000070, have_ldisp())

/* SUBTRACT */
SLJIT_S390X_RXYA(sy, 0xe3000000005b, have_ldisp())
SLJIT_S390X_RXYA(sg, 0xe30000000009, 1)

/* SUBTRACT LOGICAL */
SLJIT_S390X_RXYA(sly, 0xe3000000005f, have_ldisp())
SLJIT_S390X_RXYA(slg, 0xe3000000000b, 1)

/* SUBTRACT LOGICAL WITH BORROW */
SLJIT_S390X_RXYA(slb, 0xe30000000099, 1)
SLJIT_S390X_RXYA(slbg, 0xe30000000089, 1)

#undef SLJIT_S390X_RXYA
743
/* RS-a instructions: r1, unsigned 12-bit displacement d and base b
   (used here for the 32-bit shift instructions). */
#define SLJIT_S390X_RSA(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw d, sljit_gpr b) \
{ \
	sljit_ins r1 = (sljit_ins)(reg & 0xf) << 20; \
	sljit_ins b2 = (sljit_ins)(b & 0xf) << 12; \
	sljit_ins d2 = (sljit_ins)(d & 0xfff); \
	return (pattern) | r1 | b2 | d2; \
}

/* SHIFT LEFT SINGLE LOGICAL */
SLJIT_S390X_RSA(sll, 0x89000000)

/* SHIFT RIGHT SINGLE */
SLJIT_S390X_RSA(sra, 0x8a000000)

/* SHIFT RIGHT SINGLE LOGICAL */
SLJIT_S390X_RSA(srl, 0x88000000)

#undef SLJIT_S390X_RSA
764
/* RSY-a instructions: two register fields (r1, r3) plus a signed
   20-bit displacement off base b; cond asserts the required facility. */
#define SLJIT_S390X_RSYA(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_sw d, sljit_gpr b) \
{ \
	sljit_ins r1, r3, b2, d2; \
\
	SLJIT_ASSERT(cond); \
	r1 = (sljit_ins)(dst & 0xf) << 36; \
	r3 = (sljit_ins)(src & 0xf) << 32; \
	b2 = (sljit_ins)(b & 0xf) << 28; \
	d2 = (sljit_ins)disp_s20(d) << 8; \
\
	return (pattern) | r1 | r3 | b2 | d2; \
}

/* LOAD MULTIPLE */
SLJIT_S390X_RSYA(lmg, 0xeb0000000004, 1)

/* SHIFT LEFT LOGICAL */
SLJIT_S390X_RSYA(sllg, 0xeb000000000d, 1)

/* SHIFT RIGHT SINGLE */
SLJIT_S390X_RSYA(srag, 0xeb000000000a, 1)

/* SHIFT RIGHT SINGLE LOGICAL */
SLJIT_S390X_RSYA(srlg, 0xeb000000000c, 1)

/* STORE MULTIPLE */
SLJIT_S390X_RSYA(stmg, 0xeb0000000024, 1)

#undef SLJIT_S390X_RSYA
796
/* RIE-f instructions (require general-instructions-extension facility):
   rotate-then-*-selected-bits; start/end select the bit range in dst,
   rot is the rotate amount applied to src. */
#define SLJIT_S390X_RIEF(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \
{ \
	sljit_ins r1, r2, i3, i4, i5; \
\
	SLJIT_ASSERT(have_genext()); \
	r1 = (sljit_ins)(dst & 0xf) << 36; \
	r2 = (sljit_ins)(src & 0xf) << 32; \
	i3 = (sljit_ins)start << 24; \
	i4 = (sljit_ins)end << 16; \
	i5 = (sljit_ins)rot << 8; \
\
	return (pattern) | r1 | r2 | i3 | i4 | i5; \
}

/* ROTATE THEN AND SELECTED BITS */
/* SLJIT_S390X_RIEF(rnsbg, 0xec0000000054) */

/* ROTATE THEN EXCLUSIVE OR SELECTED BITS */
/* SLJIT_S390X_RIEF(rxsbg, 0xec0000000057) */

/* ROTATE THEN OR SELECTED BITS */
SLJIT_S390X_RIEF(rosbg, 0xec0000000056)

/* ROTATE THEN INSERT SELECTED BITS */
/* SLJIT_S390X_RIEF(risbg, 0xec0000000055) */
/* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */

/* ROTATE THEN INSERT SELECTED BITS HIGH */
SLJIT_S390X_RIEF(risbhg, 0xec000000005d)

/* ROTATE THEN INSERT SELECTED BITS LOW */
/* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */

#undef SLJIT_S390X_RIEF
833
/* RRF-a instructions: three register operands (r1, r2, r3);
   cond asserts the required facility. */
#define SLJIT_S390X_RRFA(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src1, sljit_gpr src2) \
{ \
	sljit_ins r1, r2, r3; \
\
	SLJIT_ASSERT(cond); \
	r1 = (sljit_ins)(dst & 0xf) << 4; \
	r2 = (sljit_ins)(src1 & 0xf); \
	r3 = (sljit_ins)(src2 & 0xf) << 12; \
\
	return (pattern) | r3 | r1 | r2; \
}

/* MULTIPLY */
SLJIT_S390X_RRFA(msrkc, 0xb9fd0000, have_misc2())
SLJIT_S390X_RRFA(msgrkc, 0xb9ed0000, have_misc2())

#undef SLJIT_S390X_RRFA
853
/* RRF-c instructions (require load/store-on-condition 1 facility):
   register pair plus a 4-bit condition mask m3. */
#define SLJIT_S390X_RRFC(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \
{ \
	sljit_ins r1, r2, m3; \
\
	SLJIT_ASSERT(have_lscond1()); \
	r1 = (sljit_ins)(dst & 0xf) << 4; \
	r2 = (sljit_ins)(src & 0xf); \
	m3 = (sljit_ins)(mask & 0xf) << 12; \
\
	return (pattern) | m3 | r1 | r2; \
}

/* LOAD HALFWORD IMMEDIATE ON CONDITION */
SLJIT_S390X_RRFC(locr, 0xb9f20000)
SLJIT_S390X_RRFC(locgr, 0xb9e20000)

#undef SLJIT_S390X_RRFC
873
/* RIE-g instructions (require load/store-on-condition 2 facility):
   register r1, 16-bit immediate i2 and condition mask m3. */
#define SLJIT_S390X_RIEG(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \
{ \
	sljit_ins r1, m3, i2; \
\
	SLJIT_ASSERT(have_lscond2()); \
	r1 = (sljit_ins)(reg & 0xf) << 36; \
	m3 = (sljit_ins)(mask & 0xf) << 32; \
	i2 = (sljit_ins)(imm & 0xffffL) << 16; \
\
	return (pattern) | r1 | m3 | i2; \
}

/* LOAD HALFWORD IMMEDIATE ON CONDITION */
SLJIT_S390X_RIEG(lochi, 0xec0000000042)
SLJIT_S390X_RIEG(locghi, 0xec0000000046)

#undef SLJIT_S390X_RIEG
893
/* RIL-b instructions: register r1 plus a 32-bit relative immediate;
   cond asserts the required facility. */
#define SLJIT_S390X_RILB(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \
{ \
	sljit_ins r1, ri2; \
\
	SLJIT_ASSERT(cond); \
	r1 = (sljit_ins)(reg & 0xf) << 36; \
	ri2 = (sljit_ins)(ri & 0xffffffff); \
\
	return (pattern) | r1 | ri2; \
}

/* BRANCH RELATIVE AND SAVE LONG */
SLJIT_S390X_RILB(brasl, 0xc00500000000, 1)

/* LOAD ADDRESS RELATIVE LONG */
SLJIT_S390X_RILB(larl, 0xc00000000000, 1)

/* LOAD RELATIVE LONG */
SLJIT_S390X_RILB(lgrl, 0xc40800000000, have_genext())

#undef SLJIT_S390X_RILB
916
/* Unconditional register branch: bcr (0x0700) with an always-true
   condition mask (0xf) and the target register in the r2 field. */
SLJIT_S390X_INSTRUCTION(br, sljit_gpr target)
{
	/* Mask the register field to 4 bits, consistent with every other
	   encoder in this file (previously the raw value was OR'ed in). */
	return 0x07f0 | (target & 0xf);
}
921
/* BRANCH RELATIVE ON CONDITION LONG: 4-bit condition mask plus a
   32-bit relative immediate. */
SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target)
{
	return 0xc00400000000L
		| ((sljit_ins)(mask & 0xf) << 36)
		| ((sljit_ins)target & 0xffffffff);
}
928
/* FIND LEFTMOST ONE (requires extended-immediate facility). */
SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src)
{
	SLJIT_ASSERT(have_eimm());
	return 0xb9830000
		| (((sljit_ins)dst & 0xf) << 8)
		| ((sljit_ins)src & 0xf);
}
936
/* INSERT PROGRAM MASK */
SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst)
{
	/* RRE-style encoding with only the r1 field used. */
	return 0xb2220000 | ((sljit_ins)(dst & 0xf) << 4);
}
942
/* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */
SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot)
{
	/* Setting bit 0x8 of the end field selects the Z (zero the
	   remaining bits) variant of risbhg. */
	return risbhg(dst, src, start, 0x8 | end, rot);
}

#undef SLJIT_S390X_INSTRUCTION
950
951 /* load condition code as needed to match type */
push_load_cc(struct sljit_compiler * compiler,sljit_s32 type)952 static sljit_s32 push_load_cc(struct sljit_compiler *compiler, sljit_s32 type)
953 {
954 type &= ~SLJIT_I32_OP;
955 switch (type) {
956 case SLJIT_ZERO:
957 case SLJIT_NOT_ZERO:
958 return push_inst(compiler, cih(flag_r, 0));
959 break;
960 default:
961 return push_inst(compiler, tmlh(flag_r, 0x3000));
962 break;
963 }
964 return SLJIT_SUCCESS;
965 }
966
/* Cache the zero flag: copy the result in source into the upper half
   of the flag register (see the flag layout above). */
static sljit_s32 push_store_zero_flag(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr source)
{
	/* insert low 32-bits into high 32-bits of flag register */
	FAIL_IF(push_inst(compiler, risbhgz(flag_r, source, 0, 31, 32)));
	if (!(op & SLJIT_I32_OP)) {
		/* OR high 32-bits with high 32-bits of flag register */
		return push_inst(compiler, rosbg(flag_r, source, 0, 31, 0));
	}
	return SLJIT_SUCCESS;
}
977
/* load 64-bit immediate into register without clobbering flags */
static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v)
{
	/* 4 byte instructions */
	if (is_s16(v))
		return push_inst(compiler, lghi(target, (sljit_s16)v));

	/* Values with all set bits confined to one 16-bit quarter can be
	   loaded with a single LOAD LOGICAL IMMEDIATE variant. */
	if ((sljit_uw)v == (v & 0x000000000000ffffU))
		return push_inst(compiler, llill(target, (sljit_u16)v));

	if ((sljit_uw)v == (v & 0x00000000ffff0000U))
		return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16)));

	if ((sljit_uw)v == (v & 0x0000ffff00000000U))
		return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32)));

	if ((sljit_uw)v == (v & 0xffff000000000000U))
		return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48)));

	/* 6 byte instructions (requires extended immediate facility) */
	if (have_eimm()) {
		if (is_s32(v))
			return push_inst(compiler, lgfi(target, (sljit_s32)v));

		if ((sljit_uw)v == (v & 0x00000000ffffffffU))
			return push_inst(compiler, llilf(target, (sljit_u32)v));

		if ((sljit_uw)v == (v & 0xffffffff00000000U))
			return push_inst(compiler, llihf(target, (sljit_u32)(v >> 32)));

		/* general case: load low word, then insert the high word */
		FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));
		return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));
	}
	/* TODO(mundaym): instruction sequences that don't use extended immediates */
	abort();
}
1014
/* A D(X,B)-form memory operand: base/index GPRs (r0 means "none")
   plus a displacement. */
struct addr {
	sljit_gpr base;
	sljit_gpr index;
	sljit_sw offset;
};
1020
1021 /* transform memory operand into D(X,B) form with a signed 20-bit offset */
/* transform memory operand into D(X,B) form with a signed 20-bit offset */
static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler,
	struct addr *addr, sljit_s32 mem, sljit_sw off,
	sljit_gpr tmp /* clobbered, must not be r0 */)
{
	sljit_gpr base = (mem & REG_MASK) ? gpr(mem & REG_MASK) : r0;
	sljit_gpr index = r0;

	SLJIT_ASSERT(tmp != r0);

	if (mem & OFFS_REG_MASK) {
		index = gpr(OFFS_REG(mem));
		if (off != 0) {
			/* `off` is the shift amount here; apply it into tmp */
			SLJIT_ASSERT(0 <= off && off < 64);
			FAIL_IF(push_inst(compiler, sllg(tmp, index, off, 0)));
			index = tmp;
			off = 0; /* consumed */
		}
	}
	else if (!is_s20(off)) {
		/* displacement does not fit: materialize it in tmp as the index */
		FAIL_IF(push_load_imm_inst(compiler, tmp, off));
		index = tmp;
		off = 0; /* consumed */
	}

	addr->base = base;
	addr->index = index;
	addr->offset = off;
	return SLJIT_SUCCESS;
}
1053
1054 /* transform memory operand into D(X,B) form with an unsigned 12-bit offset */
/* transform memory operand into D(X,B) form with an unsigned 12-bit offset */
static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,
	struct addr *addr, sljit_s32 mem, sljit_sw off,
	sljit_gpr tmp /* clobbered, must not be r0 */)
{
	sljit_gpr base = (mem & REG_MASK) ? gpr(mem & REG_MASK) : r0;
	sljit_gpr index = r0;

	SLJIT_ASSERT(tmp != r0);

	if (mem & OFFS_REG_MASK) {
		index = gpr(OFFS_REG(mem));
		if (off != 0) {
			/* `off` is the shift amount here; apply it into tmp */
			SLJIT_ASSERT(0 <= off && off < 64);
			FAIL_IF(push_inst(compiler, sllg(tmp, index, off, 0)));
			index = tmp;
			off = 0; /* consumed */
		}
	}
	else if (!is_u12(off)) {
		/* displacement does not fit in 12 unsigned bits: use tmp as index */
		FAIL_IF(push_load_imm_inst(compiler, tmp, off));
		index = tmp;
		off = 0; /* consumed */
	}

	addr->base = base;
	addr->index = index;
	addr->offset = off;
	return SLJIT_SUCCESS;
}
1086
/* Build an instruction from a decomposed D(X,B) operand. */
#define EVAL(op, r, addr) op(r, addr.offset, addr.index, addr.base)
/* Pick the short-displacement form (i1) or long-displacement form (i2)
   depending on cond. Fully parenthesized so the expansion is safe when
   used as a subexpression. */
#define WHEN(cond, r, i1, i2, addr) \
	((cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr))
1090
/* Emit a load of a 32- or 64-bit word from memory operand src/srcw into dst. */
static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst,
		sljit_s32 src, sljit_sw srcw,
		sljit_gpr tmp /* clobbered */, sljit_s32 is_32bit)
{
	struct addr addr;
	sljit_ins ins;

	SLJIT_ASSERT(src & SLJIT_MEM);

	/* 64-bit loads always have a 20-bit displacement form; 32-bit loads
	   only get one with the long-displacement facility. */
	if (is_32bit && !have_ldisp())
		FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp));
	else
		FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp));

	if (!is_32bit)
		return push_inst(compiler, lg(dst, addr.offset, addr.index, addr.base));

	ins = is_u12(addr.offset) ? l(dst, addr.offset, addr.index, addr.base)
				  : ly(dst, addr.offset, addr.index, addr.base);
	return push_inst(compiler, ins);
}
1111
/* Emit a store of a 32- or 64-bit word from src into memory operand dst/dstw. */
static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src,
		sljit_s32 dst, sljit_sw dstw,
		sljit_gpr tmp /* clobbered */, sljit_s32 is_32bit)
{
	struct addr addr;
	sljit_ins ins;

	SLJIT_ASSERT(dst & SLJIT_MEM);

	/* 64-bit stores always have a 20-bit displacement form; 32-bit stores
	   only get one with the long-displacement facility. */
	if (is_32bit && !have_ldisp())
		FAIL_IF(make_addr_bx(compiler, &addr, dst, dstw, tmp));
	else
		FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp));

	if (!is_32bit)
		return push_inst(compiler, stg(src, addr.offset, addr.index, addr.base));

	ins = is_u12(addr.offset) ? st(src, addr.offset, addr.index, addr.base)
				  : sty(src, addr.offset, addr.index, addr.base);
	return push_inst(compiler, ins);
}
1132
1133 #undef WHEN
1134
/* Generate the final machine code from the compiler's instruction buffers.
   Two passes: the first sizes the code and the trailing literal pool
   (resolving label sizes on the way); the second emits instructions,
   patching consts, jumps and put_labels against the pool. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_s390x_const *const_;
	struct sljit_put_label *put_label;
	sljit_sw executable_offset;
	sljit_uw ins_size = 0; /* instructions */
	sljit_uw pool_size = 0; /* literal pool */
	sljit_uw pad_size;
	sljit_uw i, j = 0;
	struct sljit_memory_fragment *buf;
	void *code, *code_ptr;
	sljit_uw *pool, *pool_ptr;

	sljit_uw source;
	sljit_sw offset; /* TODO(carenas): only need 32 bit */

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* branch handling */
	label = compiler->labels;
	jump = compiler->jumps;
	put_label = compiler->put_labels;

	/* TODO(carenas): compiler->executable_size could be calculated
	 *                before to avoid the following loop (except for
	 *                pool_size)
	 */
	/* calculate the size of the code */
	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
		sljit_uw len = buf->used_size / sizeof(sljit_ins);
		sljit_ins *ibuf = (sljit_ins *)buf->memory;
		/* j is the running instruction index across all fragments */
		for (i = 0; i < len; ++i, ++j) {
			sljit_ins ins = ibuf[i];

			/* TODO(carenas): instruction tag vs size/addr == j
			 * using instruction tags for const is creative
			 * but unlike all other architectures, and is not
			 * done consistently for all other objects.
			 * This might need reviewing later.
			 */
			if (ins & sljit_ins_const) {
				/* tagged const: reserve a pool slot */
				pool_size += sizeof(*pool);
				ins &= ~sljit_ins_const;
			}
			if (label && label->size == j) {
				/* convert label index into a byte offset */
				label->size = ins_size;
				label = label->next;
			}
			if (jump && jump->addr == j) {
				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
					/* encoded: */
					/*   brasl %r14, <rel_addr> (or brcl <mask>, <rel_addr>) */
					/* replace with: */
					/*   lgrl %r1, <pool_addr> */
					/*   bras %r14, %r1 (or bcr <mask>, %r1) */
					pool_size += sizeof(*pool);
					ins_size += 2;
				}
				jump = jump->next;
			}
			if (put_label && put_label->addr == j) {
				/* target address will live in the pool */
				pool_size += sizeof(*pool);
				put_label = put_label->next;
			}
			ins_size += sizeof_ins(ins);
		}
	}

	/* emit trailing label */
	if (label && label->size == j) {
		label->size = ins_size;
		label = label->next;
	}

	/* all labels/jumps/put_labels must have been consumed by the scan */
	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!put_label);

	/* pad code size to 8 bytes so is accessible with half word offsets */
	/* the literal pool needs to be doubleword aligned */
	pad_size = ((ins_size + 7UL) & ~7UL) - ins_size;
	SLJIT_ASSERT(pad_size < 8UL);

	/* allocate target buffer */
	code = SLJIT_MALLOC_EXEC(ins_size + pad_size + pool_size,
					compiler->exec_allocator_data);
	PTR_FAIL_WITH_EXEC_IF(code);
	code_ptr = code;
	executable_offset = SLJIT_EXEC_OFFSET(code);

	/* TODO(carenas): pool is optional, and the ABI recommends it to
	 *                be created before the function code, instead of
	 *                globally; if generated code is too big could
	 *                need offsets bigger than 32bit words and asser()
	 */
	pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size);
	pool_ptr = pool;
	const_ = (struct sljit_s390x_const *)compiler->consts;

	/* update label addresses */
	label = compiler->labels;
	while (label) {
		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(
			(sljit_uw)code_ptr + label->size, executable_offset);
		label = label->next;
	}

	/* reset jumps */
	jump = compiler->jumps;
	put_label = compiler->put_labels;

	/* emit the code */
	j = 0;
	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
		sljit_uw len = buf->used_size / sizeof(sljit_ins);
		sljit_ins *ibuf = (sljit_ins *)buf->memory;
		for (i = 0; i < len; ++i, ++j) {
			sljit_ins ins = ibuf[i];
			if (ins & sljit_ins_const) {
				/* clear the const tag */
				ins &= ~sljit_ins_const;

				/* update instruction with relative address of constant */
				source = (sljit_uw)code_ptr;
				offset = (sljit_uw)pool_ptr - source;
				/* instructions must be 2-byte aligned */
				SLJIT_ASSERT(!(offset & 1));
				offset >>= 1; /* halfword (not byte) offset */
				SLJIT_ASSERT(is_s32(offset));
				ins |= (sljit_ins)offset & 0xffffffff;

				/* update address */
				const_->const_.addr = (sljit_uw)pool_ptr;

				/* store initial value into pool and update pool address */
				*(pool_ptr++) = const_->init_value;

				/* move to next constant */
				const_ = (struct sljit_s390x_const *)const_->const_.next;
			}
			if (jump && jump->addr == j) {
				sljit_sw target = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
					jump->addr = (sljit_uw)pool_ptr;

					/* load address into tmp1 */
					source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					offset = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
					SLJIT_ASSERT(!(offset & 1));
					offset >>= 1;
					SLJIT_ASSERT(is_s32(offset));
					encode_inst(&code_ptr,
						lgrl(tmp1, offset & 0xffffffff));

					/* store jump target into pool and update pool address */
					*(pool_ptr++) = target;

					/* branch to tmp1 */
					/* decode opcode/mask from the original PC-relative form */
					sljit_ins op = (ins >> 32) & 0xf;
					sljit_ins arg = (ins >> 36) & 0xf;
					switch (op) {
					case 4: /* brcl -> bcr */
						ins = bcr(arg, tmp1);
						break;
					case 5: /* brasl -> basr */
						ins = basr(arg, tmp1);
						break;
					default:
						abort();
					}
				}
				else {
					/* +2: address of the 32-bit relative field inside the instruction */
					jump->addr = (sljit_uw)code_ptr + 2;
					source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					offset = target - source;

					/* offset must be halfword aligned */
					SLJIT_ASSERT(!(offset & 1));
					offset >>= 1;
					SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */

					/* patch jump target */
					ins |= (sljit_ins)offset & 0xffffffff;
				}
				jump = jump->next;
			}
			if (put_label && put_label->addr == j) {
				source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

				SLJIT_ASSERT(put_label->label);
				put_label->addr = (sljit_uw)code_ptr;

				/* store target into pool */
				*pool_ptr = put_label->label->addr;
				offset = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
				pool_ptr++;

				SLJIT_ASSERT(!(offset & 1));
				offset >>= 1;
				SLJIT_ASSERT(is_s32(offset));
				ins |= (sljit_ins)offset & 0xffffffff;

				put_label = put_label->next;
			}
			encode_inst(&code_ptr, ins);
		}
	}
	/* emitted sizes must match the sizes computed in the first pass */
	SLJIT_ASSERT((sljit_u8 *)code + ins_size == code_ptr);
	SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr);

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = ins_size;
	code = SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
	SLJIT_CACHE_FLUSH(code, code_ptr);
	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
	return code;
}
1357
sljit_has_cpu_feature(sljit_s32 feature_type)1358 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
1359 {
1360 /* TODO(mundaym): implement all */
1361 switch (feature_type) {
1362 case SLJIT_HAS_CLZ:
1363 return have_eimm() ? 1 : 0; /* FLOGR instruction */
1364 case SLJIT_HAS_CMOV:
1365 return have_lscond1() ? 1 : 0;
1366 case SLJIT_HAS_FPU:
1367 return 0;
1368 }
1369 return 0;
1370 }
1371
1372 /* --------------------------------------------------------------------- */
1373 /* Entry, exit */
1374 /* --------------------------------------------------------------------- */
1375
sljit_emit_enter(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1376 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
1377 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1378 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1379 {
1380 sljit_s32 args = get_arg_count(arg_types);
1381 sljit_sw frame_size;
1382
1383 CHECK_ERROR();
1384 CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1385 set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1386
1387 /* saved registers go in callee allocated save area */
1388 compiler->local_size = (local_size + 0xf) & ~0xf;
1389 frame_size = compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE;
1390
1391 FAIL_IF(push_inst(compiler, stmg(r6, r15, r6 * sizeof(sljit_sw), r15))); /* save registers TODO(MGM): optimize */
1392 if (frame_size != 0) {
1393 if (is_s16(-frame_size))
1394 FAIL_IF(push_inst(compiler, aghi(r15, -((sljit_s16)frame_size))));
1395 else if (is_s32(-frame_size))
1396 FAIL_IF(push_inst(compiler, agfi(r15, -((sljit_s32)frame_size))));
1397 else {
1398 FAIL_IF(push_load_imm_inst(compiler, tmp1, -frame_size));
1399 FAIL_IF(push_inst(compiler, la(r15, 0, tmp1, r15)));
1400 }
1401 }
1402
1403 if (args >= 1)
1404 FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0), gpr(SLJIT_R0))));
1405 if (args >= 2)
1406 FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S1), gpr(SLJIT_R1))));
1407 if (args >= 3)
1408 FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S2), gpr(SLJIT_R2))));
1409 SLJIT_ASSERT(args < 4);
1410
1411 return SLJIT_SUCCESS;
1412 }
1413
/* Record a new context (register/stack usage) without emitting a prologue.
   Mirrors the local-size rounding done by sljit_emit_enter. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* TODO(mundaym): stack space for saved floating point registers */
	/* keep the local area 16-byte aligned, matching sljit_emit_enter */
	compiler->local_size = (local_size + 0xf) & ~0xf;
	return SLJIT_SUCCESS;
}
1426
sljit_emit_return(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)1427 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
1428 {
1429 sljit_sw size;
1430 sljit_gpr end;
1431
1432 CHECK_ERROR();
1433 CHECK(check_sljit_emit_return(compiler, op, src, srcw));
1434
1435 FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
1436
1437 size = compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + (r6 * sizeof(sljit_sw));
1438 if (!is_s20(size)) {
1439 FAIL_IF(push_load_imm_inst(compiler, tmp1, compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE));
1440 FAIL_IF(push_inst(compiler, la(r15, 0, tmp1, r15)));
1441 size = r6 * sizeof(sljit_sw);
1442 end = r14; /* r15 has been restored already */
1443 }
1444 else
1445 end = r15;
1446
1447 FAIL_IF(push_inst(compiler, lmg(r6, end, size, r15))); /* restore registers TODO(MGM): optimize */
1448 FAIL_IF(push_inst(compiler, br(r14))); /* return */
1449
1450 return SLJIT_SUCCESS;
1451 }
1452
1453 /* --------------------------------------------------------------------- */
1454 /* Operators */
1455 /* --------------------------------------------------------------------- */
1456
/* Emit a zero-operand (or implicit R0/R1 operand) operation.
   Multiplication/division results follow the SLJIT convention:
   R0 = low/quotient, R1 = high/remainder. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_gpr arg0 = gpr(SLJIT_R0);
	sljit_gpr arg1 = gpr(SLJIT_R1);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op) | (op & SLJIT_I32_OP);
	switch (op) {
	case SLJIT_BREAKPOINT:
		/* TODO(mundaym): insert real breakpoint? */
		/* fallthrough */
	case SLJIT_NOP:
		return push_inst(compiler, 0x0700 /* 2-byte nop */);
	case SLJIT_LMUL_UW:
		/* MLGR writes the 128-bit product into the arg0 even/odd pair;
		   the pair order is swapped into SLJIT's convention below */
		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
		break;
	case SLJIT_LMUL_SW:
		/* signed multiplication from: */
		/* Hacker's Delight, Second Edition: Chapter 8-3. */
		FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0)));
		FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0)));
		FAIL_IF(push_inst(compiler, ngr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, ngr(tmp1, arg0)));

		/* unsigned multiplication */
		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));

		/* correct the high half for signedness */
		FAIL_IF(push_inst(compiler, sgr(arg0, tmp0)));
		FAIL_IF(push_inst(compiler, sgr(arg0, tmp1)));
		break;
	case SLJIT_DIV_U32:
	case SLJIT_DIVMOD_U32:
		/* DLR divides the tmp0:tmp1 even/odd pair; clear the high word */
		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dlr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_U32)
			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_S32:
	case SLJIT_DIVMOD_S32:
		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_S32)
			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_UW:
	case SLJIT_DIVMOD_UW:
		FAIL_IF(push_inst(compiler, lghi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_UW)
			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_SW:
	case SLJIT_DIVMOD_SW:
		/* no clear of tmp0 here — presumably DSGR reads only the odd
		   register of the pair as the dividend; TODO confirm */
		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_SW)
			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_ENDBR:
		return SLJIT_SUCCESS;
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		return SLJIT_SUCCESS;
	default:
		SLJIT_UNREACHABLE();
	}
	/* swap result registers (reached only by the LMUL cases above) */
	FAIL_IF(push_inst(compiler, lgr(tmp0, arg0)));
	FAIL_IF(push_inst(compiler, lgr(arg0, arg1)));
	return push_inst(compiler, lgr(arg1, tmp0));
}
1539
1540 /* LEVAL will be defined later with different parameters as needed */
1541 #define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2)
1542
sljit_emit_op1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1543 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1544 sljit_s32 dst, sljit_sw dstw,
1545 sljit_s32 src, sljit_sw srcw)
1546 {
1547 sljit_ins ins;
1548 struct addr mem;
1549 sljit_gpr dst_r;
1550 sljit_gpr src_r;
1551 sljit_s32 opcode = GET_OPCODE(op);
1552
1553 CHECK_ERROR();
1554 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1555 ADJUST_LOCAL_OFFSET(dst, dstw);
1556 ADJUST_LOCAL_OFFSET(src, srcw);
1557
1558 if ((dst == SLJIT_UNUSED) && !HAS_FLAGS(op)) {
1559 /* TODO(carenas): implement prefetch? */
1560 return SLJIT_SUCCESS;
1561 }
1562 if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) {
1563 /* LOAD REGISTER */
1564 if (FAST_IS_REG(dst) && FAST_IS_REG(src)) {
1565 dst_r = gpr(dst);
1566 src_r = gpr(src);
1567 switch (opcode | (op & SLJIT_I32_OP)) {
1568 /* 32-bit */
1569 case SLJIT_MOV32_U8:
1570 ins = llcr(dst_r, src_r);
1571 break;
1572 case SLJIT_MOV32_S8:
1573 ins = lbr(dst_r, src_r);
1574 break;
1575 case SLJIT_MOV32_U16:
1576 ins = llhr(dst_r, src_r);
1577 break;
1578 case SLJIT_MOV32_S16:
1579 ins = lhr(dst_r, src_r);
1580 break;
1581 case SLJIT_MOV32:
1582 ins = lr(dst_r, src_r);
1583 break;
1584 /* 64-bit */
1585 case SLJIT_MOV_U8:
1586 ins = llgcr(dst_r, src_r);
1587 break;
1588 case SLJIT_MOV_S8:
1589 ins = lgbr(dst_r, src_r);
1590 break;
1591 case SLJIT_MOV_U16:
1592 ins = llghr(dst_r, src_r);
1593 break;
1594 case SLJIT_MOV_S16:
1595 ins = lghr(dst_r, src_r);
1596 break;
1597 case SLJIT_MOV_U32:
1598 ins = llgfr(dst_r, src_r);
1599 break;
1600 case SLJIT_MOV_S32:
1601 ins = lgfr(dst_r, src_r);
1602 break;
1603 case SLJIT_MOV:
1604 case SLJIT_MOV_P:
1605 ins = lgr(dst_r, src_r);
1606 break;
1607 default:
1608 ins = 0;
1609 SLJIT_UNREACHABLE();
1610 }
1611 FAIL_IF(push_inst(compiler, ins));
1612 if (HAS_FLAGS(op)) {
1613 /* only handle zero flag */
1614 SLJIT_ASSERT(!(op & VARIABLE_FLAG_MASK));
1615 return push_store_zero_flag(compiler, op, dst_r);
1616 }
1617 return SLJIT_SUCCESS;
1618 }
1619 /* LOAD IMMEDIATE */
1620 if (FAST_IS_REG(dst) && (src & SLJIT_IMM)) {
1621 switch (opcode) {
1622 case SLJIT_MOV_U8:
1623 srcw = (sljit_sw)((sljit_u8)(srcw));
1624 break;
1625 case SLJIT_MOV_S8:
1626 srcw = (sljit_sw)((sljit_s8)(srcw));
1627 break;
1628 case SLJIT_MOV_U16:
1629 srcw = (sljit_sw)((sljit_u16)(srcw));
1630 break;
1631 case SLJIT_MOV_S16:
1632 srcw = (sljit_sw)((sljit_s16)(srcw));
1633 break;
1634 case SLJIT_MOV_U32:
1635 srcw = (sljit_sw)((sljit_u32)(srcw));
1636 break;
1637 case SLJIT_MOV_S32:
1638 srcw = (sljit_sw)((sljit_s32)(srcw));
1639 break;
1640 }
1641 return push_load_imm_inst(compiler, gpr(dst), srcw);
1642 }
1643 /* LOAD */
1644 /* TODO(carenas): avoid reg being defined later */
1645 #define LEVAL(i) EVAL(i, reg, mem)
1646 if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) {
1647 sljit_gpr reg = gpr(dst);
1648
1649 FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
1650 /* TODO(carenas): convert all calls below to LEVAL */
1651 switch (opcode | (op & SLJIT_I32_OP)) {
1652 case SLJIT_MOV32_U8:
1653 ins = llc(reg, mem.offset, mem.index, mem.base);
1654 break;
1655 case SLJIT_MOV32_S8:
1656 ins = lb(reg, mem.offset, mem.index, mem.base);
1657 break;
1658 case SLJIT_MOV32_U16:
1659 ins = llh(reg, mem.offset, mem.index, mem.base);
1660 break;
1661 case SLJIT_MOV32_S16:
1662 ins = WHEN2(is_u12(mem.offset), lh, lhy);
1663 break;
1664 case SLJIT_MOV32:
1665 ins = WHEN2(is_u12(mem.offset), l, ly);
1666 break;
1667 case SLJIT_MOV_U8:
1668 ins = LEVAL(llgc);
1669 break;
1670 case SLJIT_MOV_S8:
1671 ins = lgb(reg, mem.offset, mem.index, mem.base);
1672 break;
1673 case SLJIT_MOV_U16:
1674 ins = LEVAL(llgh);
1675 break;
1676 case SLJIT_MOV_S16:
1677 ins = lgh(reg, mem.offset, mem.index, mem.base);
1678 break;
1679 case SLJIT_MOV_U32:
1680 ins = LEVAL(llgf);
1681 break;
1682 case SLJIT_MOV_S32:
1683 ins = lgf(reg, mem.offset, mem.index, mem.base);
1684 break;
1685 case SLJIT_MOV_P:
1686 case SLJIT_MOV:
1687 ins = lg(reg, mem.offset, mem.index, mem.base);
1688 break;
1689 default:
1690 SLJIT_UNREACHABLE();
1691 }
1692 FAIL_IF(push_inst(compiler, ins));
1693 if (HAS_FLAGS(op)) {
1694 /* only handle zero flag */
1695 SLJIT_ASSERT(!(op & VARIABLE_FLAG_MASK));
1696 return push_store_zero_flag(compiler, op, reg);
1697 }
1698 return SLJIT_SUCCESS;
1699 }
1700 /* STORE and STORE IMMEDIATE */
1701 if ((dst & SLJIT_MEM)
1702 && (FAST_IS_REG(src) || (src & SLJIT_IMM))) {
1703 sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;
1704 if (src & SLJIT_IMM) {
1705 /* TODO(mundaym): MOVE IMMEDIATE? */
1706 FAIL_IF(push_load_imm_inst(compiler, reg, srcw));
1707 }
1708 struct addr mem;
1709 FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
1710 switch (opcode) {
1711 case SLJIT_MOV_U8:
1712 case SLJIT_MOV_S8:
1713 return push_inst(compiler,
1714 WHEN2(is_u12(mem.offset), stc, stcy));
1715 case SLJIT_MOV_U16:
1716 case SLJIT_MOV_S16:
1717 return push_inst(compiler,
1718 WHEN2(is_u12(mem.offset), sth, sthy));
1719 case SLJIT_MOV_U32:
1720 case SLJIT_MOV_S32:
1721 return push_inst(compiler,
1722 WHEN2(is_u12(mem.offset), st, sty));
1723 case SLJIT_MOV_P:
1724 case SLJIT_MOV:
1725 FAIL_IF(push_inst(compiler, LEVAL(stg)));
1726 if (HAS_FLAGS(op)) {
1727 /* only handle zero flag */
1728 SLJIT_ASSERT(!(op & VARIABLE_FLAG_MASK));
1729 return push_store_zero_flag(compiler, op, reg);
1730 }
1731 return SLJIT_SUCCESS;
1732 default:
1733 SLJIT_UNREACHABLE();
1734 }
1735 }
1736 #undef LEVAL
1737 /* MOVE CHARACTERS */
1738 if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) {
1739 struct addr mem;
1740 FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
1741 switch (opcode) {
1742 case SLJIT_MOV_U8:
1743 case SLJIT_MOV_S8:
1744 FAIL_IF(push_inst(compiler,
1745 EVAL(llgc, tmp0, mem)));
1746 FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
1747 return push_inst(compiler,
1748 EVAL(stcy, tmp0, mem));
1749 case SLJIT_MOV_U16:
1750 case SLJIT_MOV_S16:
1751 FAIL_IF(push_inst(compiler,
1752 EVAL(llgh, tmp0, mem)));
1753 FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
1754 return push_inst(compiler,
1755 EVAL(sthy, tmp0, mem));
1756 case SLJIT_MOV_U32:
1757 case SLJIT_MOV_S32:
1758 FAIL_IF(push_inst(compiler,
1759 EVAL(ly, tmp0, mem)));
1760 FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
1761 return push_inst(compiler,
1762 EVAL(sty, tmp0, mem));
1763 case SLJIT_MOV_P:
1764 case SLJIT_MOV:
1765 FAIL_IF(push_inst(compiler,
1766 EVAL(lg, tmp0, mem)));
1767 FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
1768 FAIL_IF(push_inst(compiler,
1769 EVAL(stg, tmp0, mem)));
1770 if (HAS_FLAGS(op)) {
1771 /* only handle zero flag */
1772 SLJIT_ASSERT(!(op & VARIABLE_FLAG_MASK));
1773 return push_store_zero_flag(compiler, op, tmp0);
1774 }
1775 return SLJIT_SUCCESS;
1776 default:
1777 SLJIT_UNREACHABLE();
1778 }
1779 }
1780 SLJIT_UNREACHABLE();
1781 }
1782
1783 SLJIT_ASSERT((src & SLJIT_IMM) == 0); /* no immediates */
1784
1785 dst_r = SLOW_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0;
1786 src_r = FAST_IS_REG(src) ? gpr(REG_MASK & src) : tmp0;
1787 if (src & SLJIT_MEM)
1788 FAIL_IF(load_word(compiler, src_r, src, srcw, tmp1, src & SLJIT_I32_OP));
1789
1790 /* TODO(mundaym): optimize loads and stores */
1791 switch (opcode | (op & SLJIT_I32_OP)) {
1792 case SLJIT_NOT:
1793 /* emulate ~x with x^-1 */
1794 FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
1795 if (src_r != dst_r)
1796 FAIL_IF(push_inst(compiler, lgr(dst_r, src_r)));
1797
1798 FAIL_IF(push_inst(compiler, xgr(dst_r, tmp1)));
1799 break;
1800 case SLJIT_NOT32:
1801 /* emulate ~x with x^-1 */
1802 if (have_eimm())
1803 FAIL_IF(push_inst(compiler, xilf(dst_r, -1)));
1804 else {
1805 FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
1806 if (src_r != dst_r)
1807 FAIL_IF(push_inst(compiler, lr(dst_r, src_r)));
1808
1809 FAIL_IF(push_inst(compiler, xr(dst_r, tmp1)));
1810 }
1811 break;
1812 case SLJIT_NEG:
1813 FAIL_IF(push_inst(compiler, lcgr(dst_r, src_r)));
1814 break;
1815 case SLJIT_NEG32:
1816 FAIL_IF(push_inst(compiler, lcr(dst_r, src_r)));
1817 break;
1818 case SLJIT_CLZ:
1819 if (have_eimm()) {
1820 FAIL_IF(push_inst(compiler, flogr(tmp0, src_r))); /* clobbers tmp1 */
1821 if (dst_r != tmp0)
1822 FAIL_IF(push_inst(compiler, lgr(dst_r, tmp0)));
1823 } else {
1824 abort(); /* TODO(mundaym): no eimm (?) */
1825 }
1826 break;
1827 case SLJIT_CLZ32:
1828 if (have_eimm()) {
1829 FAIL_IF(push_inst(compiler, sllg(tmp1, src_r, 32, 0)));
1830 FAIL_IF(push_inst(compiler, iilf(tmp1, 0xffffffff)));
1831 FAIL_IF(push_inst(compiler, flogr(tmp0, tmp1))); /* clobbers tmp1 */
1832 if (dst_r != tmp0)
1833 FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
1834 } else {
1835 abort(); /* TODO(mundaym): no eimm (?) */
1836 }
1837 break;
1838 default:
1839 SLJIT_UNREACHABLE();
1840 }
1841
1842 /* write condition code to emulated flag register */
1843 if (op & VARIABLE_FLAG_MASK)
1844 FAIL_IF(push_inst(compiler, ipm(flag_r)));
1845
1846 /* write zero flag to emulated flag register */
1847 if (op & SLJIT_SET_Z)
1848 FAIL_IF(push_store_zero_flag(compiler, op, dst_r));
1849
1850 /* TODO(carenas): doesn't need FAIL_IF */
1851 if ((dst != SLJIT_UNUSED) && (dst & SLJIT_MEM))
1852 FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, op & SLJIT_I32_OP));
1853
1854 return SLJIT_SUCCESS;
1855 }
1856
is_commutative(sljit_s32 op)1857 static SLJIT_INLINE int is_commutative(sljit_s32 op)
1858 {
1859 switch (GET_OPCODE(op)) {
1860 case SLJIT_ADD:
1861 case SLJIT_ADDC:
1862 case SLJIT_MUL:
1863 case SLJIT_AND:
1864 case SLJIT_OR:
1865 case SLJIT_XOR:
1866 return 1;
1867 }
1868 return 0;
1869 }
1870
is_shift(sljit_s32 op)1871 static SLJIT_INLINE int is_shift(sljit_s32 op) {
1872 sljit_s32 v = GET_OPCODE(op);
1873 return (v == SLJIT_SHL || v == SLJIT_ASHR || v == SLJIT_LSHR) ? 1 : 0;
1874 }
1875
sets_signed_flag(sljit_s32 op)1876 static SLJIT_INLINE int sets_signed_flag(sljit_s32 op)
1877 {
1878 switch (GET_FLAG_TYPE(op)) {
1879 case SLJIT_OVERFLOW:
1880 case SLJIT_NOT_OVERFLOW:
1881 case SLJIT_SIG_LESS:
1882 case SLJIT_SIG_LESS_EQUAL:
1883 case SLJIT_SIG_GREATER:
1884 case SLJIT_SIG_GREATER_EQUAL:
1885 return 1;
1886 }
1887 return 0;
1888 }
1889
1890 /* Report whether we have an instruction for:
1891 op dst src imm
1892 where dst and src are separate registers. */
static int have_op_3_imm(sljit_s32 op, sljit_sw imm) {
	/* silence -Wunused-parameter until this is implemented */
	(void)op;
	(void)imm;
	return 0; /* TODO(mundaym): implement */
}
1896
1897 /* Report whether we have an instruction for:
1898 op reg imm
1899 where reg is both a source and the destination. */
static int have_op_2_imm(sljit_s32 op, sljit_sw imm) {
	switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) {
	case SLJIT_ADD32:
	case SLJIT_ADD:
		/* signed-immediate adds set signed flags correctly; the
		   unsigned form (algfi) needs eimm and a u32 immediate */
		if (!HAS_FLAGS(op) || sets_signed_flag(op))
			return have_eimm() ? is_s32(imm) : is_s16(imm);

		return have_eimm() && is_u32(imm);
	case SLJIT_MUL32:
	case SLJIT_MUL:
		/* TODO(mundaym): general extension check */
		/* for ms{,g}fi */
		if (op & VARIABLE_FLAG_MASK)
			return 0;

		return have_genext() && is_s16(imm);
	case SLJIT_OR32:
	case SLJIT_XOR32:
	case SLJIT_AND32:
		/* only use if have extended immediate facility */
		/* this ensures flags are set correctly */
		return have_eimm();
	case SLJIT_AND:
	case SLJIT_OR:
	case SLJIT_XOR:
		/* TODO(mundaym): make this more flexible */
		/* avoid using immediate variations, flags */
		/* won't be set correctly */
		return 0;
	case SLJIT_ADDC32:
	case SLJIT_ADDC:
		/* no ADD LOGICAL WITH CARRY IMMEDIATE */
		return 0;
	case SLJIT_SUB:
	case SLJIT_SUB32:
	case SLJIT_SUBC:
	case SLJIT_SUBC32:
		/* no SUBTRACT IMMEDIATE */
		/* TODO(mundaym): SUBTRACT LOGICAL IMMEDIATE */
		return 0;
	}
	return 0;
}
1943
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)1944 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
1945 sljit_s32 dst, sljit_sw dstw,
1946 sljit_s32 src1, sljit_sw src1w,
1947 sljit_s32 src2, sljit_sw src2w)
1948 {
1949 CHECK_ERROR();
1950 CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
1951 ADJUST_LOCAL_OFFSET(dst, dstw);
1952 ADJUST_LOCAL_OFFSET(src1, src1w);
1953 ADJUST_LOCAL_OFFSET(src2, src2w);
1954
1955 if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
1956 return SLJIT_SUCCESS;
1957
1958 sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
1959
1960 if (is_commutative(op)) {
1961 #define SWAP_ARGS \
1962 do { \
1963 sljit_s32 t = src1; \
1964 sljit_sw tw = src1w; \
1965 src1 = src2; \
1966 src1w = src2w; \
1967 src2 = t; \
1968 src2w = tw; \
1969 } while(0);
1970
1971 /* prefer immediate in src2 */
1972 if (src1 & SLJIT_IMM) {
1973 SWAP_ARGS
1974 }
1975
1976 /* prefer to have src1 use same register as dst */
1977 if (FAST_IS_REG(src2) && gpr(src2 & REG_MASK) == dst_r) {
1978 SWAP_ARGS
1979 }
1980
1981 /* prefer memory argument in src2 */
1982 if (FAST_IS_REG(src2) && (src1 & SLJIT_MEM)) {
1983 SWAP_ARGS
1984 }
1985 #undef SWAP_ARGS
1986 }
1987
1988 /* src1 must be in a register */
1989 sljit_gpr src1_r = FAST_IS_REG(src1) ? gpr(src1 & REG_MASK) : tmp0;
1990 if (src1 & SLJIT_IMM)
1991 FAIL_IF(push_load_imm_inst(compiler, src1_r, src1w));
1992
1993 if (src1 & SLJIT_MEM)
1994 FAIL_IF(load_word(compiler, src1_r, src1, src1w, tmp1, op & SLJIT_I32_OP));
1995
1996 /* emit comparison before subtract */
1997 if (GET_OPCODE(op) == SLJIT_SUB && (op & VARIABLE_FLAG_MASK)) {
1998 sljit_sw cmp = 0;
1999 switch (GET_FLAG_TYPE(op)) {
2000 case SLJIT_LESS:
2001 case SLJIT_LESS_EQUAL:
2002 case SLJIT_GREATER:
2003 case SLJIT_GREATER_EQUAL:
2004 cmp = 1; /* unsigned */
2005 break;
2006 case SLJIT_EQUAL:
2007 case SLJIT_SIG_LESS:
2008 case SLJIT_SIG_LESS_EQUAL:
2009 case SLJIT_SIG_GREATER:
2010 case SLJIT_SIG_GREATER_EQUAL:
2011 cmp = -1; /* signed */
2012 break;
2013 }
2014 if (cmp) {
2015 /* clear flags - no need to generate now */
2016 op &= ~VARIABLE_FLAG_MASK;
2017 sljit_gpr src2_r = FAST_IS_REG(src2) ? gpr(src2 & REG_MASK) : tmp1;
2018 if (src2 & SLJIT_IMM) {
2019 #define LEVAL(i) i(src1_r, src2w)
2020 if (cmp > 0 && is_u32(src2w)) {
2021 /* unsigned */
2022 FAIL_IF(push_inst(compiler,
2023 WHEN2(op & SLJIT_I32_OP, clfi, clgfi)));
2024 }
2025 else if (cmp < 0 && is_s16(src2w)) {
2026 /* signed */
2027 FAIL_IF(push_inst(compiler,
2028 WHEN2(op & SLJIT_I32_OP, chi, cghi)));
2029 }
2030 else if (cmp < 0 && is_s32(src2w)) {
2031 /* signed */
2032 FAIL_IF(push_inst(compiler,
2033 WHEN2(op & SLJIT_I32_OP, cfi, cgfi)));
2034 }
2035 #undef LEVAL
2036 #define LEVAL(i) i(src1_r, src2_r)
2037 else {
2038 FAIL_IF(push_load_imm_inst(compiler, src2_r, src2w));
2039 if (cmp > 0) {
2040 /* unsigned */
2041 FAIL_IF(push_inst(compiler,
2042 WHEN2(op & SLJIT_I32_OP, clr, clgr)));
2043 }
2044 if (cmp < 0) {
2045 /* signed */
2046 FAIL_IF(push_inst(compiler,
2047 WHEN2(op & SLJIT_I32_OP, cr, cgr)));
2048 }
2049 }
2050 }
2051 else {
2052 if (src2 & SLJIT_MEM) {
2053 /* TODO(mundaym): comparisons with memory */
2054 /* load src2 into register */
2055 FAIL_IF(load_word(compiler, src2_r, src2, src2w, tmp1, op & SLJIT_I32_OP));
2056 }
2057 if (cmp > 0) {
2058 /* unsigned */
2059 FAIL_IF(push_inst(compiler,
2060 WHEN2(op & SLJIT_I32_OP, clr, clgr)));
2061 }
2062 if (cmp < 0) {
2063 /* signed */
2064 FAIL_IF(push_inst(compiler,
2065 WHEN2(op & SLJIT_I32_OP, cr, cgr)));
2066 }
2067 #undef LEVAL
2068 }
2069 FAIL_IF(push_inst(compiler, ipm(flag_r)));
2070 }
2071 }
2072
2073 if (!HAS_FLAGS(op) && dst == SLJIT_UNUSED)
2074 return SLJIT_SUCCESS;
2075
2076 /* need to specify signed or logical operation */
2077 int signed_flags = sets_signed_flag(op);
2078
2079 if (is_shift(op)) {
2080 /* handle shifts first, they have more constraints than other operations */
2081 sljit_sw d = 0;
2082 sljit_gpr b = FAST_IS_REG(src2) ? gpr(src2 & REG_MASK) : r0;
2083 if (src2 & SLJIT_IMM)
2084 d = src2w & ((op & SLJIT_I32_OP) ? 31 : 63);
2085
2086 if (src2 & SLJIT_MEM) {
2087 /* shift amount (b) cannot be in r0 (i.e. tmp0) */
2088 FAIL_IF(load_word(compiler, tmp1, src2, src2w, tmp1, op & SLJIT_I32_OP));
2089 b = tmp1;
2090 }
2091 /* src1 and dst share the same register in the base 32-bit ISA */
2092 /* TODO(mundaym): not needed when distinct-operand facility is available */
2093 int workaround_alias = op & SLJIT_I32_OP && src1_r != dst_r;
2094 if (workaround_alias) {
2095 /* put src1 into tmp0 so we can overwrite it */
2096 FAIL_IF(push_inst(compiler, lr(tmp0, src1_r)));
2097 src1_r = tmp0;
2098 }
2099 switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) {
2100 case SLJIT_SHL:
2101 FAIL_IF(push_inst(compiler, sllg(dst_r, src1_r, d, b)));
2102 break;
2103 case SLJIT_SHL32:
2104 FAIL_IF(push_inst(compiler, sll(src1_r, d, b)));
2105 break;
2106 case SLJIT_LSHR:
2107 FAIL_IF(push_inst(compiler, srlg(dst_r, src1_r, d, b)));
2108 break;
2109 case SLJIT_LSHR32:
2110 FAIL_IF(push_inst(compiler, srl(src1_r, d, b)));
2111 break;
2112 case SLJIT_ASHR:
2113 FAIL_IF(push_inst(compiler, srag(dst_r, src1_r, d, b)));
2114 break;
2115 case SLJIT_ASHR32:
2116 FAIL_IF(push_inst(compiler, sra(src1_r, d, b)));
2117 break;
2118 default:
2119 SLJIT_UNREACHABLE();
2120 }
2121 if (workaround_alias && dst_r != src1_r)
2122 FAIL_IF(push_inst(compiler, lr(dst_r, src1_r)));
2123
2124 }
2125 else if ((GET_OPCODE(op) == SLJIT_MUL) && HAS_FLAGS(op)) {
2126 /* multiply instructions do not generally set flags so we need to manually */
2127 /* detect overflow conditions */
2128 /* TODO(mundaym): 64-bit overflow */
2129 SLJIT_ASSERT(GET_FLAG_TYPE(op) == SLJIT_MUL_OVERFLOW ||
2130 GET_FLAG_TYPE(op) == SLJIT_MUL_NOT_OVERFLOW);
2131 sljit_gpr src2_r = FAST_IS_REG(src2) ? gpr(src2 & REG_MASK) : tmp1;
2132 if (src2 & SLJIT_IMM) {
2133 /* load src2 into register */
2134 FAIL_IF(push_load_imm_inst(compiler, src2_r, src2w));
2135 }
2136 if (src2 & SLJIT_MEM) {
2137 /* load src2 into register */
2138 FAIL_IF(load_word(compiler, src2_r, src2, src2w, tmp1, op & SLJIT_I32_OP));
2139 }
2140 if (have_misc2()) {
2141 #define LEVAL(i) i(dst_r, src1_r, src2_r)
2142 FAIL_IF(push_inst(compiler,
2143 WHEN2(op & SLJIT_I32_OP, msrkc, msgrkc)));
2144 #undef LEVAL
2145 }
2146 else if (op & SLJIT_I32_OP) {
2147 op &= ~VARIABLE_FLAG_MASK;
2148 FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r)));
2149 FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r)));
2150 if (dst_r != tmp0) {
2151 FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
2152 }
2153 FAIL_IF(push_inst(compiler, aih(tmp0, 1)));
2154 FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U)));
2155 FAIL_IF(push_inst(compiler, ipm(flag_r)));
2156 FAIL_IF(push_inst(compiler, oilh(flag_r, 0x2000)));
2157 }
2158 else
2159 return SLJIT_ERR_UNSUPPORTED;
2160
2161 }
2162 else if ((GET_OPCODE(op) == SLJIT_SUB) && (op & SLJIT_SET_Z) && !signed_flags) {
2163 /* subtract logical instructions do not set the right flags unfortunately */
2164 /* instead, negate src2 and issue an add logical */
2165 /* TODO(mundaym): distinct operand facility where needed */
2166 if (src1_r != dst_r && src1_r != tmp0) {
2167 #define LEVAL(i) i(tmp0, src1_r)
2168 FAIL_IF(push_inst(compiler,
2169 WHEN2(op & SLJIT_I32_OP, lr, lgr)));
2170 src1_r = tmp0;
2171 #undef LEVAL
2172 }
2173 sljit_gpr src2_r = FAST_IS_REG(src2) ? gpr(src2 & REG_MASK) : tmp1;
2174 if (src2 & SLJIT_IMM) {
2175 /* load src2 into register */
2176 FAIL_IF(push_load_imm_inst(compiler, src2_r, src2w));
2177 }
2178 if (src2 & SLJIT_MEM) {
2179 /* load src2 into register */
2180 FAIL_IF(load_word(compiler, src2_r, src2, src2w, tmp1, op & SLJIT_I32_OP));
2181 }
2182 if (op & SLJIT_I32_OP) {
2183 FAIL_IF(push_inst(compiler, lcr(tmp1, src2_r)));
2184 FAIL_IF(push_inst(compiler, alr(src1_r, tmp1)));
2185 if (src1_r != dst_r)
2186 FAIL_IF(push_inst(compiler, lr(dst_r, src1_r)));
2187 }
2188 else {
2189 FAIL_IF(push_inst(compiler, lcgr(tmp1, src2_r)));
2190 FAIL_IF(push_inst(compiler, algr(src1_r, tmp1)));
2191 if (src1_r != dst_r)
2192 FAIL_IF(push_inst(compiler, lgr(dst_r, src1_r)));
2193 }
2194 }
2195 else if ((src2 & SLJIT_IMM) && (src1_r == dst_r) && have_op_2_imm(op, src2w)) {
2196 switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) {
2197 #define LEVAL(i) i(dst_r, src2w)
2198 case SLJIT_ADD:
2199 if (!HAS_FLAGS(op) || signed_flags) {
2200 FAIL_IF(push_inst(compiler,
2201 WHEN2(is_s16(src2w), aghi, agfi)));
2202 }
2203 else
2204 FAIL_IF(push_inst(compiler, LEVAL(algfi)));
2205
2206 break;
2207 case SLJIT_ADD32:
2208 if (!HAS_FLAGS(op) || signed_flags)
2209 FAIL_IF(push_inst(compiler,
2210 WHEN2(is_s16(src2w), ahi, afi)));
2211 else
2212 FAIL_IF(push_inst(compiler, LEVAL(alfi)));
2213
2214 break;
2215 #undef LEVAL /* TODO(carenas): move down and refactor? */
2216 case SLJIT_MUL:
2217 FAIL_IF(push_inst(compiler, mhi(dst_r, src2w)));
2218 break;
2219 case SLJIT_MUL32:
2220 FAIL_IF(push_inst(compiler, mghi(dst_r, src2w)));
2221 break;
2222 case SLJIT_OR32:
2223 FAIL_IF(push_inst(compiler, oilf(dst_r, src2w)));
2224 break;
2225 case SLJIT_XOR32:
2226 FAIL_IF(push_inst(compiler, xilf(dst_r, src2w)));
2227 break;
2228 case SLJIT_AND32:
2229 FAIL_IF(push_inst(compiler, nilf(dst_r, src2w)));
2230 break;
2231 default:
2232 SLJIT_UNREACHABLE();
2233 }
2234 }
2235 else if ((src2 & SLJIT_IMM) && have_op_3_imm(op, src2w)) {
2236 abort(); /* TODO(mundaym): implement */
2237 }
2238 else if ((src2 & SLJIT_MEM) && (dst_r == src1_r)) {
2239 /* most 32-bit instructions can only handle 12-bit immediate offsets */
2240 int need_u12 = !have_ldisp() &&
2241 (op & SLJIT_I32_OP) &&
2242 (GET_OPCODE(op) != SLJIT_ADDC) &&
2243 (GET_OPCODE(op) != SLJIT_SUBC);
2244 struct addr mem;
2245 if (need_u12)
2246 FAIL_IF(make_addr_bx(compiler, &mem, src2, src2w, tmp1));
2247 else
2248 FAIL_IF(make_addr_bxy(compiler, &mem, src2, src2w, tmp1));
2249
2250 int can_u12 = is_u12(mem.offset) ? 1 : 0;
2251 sljit_ins ins = 0;
2252 switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) {
2253 /* 64-bit ops */
2254 #define LEVAL(i) EVAL(i, dst_r, mem)
2255 case SLJIT_ADD:
2256 ins = WHEN2(signed_flags, ag, alg);
2257 break;
2258 case SLJIT_SUB:
2259 ins = WHEN2(signed_flags, sg, slg);
2260 break;
2261 case SLJIT_ADDC:
2262 ins = LEVAL(alcg);
2263 break;
2264 case SLJIT_SUBC:
2265 ins = LEVAL(slbg);
2266 break;
2267 case SLJIT_MUL:
2268 ins = LEVAL(msg);
2269 break;
2270 case SLJIT_OR:
2271 ins = LEVAL(og);
2272 break;
2273 case SLJIT_XOR:
2274 ins = LEVAL(xg);
2275 break;
2276 case SLJIT_AND:
2277 ins = LEVAL(ng);
2278 break;
2279 /* 32-bit ops */
2280 case SLJIT_ADD32:
2281 if (signed_flags)
2282 ins = WHEN2(can_u12, a, ay);
2283 else
2284 ins = WHEN2(can_u12, al, aly);
2285 break;
2286 case SLJIT_SUB32:
2287 if (signed_flags)
2288 ins = WHEN2(can_u12, s, sy);
2289 else
2290 ins = WHEN2(can_u12, sl, sly);
2291 break;
2292 case SLJIT_ADDC32:
2293 ins = LEVAL(alc);
2294 break;
2295 case SLJIT_SUBC32:
2296 ins = LEVAL(slb);
2297 break;
2298 case SLJIT_MUL32:
2299 ins = WHEN2(can_u12, ms, msy);
2300 break;
2301 case SLJIT_OR32:
2302 ins = WHEN2(can_u12, o, oy);
2303 break;
2304 case SLJIT_XOR32:
2305 ins = WHEN2(can_u12, x, xy);
2306 break;
2307 case SLJIT_AND32:
2308 ins = WHEN2(can_u12, n, ny);
2309 break;
2310 #undef LEVAL
2311 default:
2312 SLJIT_UNREACHABLE();
2313 }
2314 FAIL_IF(push_inst(compiler, ins));
2315 }
2316 else {
2317 sljit_gpr src2_r = FAST_IS_REG(src2) ? gpr(src2 & REG_MASK) : tmp1;
2318 if (src2 & SLJIT_IMM) {
2319 /* load src2 into register */
2320 FAIL_IF(push_load_imm_inst(compiler, src2_r, src2w));
2321 }
2322 if (src2 & SLJIT_MEM) {
2323 /* load src2 into register */
2324 FAIL_IF(load_word(compiler, src2_r, src2, src2w, tmp1, op & SLJIT_I32_OP));
2325 }
2326 /* TODO(mundaym): distinct operand facility where needed */
2327 #define LEVAL(i) i(tmp0, src1_r)
2328 if (src1_r != dst_r && src1_r != tmp0) {
2329 FAIL_IF(push_inst(compiler,
2330 WHEN2(op & SLJIT_I32_OP, lr, lgr)));
2331 src1_r = tmp0;
2332 }
2333 #undef LEVAL
2334 sljit_ins ins = 0;
2335 switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) {
2336 #define LEVAL(i) i(src1_r, src2_r)
2337 /* 64-bit ops */
2338 case SLJIT_ADD:
2339 ins = WHEN2(signed_flags, agr, algr);
2340 break;
2341 case SLJIT_SUB:
2342 ins = WHEN2(signed_flags, sgr, slgr);
2343 break;
2344 case SLJIT_ADDC:
2345 ins = LEVAL(alcgr);
2346 break;
2347 case SLJIT_SUBC:
2348 ins = LEVAL(slbgr);
2349 break;
2350 case SLJIT_MUL:
2351 ins = LEVAL(msgr);
2352 break;
2353 case SLJIT_AND:
2354 ins = LEVAL(ngr);
2355 break;
2356 case SLJIT_OR:
2357 ins = LEVAL(ogr);
2358 break;
2359 case SLJIT_XOR:
2360 ins = LEVAL(xgr);
2361 break;
2362 /* 32-bit ops */
2363 case SLJIT_ADD32:
2364 ins = WHEN2(signed_flags, ar, alr);
2365 break;
2366 case SLJIT_SUB32:
2367 ins = WHEN2(signed_flags, sr, slr);
2368 break;
2369 case SLJIT_ADDC32:
2370 ins = LEVAL(alcr);
2371 break;
2372 case SLJIT_SUBC32:
2373 ins = LEVAL(slbr);
2374 break;
2375 case SLJIT_MUL32:
2376 ins = LEVAL(msr);
2377 break;
2378 case SLJIT_AND32:
2379 ins = LEVAL(nr);
2380 break;
2381 case SLJIT_OR32:
2382 ins = LEVAL(or);
2383 break;
2384 case SLJIT_XOR32:
2385 ins = LEVAL(xr);
2386 break;
2387 #undef LEVAL
2388 default:
2389 SLJIT_UNREACHABLE();
2390 }
2391 FAIL_IF(push_inst(compiler, ins));
2392 #define LEVAL(i) i(dst_r, src1_r)
2393 if (src1_r != dst_r)
2394 FAIL_IF(push_inst(compiler,
2395 WHEN2(op & SLJIT_I32_OP, lr, lgr)));
2396 #undef LEVAL
2397 }
2398
2399 /* write condition code to emulated flag register */
2400 if (op & VARIABLE_FLAG_MASK)
2401 FAIL_IF(push_inst(compiler, ipm(flag_r)));
2402
2403 /* write zero flag to emulated flag register */
2404 if (op & SLJIT_SET_Z)
2405 FAIL_IF(push_store_zero_flag(compiler, op, dst_r));
2406
2407 /* finally write the result to memory if required */
2408 if (dst & SLJIT_MEM) {
2409 SLJIT_ASSERT(dst_r != tmp1);
2410 /* TODO(carenas): s/FAIL_IF/ return */
2411 FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, op & SLJIT_I32_OP));
2412 }
2413
2414 return SLJIT_SUCCESS;
2415 }
2416
sljit_emit_op_src(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)2417 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(
2418 struct sljit_compiler *compiler,
2419 sljit_s32 op, sljit_s32 src, sljit_sw srcw)
2420 {
2421 sljit_gpr src_r;
2422
2423 CHECK_ERROR();
2424 CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2425 ADJUST_LOCAL_OFFSET(src, srcw);
2426
2427 switch (op) {
2428 case SLJIT_FAST_RETURN:
2429 src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
2430 if (src & SLJIT_MEM)
2431 FAIL_IF(load_word(compiler, tmp1, src, srcw, tmp1, 0));
2432
2433 return push_inst(compiler, br(src_r));
2434 case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2435 /* TODO(carenas): implement? */
2436 return SLJIT_SUCCESS;
2437 case SLJIT_PREFETCH_L1:
2438 case SLJIT_PREFETCH_L2:
2439 case SLJIT_PREFETCH_L3:
2440 case SLJIT_PREFETCH_ONCE:
2441 /* TODO(carenas): implement */
2442 return SLJIT_SUCCESS;
2443 default:
2444 /* TODO(carenas): probably should not success by default */
2445 return SLJIT_SUCCESS;
2446 }
2447
2448 return SLJIT_SUCCESS;
2449 }
2450
/* Return the hardware GPR number backing the given abstract sljit register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
	return gpr(reg);
}
2456
/* Floating point registers are not yet supported on this target: the
   argument check runs, then the process is aborted. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	abort();
}
2462
/* Emit a raw, caller-encoded instruction of `size` bytes.
   The bytes are copied into the tail of the sljit_ins value so that,
   value-wise, the encoding occupies the low-order bits of `ins`
   (s390x is big-endian, so higher addresses hold less significant bytes).
   NOTE(review): assumes size <= sizeof(sljit_ins); the CHECK above is
   expected to enforce a valid instruction length - confirm upstream. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_s32 size)
{
	sljit_ins ins = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, size);
	return push_inst(compiler, ins);
}
2474
2475 /* --------------------------------------------------------------------- */
2476 /* Floating point operators */
2477 /* --------------------------------------------------------------------- */
2478
/* Single-operand floating point ops: not implemented on this target. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	abort();
}
2486
/* Two-operand floating point ops: not implemented on this target. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	abort();
}
2495
2496 /* --------------------------------------------------------------------- */
2497 /* Other instructions */
2498 /* --------------------------------------------------------------------- */
2499
sljit_emit_fast_enter(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)2500 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
2501 {
2502 CHECK_ERROR();
2503 CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
2504 ADJUST_LOCAL_OFFSET(dst, dstw);
2505
2506 if (FAST_IS_REG(dst))
2507 return push_inst(compiler, lgr(gpr(dst), fast_link_r));
2508
2509 /* memory */
2510 return store_word(compiler, fast_link_r, dst, dstw, tmp1, 0);
2511 }
2512
2513 /* --------------------------------------------------------------------- */
2514 /* Conditional instructions */
2515 /* --------------------------------------------------------------------- */
2516
sljit_emit_label(struct sljit_compiler * compiler)2517 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2518 {
2519 struct sljit_label *label;
2520
2521 CHECK_ERROR_PTR();
2522 CHECK_PTR(check_sljit_emit_label(compiler));
2523
2524 if (compiler->last_label && compiler->last_label->size == compiler->size)
2525 return compiler->last_label;
2526
2527 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2528 PTR_FAIL_IF(!label);
2529 set_label(label, compiler);
2530 return label;
2531 }
2532
/* Emit a (possibly conditional) jump or call with a to-be-patched target.
   The jump record stores the index of the branch instruction so the
   relocation pass can fill in the displacement later. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	/* 0xf = unconditional branch mask; otherwise map the sljit condition */
	sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(type & 0xff) : 0xf;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	/* reload condition code */
	/* must happen before jump->addr is recorded below, since it may emit
	   extra instructions */
	if (mask != 0xf)
		PTR_FAIL_IF(push_load_cc(compiler, type & 0xff));

	/* record jump */
	struct sljit_jump *jump = (struct sljit_jump *)
		ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	jump->addr = compiler->size;

	/* emit jump instruction */
	/* calls use BRASL (branch relative and save); fast calls save into
	   fast_link_r, normal calls into link_r; plain jumps use BRCL */
	type &= 0xff;
	if (type >= SLJIT_FAST_CALL)
		PTR_FAIL_IF(push_inst(compiler, brasl(type == SLJIT_FAST_CALL ? fast_link_r : link_r, 0)));
	else
		PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));

	return jump;
}
2560
/* Emit a call; argument marshalling is not needed on this target, so this
   delegates directly to sljit_emit_jump. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	/* the delegated call was already validated above; skip its checks */
	compiler->skip_checks = 1;
#endif

	return sljit_emit_jump(compiler, type);
}
2574
sljit_emit_ijump(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src,sljit_sw srcw)2575 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2576 {
2577 sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
2578
2579 CHECK_ERROR();
2580 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2581 ADJUST_LOCAL_OFFSET(src, srcw);
2582
2583 if (src & SLJIT_IMM) {
2584 SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */
2585 FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
2586 }
2587 else if (src & SLJIT_MEM)
2588 FAIL_IF(load_word(compiler, src_r, src, srcw, tmp1, 0 /* 64-bit */));
2589
2590 /* emit jump instruction */
2591 if (type >= SLJIT_FAST_CALL)
2592 return push_inst(compiler, basr(type == SLJIT_FAST_CALL ? fast_link_r : link_r, src_r));
2593
2594 return push_inst(compiler, br(src_r));
2595 }
2596
/* Emit an indirect call; argument marshalling is not needed on this target,
   so this delegates directly to sljit_emit_ijump. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	/* the delegated call was already validated above; skip its checks */
	compiler->skip_checks = 1;
#endif

	return sljit_emit_ijump(compiler, type, src, srcw);
}
2611
/* Materialize a condition flag as a 0/1 value in dst, optionally combined
   with the old dst via AND/OR/XOR.  Requires the load/store-on-condition 2
   facility (LOCHI/LOCGHI); aborts otherwise. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_u8 mask = get_cc(type & 0xff);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));

	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	/* loc_r receives the 0/1 flag value; for plain MOV it can be dst_r
	   directly, for the bitwise ops it must be a scratch register */
	sljit_gpr loc_r = tmp1;
	switch (GET_OPCODE(op)) {
	case SLJIT_AND:
	case SLJIT_OR:
	case SLJIT_XOR:
		/* dst is also source operand */
		if (dst & SLJIT_MEM)
			FAIL_IF(load_word(compiler, dst_r, dst, dstw, tmp1, op & SLJIT_I32_OP));

		break;
	case SLJIT_MOV:
	case (SLJIT_MOV32 & ~SLJIT_I32_OP):
		/* can write straight into destination */
		loc_r = dst_r;
		break;
	default:
		SLJIT_UNREACHABLE();
	}

	/* reload the emulated condition code if it is not "always" */
	if (mask != 0xf)
		FAIL_IF(push_load_cc(compiler, type & 0xff));

	/* loc_r = 0; then conditionally load 1 when the condition holds */
	/* TODO(mundaym): fold into cmov helper function? */
#define LEVAL(i) i(loc_r, 1, mask)
	if (have_lscond2()) {
		FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_I32_OP, lochi, locghi)));
	} else {
		/* TODO(mundaym): no load/store-on-condition 2 facility (ipm? branch-and-set?) */
		abort();
	}
#undef LEVAL

	/* apply bitwise op and set condition codes */
	switch (GET_OPCODE(op)) {
#define LEVAL(i) i(dst_r, loc_r)
	case SLJIT_AND:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_I32_OP, nr, ngr)));
		break;
	case SLJIT_OR:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_I32_OP, or, ogr)));
		break;
	case SLJIT_XOR:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_I32_OP, xr, xgr)));
		break;
#undef LEVAL
	}

	/* set zero flag if needed */
	if (op & SLJIT_SET_Z)
		FAIL_IF(push_store_zero_flag(compiler, op, dst_r));

	/* store result to memory if required */
	/* TODO(carenas): s/FAIL_IF/ return */
	if (dst & SLJIT_MEM)
		FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, op & SLJIT_I32_OP));

	return SLJIT_SUCCESS;
}
2685
/* Conditional move: dst_reg = src when the condition in `type` holds.
   Requires the load/store-on-condition 1 facility (LOCR/LOCGR); returns
   SLJIT_ERR_UNSUPPORTED otherwise. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 mask = get_cc(type & 0xff);
	sljit_gpr dst_r = gpr(dst_reg & ~SLJIT_I32_OP);
	sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));

	/* reload the emulated condition code if it is not "always" */
	if (mask != 0xf)
		FAIL_IF(push_load_cc(compiler, type & 0xff));

	if (src & SLJIT_IMM) {
		/* immediates are loaded unconditionally into tmp0 first */
		/* TODO(mundaym): fast path with lscond2 */
		FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
	}

#define LEVAL(i) i(dst_r, src_r, mask)
	if (have_lscond1())
		return push_inst(compiler,
			WHEN2(dst_reg & SLJIT_I32_OP, locr, locgr));

#undef LEVAL

	/* TODO(mundaym): implement */
	return SLJIT_ERR_UNSUPPORTED;
}
2715
2716 /* --------------------------------------------------------------------- */
2717 /* Other instructions */
2718 /* --------------------------------------------------------------------- */
2719
2720 /* On s390x we build a literal pool to hold constants. This has two main
2721 advantages:
2722
2723 1. we only need one instruction in the instruction stream (LGRL)
2724 2. we can store 64 bit addresses and use 32 bit offsets
2725
2726 To retrofit the extra information needed to build the literal pool we
2727 add a new sljit_s390x_const struct that contains the initial value but
2728 can still be cast to a sljit_const. */
2729
/* Emit a patchable constant load.  The value lives in the literal pool;
   the emitted instruction is tagged with sljit_ins_const (high halfword of
   the sljit_ins, see top of file) so the generation pass can wire the
   pool offset in later. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_s390x_const *const_;
	sljit_gpr dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));

	/* sljit_s390x_const extends sljit_const with the initial value */
	const_ = (struct sljit_s390x_const*)ensure_abuf(compiler,
		sizeof(struct sljit_s390x_const));
	PTR_FAIL_IF(!const_);
	set_const((struct sljit_const*)const_, compiler);
	const_->init_value = init_value;

	dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	if (have_genext())
		/* one-instruction form: load relative long from the pool */
		PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | lgrl(dst_r, 0)));
	else {
		/* fallback: compute the pool address, then load through it */
		PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | larl(tmp1, 0)));
		PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
	}

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, 0 /* always 64-bit */));

	return (struct sljit_const*)const_;
}
2757
sljit_set_jump_addr(sljit_uw addr,sljit_uw new_target,sljit_sw executable_offset)2758 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
2759 {
2760 /* Update the constant pool. */
2761 sljit_uw *ptr = (sljit_uw *)addr;
2762 SLJIT_UNUSED_ARG(executable_offset);
2763
2764 SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
2765 *ptr = new_target;
2766 SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
2767 SLJIT_CACHE_FLUSH(ptr, ptr + 1);
2768 }
2769
/* Constants and jump targets share the literal-pool patching mechanism. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_set_jump_addr(addr, new_constant, executable_offset);
}
2774
/* Emit a load of a label's address into dst.  Mirrors sljit_emit_const:
   the address is stored in the literal pool and the load is resolved by
   the generation pass (note: the instruction is not tagged here, unlike
   in sljit_emit_const). */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label(
	struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_put_label *put_label;
	sljit_gpr dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
	PTR_FAIL_IF(!put_label);
	set_put_label(put_label, compiler, 0);

	dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;

	if (have_genext())
		/* one-instruction form: load relative long from the pool */
		PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));
	else {
		/* fallback: compute the pool address, then load through it */
		PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));
		PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
	}

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, 0));

	return put_label;
}
2804
2805 /* TODO(carenas): EVAL probably should move up or be refactored */
2806 #undef WHEN2
2807 #undef EVAL
2808
2809 #undef tmp1
2810 #undef tmp0
2811
2812 /* TODO(carenas): undef other macros that spill like is_u12? */
2813