• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/auxv.h>
28 
/* Static facility detection is only possible when the compiler provides
   __ARCH__ (it is compared against architecture levels in
   have_facility_static()). */
#ifdef __ARCH__
#define ENABLE_STATIC_FACILITY_DETECTION 1
#else
#define ENABLE_STATIC_FACILITY_DETECTION 0
#endif
/* Runtime facility detection (see have_facility_dynamic()) is always enabled. */
#define ENABLE_DYNAMIC_FACILITY_DETECTION 1
35 
sljit_get_platform_name(void)36 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
37 {
38 	return "s390x" SLJIT_CPUINFO;
39 }
40 
41 /* Instructions. */
42 typedef sljit_uw sljit_ins;
43 
44 /* Instruction tags (most significant halfword). */
45 const sljit_ins sljit_ins_const = (sljit_ins)1 << 48;
46 
47 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
48 	14, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1
49 };
50 
51 /* there are also a[2-15] available, but they are slower to access and
52  * their use is limited as mundaym explained:
53  *   https://github.com/zherczeg/sljit/pull/91#discussion_r486895689
54  */
55 
56 /* General Purpose Registers [0-15]. */
57 typedef sljit_uw sljit_gpr;
58 
59 /*
60  * WARNING
61  * the following code is non standard and should be improved for
62  * consistency, but doesn't use SLJIT_NUMBER_OF_REGISTERS based
63  * registers because r0 and r1 are the ABI recommended volatiles.
64  * there is a gpr() function that maps sljit to physical register numbers
65  * that should be used instead of the usual index into reg_map[] and
66  * will be retired ASAP (TODO: carenas)
67  */
68 
69 const sljit_gpr r0 = 0;	/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */
70 const sljit_gpr r1 = 1;	/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */
71 const sljit_gpr r2 = 2;	/* reg_map[1]: 1st argument */
72 const sljit_gpr r3 = 3;	/* reg_map[2]: 2nd argument */
73 const sljit_gpr r4 = 4;	/* reg_map[3]: 3rd argument */
74 const sljit_gpr r5 = 5;	/* reg_map[4]: 4th argument */
75 const sljit_gpr r6 = 6;	/* reg_map[5]: 5th argument; 1st saved register */
76 const sljit_gpr r7 = 7;	/* reg_map[6] */
77 const sljit_gpr r8 = 8;	/* reg_map[7] */
78 const sljit_gpr r9 = 9;	/* reg_map[8] */
79 const sljit_gpr r10 = 10;	/* reg_map[9] */
80 const sljit_gpr r11 = 11;	/* reg_map[10] */
81 const sljit_gpr r12 = 12;	/* reg_map[11]: GOT */
82 const sljit_gpr r13 = 13;	/* reg_map[12]: Literal Pool pointer */
83 const sljit_gpr r14 = 14;	/* reg_map[0]: return address and flag register */
84 const sljit_gpr r15 = 15;	/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */
85 
86 /* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */
87 /* TODO(carenas): r12 might conflict in PIC code, reserve? */
/* TODO(carenas): r13 is usually pointed to "pool" per ABI, using a tmp
 *                like we do now might be faster though, reserve?
 */
91 
/* TODO(carenas): should be named TMP_REG[1-2] for consistency */
/* Temporary registers: aliases for r0 and r1, which are marked as reserved
   in the register list of this file. */
#define tmp0	r0
#define tmp1	r1
95 
96 /* TODO(carenas): flags should move to a different register so that
97  *                link register doesn't need to change
98  */
99 
100 /* Link registers. The normal link register is r14, but since
101    we use that for flags we need to use r0 instead to do fast
102    calls so that flags are preserved. */
103 const sljit_gpr link_r = 14;     /* r14 */
104 const sljit_gpr fast_link_r = 0; /* r0 */
105 
106 /* Flag register layout:
107 
108    0               32  33  34      36      64
109    +---------------+---+---+-------+-------+
110    |      ZERO     | 0 | 0 |  C C  |///////|
111    +---------------+---+---+-------+-------+
112 */
113 const sljit_gpr flag_r = 14; /* r14 */
114 
/* s390x specific constant record: the generic sljit_const followed by the
   initial value of the constant.
   NOTE(review): "must be first" presumably allows converting between
   struct sljit_const * and struct sljit_s390x_const * - confirm at the users. */
struct sljit_s390x_const {
	struct sljit_const const_; /* must be first */
	sljit_sw init_value;       /* required to build literal pool */
};
119 
120 /* Convert SLJIT register to hardware register. */
gpr(sljit_s32 r)121 static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)
122 {
123 	SLJIT_ASSERT(r != SLJIT_UNUSED);
124 	SLJIT_ASSERT(r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0])));
125 	return reg_map[r];
126 }
127 
128 /* Size of instruction in bytes. Tags must already be cleared. */
sizeof_ins(sljit_ins ins)129 static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins)
130 {
131 	/* keep faulting instructions */
132 	if (ins == 0)
133 		 return 2;
134 
135 	if ((ins & 0x00000000ffffL) == ins)
136 		 return 2;
137 	if ((ins & 0x0000ffffffffL) == ins)
138 		 return 4;
139 	if ((ins & 0xffffffffffffL) == ins)
140 		 return 6;
141 
142 	SLJIT_UNREACHABLE();
143 	return (sljit_uw)-1;
144 }
145 
push_inst(struct sljit_compiler * compiler,sljit_ins ins)146 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
147 {
148 	sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
149 	FAIL_IF(!ibuf);
150 	*ibuf = ins;
151 	compiler->size++;
152 	return SLJIT_SUCCESS;
153 }
154 
encode_inst(void ** ptr,sljit_ins ins)155 static sljit_s32 encode_inst(void **ptr, sljit_ins ins)
156 {
157 	sljit_u16 *ibuf = (sljit_u16 *)*ptr;
158 	sljit_uw size = sizeof_ins(ins);
159 
160 	SLJIT_ASSERT((size & 6) == size);
161 	switch (size) {
162 	case 6:
163 		*ibuf++ = (sljit_u16)(ins >> 32);
164 		/* fallthrough */
165 	case 4:
166 		*ibuf++ = (sljit_u16)(ins >> 16);
167 		/* fallthrough */
168 	case 2:
169 		*ibuf++ = (sljit_u16)(ins);
170 	}
171 	*ptr = (void*)ibuf;
172 	return SLJIT_SUCCESS;
173 }
174 
175 /* Map the given type to a 4-bit condition code mask. */
get_cc(sljit_s32 type)176 static SLJIT_INLINE sljit_u8 get_cc(sljit_s32 type) {
177 	const sljit_u8 eq = 1 << 3; /* equal {,to zero} */
178 	const sljit_u8 lt = 1 << 2; /* less than {,zero} */
179 	const sljit_u8 gt = 1 << 1; /* greater than {,zero} */
180 	const sljit_u8 ov = 1 << 0; /* {overflow,NaN} */
181 
182 	switch (type) {
183 	case SLJIT_EQUAL:
184 	case SLJIT_EQUAL_F64:
185 		return eq;
186 
187 	case SLJIT_NOT_EQUAL:
188 	case SLJIT_NOT_EQUAL_F64:
189 		return ~eq;
190 
191 	case SLJIT_LESS:
192 	case SLJIT_SIG_LESS:
193 	case SLJIT_LESS_F64:
194 		return lt;
195 
196 	case SLJIT_LESS_EQUAL:
197 	case SLJIT_SIG_LESS_EQUAL:
198 	case SLJIT_LESS_EQUAL_F64:
199 		return (lt | eq);
200 
201 	case SLJIT_GREATER:
202 	case SLJIT_SIG_GREATER:
203 	case SLJIT_GREATER_F64:
204 		return gt;
205 
206 	case SLJIT_GREATER_EQUAL:
207 	case SLJIT_SIG_GREATER_EQUAL:
208 	case SLJIT_GREATER_EQUAL_F64:
209 		return (gt | eq);
210 
211 	case SLJIT_OVERFLOW:
212 	case SLJIT_MUL_OVERFLOW:
213 	case SLJIT_UNORDERED_F64:
214 		return ov;
215 
216 	case SLJIT_NOT_OVERFLOW:
217 	case SLJIT_MUL_NOT_OVERFLOW:
218 	case SLJIT_ORDERED_F64:
219 		return ~ov;
220 	}
221 
222 	SLJIT_UNREACHABLE();
223 	return (sljit_u8)-1;
224 }
225 
/* Facility to bit index mappings.
   Note: some facilities share the same bit index.
   The indexes are consumed by have_facility_static() and
   have_facility_dynamic(); the latter counts index 0 as the most
   significant bit of the first 64-bit word of the facility list. */
typedef sljit_uw facility_bit;
#define STORE_FACILITY_LIST_EXTENDED_FACILITY 7
#define FAST_LONG_DISPLACEMENT_FACILITY 19
#define EXTENDED_IMMEDIATE_FACILITY 21
#define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34
#define DISTINCT_OPERAND_FACILITY 45
#define HIGH_WORD_FACILITY 45
#define POPULATION_COUNT_FACILITY 45
#define LOAD_STORE_ON_CONDITION_1_FACILITY 45
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49
#define LOAD_STORE_ON_CONDITION_2_FACILITY 53
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58
#define VECTOR_FACILITY 129
#define VECTOR_ENHANCEMENTS_1_FACILITY 135
242 
243 /* Report whether a facility is known to be present due to the compiler
244    settings. This function should always be compiled to a constant
245    value given a constant argument. */
have_facility_static(facility_bit x)246 static SLJIT_INLINE int have_facility_static(facility_bit x)
247 {
248 #if ENABLE_STATIC_FACILITY_DETECTION
249 	switch (x) {
250 	case FAST_LONG_DISPLACEMENT_FACILITY:
251 		return (__ARCH__ >=  6 /* z990 */);
252 	case EXTENDED_IMMEDIATE_FACILITY:
253 	case STORE_FACILITY_LIST_EXTENDED_FACILITY:
254 		return (__ARCH__ >=  7 /* z9-109 */);
255 	case GENERAL_INSTRUCTION_EXTENSION_FACILITY:
256 		return (__ARCH__ >=  8 /* z10 */);
257 	case DISTINCT_OPERAND_FACILITY:
258 		return (__ARCH__ >=  9 /* z196 */);
259 	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY:
260 		return (__ARCH__ >= 10 /* zEC12 */);
261 	case LOAD_STORE_ON_CONDITION_2_FACILITY:
262 	case VECTOR_FACILITY:
263 		return (__ARCH__ >= 11 /* z13 */);
264 	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY:
265 	case VECTOR_ENHANCEMENTS_1_FACILITY:
266 		return (__ARCH__ >= 12 /* z14 */);
267 	default:
268 		SLJIT_UNREACHABLE();
269 	}
270 #endif
271 	return 0;
272 }
273 
get_hwcap()274 static SLJIT_INLINE unsigned long get_hwcap()
275 {
276 	static unsigned long hwcap = 0;
277 	if (SLJIT_UNLIKELY(!hwcap)) {
278 		hwcap = getauxval(AT_HWCAP);
279 		SLJIT_ASSERT(hwcap != 0);
280 	}
281 	return hwcap;
282 }
283 
have_stfle()284 static SLJIT_INLINE int have_stfle()
285 {
286 	if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY))
287 		return 1;
288 
289 	return (get_hwcap() & HWCAP_S390_STFLE);
290 }
291 
/* Report whether the given facility is available. This function always
   performs a runtime check.

   x is a facility bit index (see the *_FACILITY constants). The facility
   list is fetched with the stfle instruction on first use and kept in a
   function-local static buffer of 4 words. Returns 1 when the bit is set and
   0 otherwise; also 0 when have_stfle() fails or dynamic detection is
   compiled out. */
static int have_facility_dynamic(facility_bit x)
{
#if ENABLE_DYNAMIC_FACILITY_DETECTION
	static struct {
		sljit_uw bits[4];
	} cpu_features;
	size_t size = sizeof(cpu_features);
	const sljit_uw word_index = x >> 6;
	/* single-bit mask; index 0 selects the most significant bit of a word */
	const sljit_uw bit_index = ((1UL << 63) >> (x & 63));

	SLJIT_ASSERT(x < size * 8);
	if (SLJIT_UNLIKELY(!have_stfle()))
		return 0;

	/* bits[0] == 0 doubles as the "not queried yet" marker */
	if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) {
		/* r0 receives (size / 8) - 1, then stfle stores to cpu_features */
		__asm__ __volatile__ (
			"lgr   %%r0, %0;"
			"stfle 0(%1);"
			/* outputs  */:
			/* inputs   */: "d" ((size / 8) - 1), "a" (&cpu_features)
			/* clobbers */: "r0", "cc", "memory"
		);
		SLJIT_ASSERT(cpu_features.bits[0] != 0);
	}
	return (cpu_features.bits[word_index] & bit_index) != 0;
#else
	return 0;
#endif
}
323 
324 #define HAVE_FACILITY(name, bit) \
325 static SLJIT_INLINE int name() \
326 { \
327 	static int have = -1; \
328 	/* Static check first. May allow the function to be optimized away. */ \
329 	if (have_facility_static(bit)) \
330 		have = 1; \
331 	else if (SLJIT_UNLIKELY(have < 0)) \
332 		have = have_facility_dynamic(bit) ? 1 : 0; \
333 \
334 	return have; \
335 }
336 
HAVE_FACILITY(have_eimm,EXTENDED_IMMEDIATE_FACILITY)337 HAVE_FACILITY(have_eimm,    EXTENDED_IMMEDIATE_FACILITY)
338 HAVE_FACILITY(have_ldisp,   FAST_LONG_DISPLACEMENT_FACILITY)
339 HAVE_FACILITY(have_genext,  GENERAL_INSTRUCTION_EXTENSION_FACILITY)
340 HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY)
341 HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY)
342 HAVE_FACILITY(have_misc2,   MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY)
343 #undef HAVE_FACILITY
344 
/* Range checks for unsigned 12-bit and 32-bit values. */
#define is_u12(d)	(0 <= (d) && (d) <= 0x00000fffL)
#define is_u32(d)	(0 <= (d) && (d) <= 0xffffffffL)

/* True when the signed value v fits into a two's complement field of bitlen
   bits (bitlen <= 32 here). Written as a range comparison: shifting v left
   and back would be undefined for negative or overflowing signed operands.
   Relies on long being 64 bits wide, which holds on s390x. v is evaluated
   twice. */
#define CHECK_SIGNED(v, bitlen) \
	(-(1L << ((bitlen) - 1)) <= (v) && (v) < (1L << ((bitlen) - 1)))

#define is_s16(d)	CHECK_SIGNED((d), 16)
#define is_s20(d)	CHECK_SIGNED((d), 20)
#define is_s32(d)	CHECK_SIGNED((d), 32)
354 
355 static SLJIT_INLINE sljit_uw disp_s20(sljit_s32 d)
356 {
357 	sljit_uw dh = (d >> 12) & 0xff;
358 	sljit_uw dl = (d << 8) & 0xfff00;
359 
360 	SLJIT_ASSERT(is_s20(d));
361 	return dh | dl;
362 }
363 
/* TODO(carenas): variadic macro is not strictly needed */
/* Opens the definition of an instruction encoder: expands to the header of a
   static inline function named `op` that takes the listed parameters and
   returns the encoded sljit_ins. The function body follows the macro call. */
#define SLJIT_S390X_INSTRUCTION(op, ...) \
static SLJIT_INLINE sljit_ins op(__VA_ARGS__)
367 
368 /* RR form instructions. */
369 #define SLJIT_S390X_RR(name, pattern) \
370 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
371 { \
372 	return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \
373 }
374 
375 /* ADD */
376 SLJIT_S390X_RR(ar,   0x1a00)
377 
378 /* ADD LOGICAL */
379 SLJIT_S390X_RR(alr,  0x1e00)
380 
381 /* AND */
382 SLJIT_S390X_RR(nr,   0x1400)
383 
384 /* BRANCH AND SAVE */
385 SLJIT_S390X_RR(basr, 0x0d00)
386 
387 /* BRANCH ON CONDITION */
388 SLJIT_S390X_RR(bcr,  0x0700) /* TODO(mundaym): type for mask? */
389 
390 /* COMPARE */
391 SLJIT_S390X_RR(cr,   0x1900)
392 
393 /* COMPARE LOGICAL */
394 SLJIT_S390X_RR(clr,  0x1500)
395 
396 /* DIVIDE */
397 SLJIT_S390X_RR(dr,   0x1d00)
398 
399 /* EXCLUSIVE OR */
400 SLJIT_S390X_RR(xr,   0x1700)
401 
402 /* LOAD */
403 SLJIT_S390X_RR(lr,   0x1800)
404 
405 /* LOAD COMPLEMENT */
406 SLJIT_S390X_RR(lcr,  0x1300)
407 
408 /* OR */
409 SLJIT_S390X_RR(or,   0x1600)
410 
411 /* SUBTRACT */
412 SLJIT_S390X_RR(sr,   0x1b00)
413 
414 /* SUBTRACT LOGICAL */
415 SLJIT_S390X_RR(slr,  0x1f00)
416 
417 #undef SLJIT_S390X_RR
418 
419 /* RRE form instructions */
420 #define SLJIT_S390X_RRE(name, pattern) \
421 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
422 { \
423 	return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \
424 }
425 
426 /* ADD */
427 SLJIT_S390X_RRE(agr,   0xb9080000)
428 
429 /* ADD LOGICAL */
430 SLJIT_S390X_RRE(algr,  0xb90a0000)
431 
432 /* ADD LOGICAL WITH CARRY */
433 SLJIT_S390X_RRE(alcr,  0xb9980000)
434 SLJIT_S390X_RRE(alcgr, 0xb9880000)
435 
436 /* AND */
437 SLJIT_S390X_RRE(ngr,   0xb9800000)
438 
439 /* COMPARE */
440 SLJIT_S390X_RRE(cgr,   0xb9200000)
441 
442 /* COMPARE LOGICAL */
443 SLJIT_S390X_RRE(clgr,  0xb9210000)
444 
445 /* DIVIDE LOGICAL */
446 SLJIT_S390X_RRE(dlr,   0xb9970000)
447 SLJIT_S390X_RRE(dlgr,  0xb9870000)
448 
449 /* DIVIDE SINGLE */
450 SLJIT_S390X_RRE(dsgr,  0xb90d0000)
451 
452 /* EXCLUSIVE OR */
453 SLJIT_S390X_RRE(xgr,   0xb9820000)
454 
455 /* LOAD */
456 SLJIT_S390X_RRE(lgr,   0xb9040000)
457 SLJIT_S390X_RRE(lgfr,  0xb9140000)
458 
459 /* LOAD BYTE */
460 SLJIT_S390X_RRE(lbr,   0xb9260000)
461 SLJIT_S390X_RRE(lgbr,  0xb9060000)
462 
463 /* LOAD COMPLEMENT */
464 SLJIT_S390X_RRE(lcgr,  0xb9030000)
465 
466 /* LOAD HALFWORD */
467 SLJIT_S390X_RRE(lhr,   0xb9270000)
468 SLJIT_S390X_RRE(lghr,  0xb9070000)
469 
470 /* LOAD LOGICAL */
471 SLJIT_S390X_RRE(llgfr, 0xb9160000)
472 
473 /* LOAD LOGICAL CHARACTER */
474 SLJIT_S390X_RRE(llcr,  0xb9940000)
475 SLJIT_S390X_RRE(llgcr, 0xb9840000)
476 
477 /* LOAD LOGICAL HALFWORD */
478 SLJIT_S390X_RRE(llhr,  0xb9950000)
479 SLJIT_S390X_RRE(llghr, 0xb9850000)
480 
481 /* MULTIPLY LOGICAL */
482 SLJIT_S390X_RRE(mlgr,  0xb9860000)
483 
484 /* MULTIPLY SINGLE */
485 SLJIT_S390X_RRE(msr,   0xb2520000)
486 SLJIT_S390X_RRE(msgr,  0xb90c0000)
487 SLJIT_S390X_RRE(msgfr, 0xb91c0000)
488 
489 /* OR */
490 SLJIT_S390X_RRE(ogr,   0xb9810000)
491 
492 /* SUBTRACT */
493 SLJIT_S390X_RRE(sgr,   0xb9090000)
494 
495 /* SUBTRACT LOGICAL */
496 SLJIT_S390X_RRE(slgr,  0xb90b0000)
497 
498 /* SUBTRACT LOGICAL WITH BORROW */
499 SLJIT_S390X_RRE(slbr,  0xb9990000)
500 SLJIT_S390X_RRE(slbgr, 0xb9890000)
501 
502 #undef SLJIT_S390X_RRE
503 
504 /* RI-a form instructions */
505 #define SLJIT_S390X_RIA(name, pattern, imm_type) \
506 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
507 { \
508 	return (pattern) | ((reg & 0xf) << 20) | (imm & 0xffff); \
509 }
510 
511 /* ADD HALFWORD IMMEDIATE */
512 SLJIT_S390X_RIA(ahi,   0xa70a0000, sljit_s16)
513 SLJIT_S390X_RIA(aghi,  0xa70b0000, sljit_s16)
514 
515 /* COMPARE HALFWORD IMMEDIATE */
516 SLJIT_S390X_RIA(chi,   0xa70e0000, sljit_s16)
517 SLJIT_S390X_RIA(cghi,  0xa70f0000, sljit_s16)
518 
519 /* LOAD HALFWORD IMMEDIATE */
520 SLJIT_S390X_RIA(lhi,   0xa7080000, sljit_s16)
521 SLJIT_S390X_RIA(lghi,  0xa7090000, sljit_s16)
522 
523 /* LOAD LOGICAL IMMEDIATE */
524 SLJIT_S390X_RIA(llihh, 0xa50c0000, sljit_u16)
525 SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16)
526 SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16)
527 SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16)
528 
529 /* MULTIPLY HALFWORD IMMEDIATE */
530 SLJIT_S390X_RIA(mhi,   0xa70c0000, sljit_s16)
531 SLJIT_S390X_RIA(mghi,  0xa70d0000, sljit_s16)
532 
533 /* OR IMMEDIATE */
534 SLJIT_S390X_RIA(oilh,  0xa50a0000, sljit_u16)
535 
536 /* TEST UNDER MASK */
537 SLJIT_S390X_RIA(tmlh,  0xa7000000, sljit_u16)
538 
539 #undef SLJIT_S390X_RIA
540 
541 /* RIL-a form instructions (requires extended immediate facility) */
542 #define SLJIT_S390X_RILA(name, pattern, imm_type) \
543 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
544 { \
545 	SLJIT_ASSERT(have_eimm()); \
546 	return (pattern) | ((sljit_ins)(reg & 0xf) << 36) | (imm & 0xffffffff); \
547 }
548 
549 /* ADD IMMEDIATE */
550 SLJIT_S390X_RILA(afi,   0xc20900000000, sljit_s32)
551 SLJIT_S390X_RILA(agfi,  0xc20800000000, sljit_s32)
552 
553 /* ADD IMMEDIATE HIGH */
554 SLJIT_S390X_RILA(aih,   0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */
555 
556 /* ADD LOGICAL IMMEDIATE */
557 SLJIT_S390X_RILA(alfi,  0xc20b00000000, sljit_u32)
558 SLJIT_S390X_RILA(algfi, 0xc20a00000000, sljit_u32)
559 
560 /* AND IMMEDIATE */
561 SLJIT_S390X_RILA(nihf,  0xc00a00000000, sljit_u32)
562 SLJIT_S390X_RILA(nilf,  0xc00b00000000, sljit_u32)
563 
564 /* COMPARE IMMEDIATE */
565 SLJIT_S390X_RILA(cfi,   0xc20d00000000, sljit_s32)
566 SLJIT_S390X_RILA(cgfi,  0xc20c00000000, sljit_s32)
567 
568 /* COMPARE IMMEDIATE HIGH */
569 SLJIT_S390X_RILA(cih,   0xcc0d00000000, sljit_s32) /* TODO(mundaym): high-word facility? */
570 
571 /* COMPARE LOGICAL IMMEDIATE */
572 SLJIT_S390X_RILA(clfi,  0xc20f00000000, sljit_u32)
573 SLJIT_S390X_RILA(clgfi, 0xc20e00000000, sljit_u32)
574 
575 /* EXCLUSIVE OR IMMEDIATE */
576 SLJIT_S390X_RILA(xilf,  0xc00700000000, sljit_u32)
577 
578 /* INSERT IMMEDIATE */
579 SLJIT_S390X_RILA(iihf,  0xc00800000000, sljit_u32)
580 SLJIT_S390X_RILA(iilf,  0xc00900000000, sljit_u32)
581 
582 /* LOAD IMMEDIATE */
583 SLJIT_S390X_RILA(lgfi,  0xc00100000000, sljit_s32)
584 
585 /* LOAD LOGICAL IMMEDIATE */
586 SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32)
587 SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32)
588 
589 /* OR IMMEDIATE */
590 SLJIT_S390X_RILA(oilf,  0xc00d00000000, sljit_u32)
591 
592 #undef SLJIT_S390X_RILA
593 
594 /* RX-a form instructions */
595 #define SLJIT_S390X_RXA(name, pattern) \
596 SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_u16 d, sljit_gpr x, sljit_gpr b) \
597 { \
598 	sljit_ins ri, xi, bi, di; \
599 \
600 	SLJIT_ASSERT((d & 0xfff) == d); \
601 	ri = (sljit_ins)(r & 0xf) << 20; \
602 	xi = (sljit_ins)(x & 0xf) << 16; \
603 	bi = (sljit_ins)(b & 0xf) << 12; \
604 	di = (sljit_ins)(d & 0xfff); \
605 \
606 	return (pattern) | ri | xi | bi | di; \
607 }
608 
609 /* ADD */
610 SLJIT_S390X_RXA(a,   0x5a000000)
611 
612 /* ADD LOGICAL */
613 SLJIT_S390X_RXA(al,  0x5e000000)
614 
615 /* AND */
616 SLJIT_S390X_RXA(n,   0x54000000)
617 
618 /* EXCLUSIVE OR */
619 SLJIT_S390X_RXA(x,   0x57000000)
620 
621 /* LOAD */
622 SLJIT_S390X_RXA(l,   0x58000000)
623 
624 /* LOAD ADDRESS */
625 SLJIT_S390X_RXA(la,  0x41000000)
626 
627 /* LOAD HALFWORD */
628 SLJIT_S390X_RXA(lh,  0x48000000)
629 
630 /* MULTIPLY SINGLE */
631 SLJIT_S390X_RXA(ms,  0x71000000)
632 
633 /* OR */
634 SLJIT_S390X_RXA(o,   0x56000000)
635 
636 /* STORE */
637 SLJIT_S390X_RXA(st,  0x50000000)
638 
639 /* STORE CHARACTER */
640 SLJIT_S390X_RXA(stc, 0x42000000)
641 
642 /* STORE HALFWORD */
643 SLJIT_S390X_RXA(sth, 0x40000000)
644 
645 /* SUBTRACT */
646 SLJIT_S390X_RXA(s,   0x5b000000)
647 
648 /* SUBTRACT LOGICAL */
649 SLJIT_S390X_RXA(sl,  0x5f000000)
650 
651 #undef SLJIT_S390X_RXA
652 
653 /* RXY-a instructions */
654 #define SLJIT_S390X_RXYA(name, pattern, cond) \
655 SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
656 { \
657 	sljit_ins ri, xi, bi, di; \
658 \
659 	SLJIT_ASSERT(cond); \
660 	ri = (sljit_ins)(r & 0xf) << 36; \
661 	xi = (sljit_ins)(x & 0xf) << 32; \
662 	bi = (sljit_ins)(b & 0xf) << 28; \
663 	di = (sljit_ins)disp_s20(d) << 8; \
664 \
665 	return (pattern) | ri | xi | bi | di; \
666 }
667 
668 /* ADD */
669 SLJIT_S390X_RXYA(ay,    0xe3000000005a, have_ldisp())
670 SLJIT_S390X_RXYA(ag,    0xe30000000008, 1)
671 
672 /* ADD LOGICAL */
673 SLJIT_S390X_RXYA(aly,   0xe3000000005e, have_ldisp())
674 SLJIT_S390X_RXYA(alg,   0xe3000000000a, 1)
675 
676 /* ADD LOGICAL WITH CARRY */
677 SLJIT_S390X_RXYA(alc,   0xe30000000098, 1)
678 SLJIT_S390X_RXYA(alcg,  0xe30000000088, 1)
679 
680 /* AND */
681 SLJIT_S390X_RXYA(ny,    0xe30000000054, have_ldisp())
682 SLJIT_S390X_RXYA(ng,    0xe30000000080, 1)
683 
684 /* EXCLUSIVE OR */
685 SLJIT_S390X_RXYA(xy,    0xe30000000057, have_ldisp())
686 SLJIT_S390X_RXYA(xg,    0xe30000000082, 1)
687 
688 /* LOAD */
689 SLJIT_S390X_RXYA(ly,    0xe30000000058, have_ldisp())
690 SLJIT_S390X_RXYA(lg,    0xe30000000004, 1)
691 SLJIT_S390X_RXYA(lgf,   0xe30000000014, 1)
692 
693 /* LOAD BYTE */
694 SLJIT_S390X_RXYA(lb,    0xe30000000076, have_ldisp())
695 SLJIT_S390X_RXYA(lgb,   0xe30000000077, have_ldisp())
696 
697 /* LOAD HALFWORD */
698 SLJIT_S390X_RXYA(lhy,   0xe30000000078, have_ldisp())
699 SLJIT_S390X_RXYA(lgh,   0xe30000000015, 1)
700 
701 /* LOAD LOGICAL */
702 SLJIT_S390X_RXYA(llgf,  0xe30000000016, 1)
703 
704 /* LOAD LOGICAL CHARACTER */
705 SLJIT_S390X_RXYA(llc,   0xe30000000094, have_eimm())
706 SLJIT_S390X_RXYA(llgc,  0xe30000000090, 1)
707 
708 /* LOAD LOGICAL HALFWORD */
709 SLJIT_S390X_RXYA(llh,   0xe30000000095, have_eimm())
710 SLJIT_S390X_RXYA(llgh,  0xe30000000091, 1)
711 
712 /* MULTIPLY SINGLE */
713 SLJIT_S390X_RXYA(msy,   0xe30000000051, have_ldisp())
714 SLJIT_S390X_RXYA(msg,   0xe3000000000c, 1)
715 
716 /* OR */
717 SLJIT_S390X_RXYA(oy,    0xe30000000056, have_ldisp())
718 SLJIT_S390X_RXYA(og,    0xe30000000081, 1)
719 
720 /* STORE */
721 SLJIT_S390X_RXYA(sty,   0xe30000000050, have_ldisp())
722 SLJIT_S390X_RXYA(stg,   0xe30000000024, 1)
723 
724 /* STORE CHARACTER */
725 SLJIT_S390X_RXYA(stcy,  0xe30000000072, have_ldisp())
726 
727 /* STORE HALFWORD */
728 SLJIT_S390X_RXYA(sthy,  0xe30000000070, have_ldisp())
729 
730 /* SUBTRACT */
731 SLJIT_S390X_RXYA(sy,    0xe3000000005b, have_ldisp())
732 SLJIT_S390X_RXYA(sg,    0xe30000000009, 1)
733 
734 /* SUBTRACT LOGICAL */
735 SLJIT_S390X_RXYA(sly,   0xe3000000005f, have_ldisp())
736 SLJIT_S390X_RXYA(slg,   0xe3000000000b, 1)
737 
738 /* SUBTRACT LOGICAL WITH BORROW */
739 SLJIT_S390X_RXYA(slb,   0xe30000000099, 1)
740 SLJIT_S390X_RXYA(slbg,  0xe30000000089, 1)
741 
742 #undef SLJIT_S390X_RXYA
743 
744 /* RS-a instructions */
745 #define SLJIT_S390X_RSA(name, pattern) \
746 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw d, sljit_gpr b) \
747 { \
748 	sljit_ins r1 = (sljit_ins)(reg & 0xf) << 20; \
749 	sljit_ins b2 = (sljit_ins)(b & 0xf) << 12; \
750 	sljit_ins d2 = (sljit_ins)(d & 0xfff); \
751 	return (pattern) | r1 | b2 | d2; \
752 }
753 
754 /* SHIFT LEFT SINGLE LOGICAL */
755 SLJIT_S390X_RSA(sll, 0x89000000)
756 
757 /* SHIFT RIGHT SINGLE */
758 SLJIT_S390X_RSA(sra, 0x8a000000)
759 
760 /* SHIFT RIGHT SINGLE LOGICAL */
761 SLJIT_S390X_RSA(srl, 0x88000000)
762 
763 #undef SLJIT_S390X_RSA
764 
765 /* RSY-a instructions */
766 #define SLJIT_S390X_RSYA(name, pattern, cond) \
767 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_sw d, sljit_gpr b) \
768 { \
769 	sljit_ins r1, r3, b2, d2; \
770 \
771 	SLJIT_ASSERT(cond); \
772 	r1 = (sljit_ins)(dst & 0xf) << 36; \
773 	r3 = (sljit_ins)(src & 0xf) << 32; \
774 	b2 = (sljit_ins)(b & 0xf) << 28; \
775 	d2 = (sljit_ins)disp_s20(d) << 8; \
776 \
777 	return (pattern) | r1 | r3 | b2 | d2; \
778 }
779 
780 /* LOAD MULTIPLE */
781 SLJIT_S390X_RSYA(lmg,   0xeb0000000004, 1)
782 
783 /* SHIFT LEFT LOGICAL */
784 SLJIT_S390X_RSYA(sllg,  0xeb000000000d, 1)
785 
786 /* SHIFT RIGHT SINGLE */
787 SLJIT_S390X_RSYA(srag,  0xeb000000000a, 1)
788 
789 /* SHIFT RIGHT SINGLE LOGICAL */
790 SLJIT_S390X_RSYA(srlg,  0xeb000000000c, 1)
791 
792 /* STORE MULTIPLE */
793 SLJIT_S390X_RSYA(stmg,  0xeb0000000024, 1)
794 
795 #undef SLJIT_S390X_RSYA
796 
797 /* RIE-f instructions (require general-instructions-extension facility) */
798 #define SLJIT_S390X_RIEF(name, pattern) \
799 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \
800 { \
801 	sljit_ins r1, r2, i3, i4, i5; \
802 \
803 	SLJIT_ASSERT(have_genext()); \
804 	r1 = (sljit_ins)(dst & 0xf) << 36; \
805 	r2 = (sljit_ins)(src & 0xf) << 32; \
806 	i3 = (sljit_ins)start << 24; \
807 	i4 = (sljit_ins)end << 16; \
808 	i5 = (sljit_ins)rot << 8; \
809 \
810 	return (pattern) | r1 | r2 | i3 | i4 | i5; \
811 }
812 
813 /* ROTATE THEN AND SELECTED BITS */
814 /* SLJIT_S390X_RIEF(rnsbg,  0xec0000000054) */
815 
816 /* ROTATE THEN EXCLUSIVE OR SELECTED BITS */
817 /* SLJIT_S390X_RIEF(rxsbg,  0xec0000000057) */
818 
819 /* ROTATE THEN OR SELECTED BITS */
820 SLJIT_S390X_RIEF(rosbg,  0xec0000000056)
821 
822 /* ROTATE THEN INSERT SELECTED BITS */
823 /* SLJIT_S390X_RIEF(risbg,  0xec0000000055) */
824 /* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */
825 
826 /* ROTATE THEN INSERT SELECTED BITS HIGH */
827 SLJIT_S390X_RIEF(risbhg, 0xec000000005d)
828 
829 /* ROTATE THEN INSERT SELECTED BITS LOW */
830 /* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */
831 
832 #undef SLJIT_S390X_RIEF
833 
834 /* RRF-a instructions */
835 #define SLJIT_S390X_RRFA(name, pattern, cond) \
836 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src1, sljit_gpr src2) \
837 { \
838 	sljit_ins r1, r2, r3; \
839 \
840 	SLJIT_ASSERT(cond); \
841 	r1 = (sljit_ins)(dst & 0xf) << 4; \
842 	r2 = (sljit_ins)(src1 & 0xf); \
843 	r3 = (sljit_ins)(src2 & 0xf) << 12; \
844 \
845 	return (pattern) | r3 | r1 | r2; \
846 }
847 
848 /* MULTIPLY */
849 SLJIT_S390X_RRFA(msrkc,  0xb9fd0000, have_misc2())
850 SLJIT_S390X_RRFA(msgrkc, 0xb9ed0000, have_misc2())
851 
852 #undef SLJIT_S390X_RRFA
853 
854 /* RRF-c instructions (require load/store-on-condition 1 facility) */
855 #define SLJIT_S390X_RRFC(name, pattern) \
856 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \
857 { \
858 	sljit_ins r1, r2, m3; \
859 \
860 	SLJIT_ASSERT(have_lscond1()); \
861 	r1 = (sljit_ins)(dst & 0xf) << 4; \
862 	r2 = (sljit_ins)(src & 0xf); \
863 	m3 = (sljit_ins)(mask & 0xf) << 12; \
864 \
865 	return (pattern) | m3 | r1 | r2; \
866 }
867 
868 /* LOAD HALFWORD IMMEDIATE ON CONDITION */
869 SLJIT_S390X_RRFC(locr,  0xb9f20000)
870 SLJIT_S390X_RRFC(locgr, 0xb9e20000)
871 
872 #undef SLJIT_S390X_RRFC
873 
874 /* RIE-g instructions (require load/store-on-condition 2 facility) */
875 #define SLJIT_S390X_RIEG(name, pattern) \
876 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \
877 { \
878 	sljit_ins r1, m3, i2; \
879 \
880 	SLJIT_ASSERT(have_lscond2()); \
881 	r1 = (sljit_ins)(reg & 0xf) << 36; \
882 	m3 = (sljit_ins)(mask & 0xf) << 32; \
883 	i2 = (sljit_ins)(imm & 0xffffL) << 16; \
884 \
885 	return (pattern) | r1 | m3 | i2; \
886 }
887 
888 /* LOAD HALFWORD IMMEDIATE ON CONDITION */
889 SLJIT_S390X_RIEG(lochi,  0xec0000000042)
890 SLJIT_S390X_RIEG(locghi, 0xec0000000046)
891 
892 #undef SLJIT_S390X_RIEG
893 
894 #define SLJIT_S390X_RILB(name, pattern, cond) \
895 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \
896 { \
897 	sljit_ins r1, ri2; \
898 \
899 	SLJIT_ASSERT(cond); \
900 	r1 = (sljit_ins)(reg & 0xf) << 36; \
901 	ri2 = (sljit_ins)(ri & 0xffffffff); \
902 \
903 	return (pattern) | r1 | ri2; \
904 }
905 
906 /* BRANCH RELATIVE AND SAVE LONG */
907 SLJIT_S390X_RILB(brasl, 0xc00500000000, 1)
908 
909 /* LOAD ADDRESS RELATIVE LONG */
910 SLJIT_S390X_RILB(larl,  0xc00000000000, 1)
911 
912 /* LOAD RELATIVE LONG */
913 SLJIT_S390X_RILB(lgrl,  0xc40800000000, have_genext())
914 
915 #undef SLJIT_S390X_RILB
916 
/* Branch to the address in target: the bcr pattern (0x0700) with all four
   mask bits set. The register number is masked to 4 bits like in every other
   encoder, so an out-of-range value cannot alter the opcode or mask bits. */
SLJIT_S390X_INSTRUCTION(br, sljit_gpr target)
{
	return 0x07f0 | (target & 0xf);
}
921 
/* Encodes brcl: pattern 0xc00400000000 with the 4-bit condition mask in
   bits 36-39 and the low 32 bits of target in bits 0-31. */
SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target)
{
	return 0xc00400000000L
		| ((sljit_ins)(mask & 0xf) << 36)
		| ((sljit_ins)target & 0xffffffff);
}
928 
/* Encodes flogr (pattern 0xb9830000), asserting have_eimm().
   The instruction uses the RRE layout: dst goes into bits 4-7 and src into
   bits 0-3, exactly as in SLJIT_S390X_RRE and ipm. Shifting dst by 8 would
   put it into the unused field and encode register 0 as the destination. */
SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src)
{
	sljit_ins r1 = ((sljit_ins)dst & 0xf) << 4;
	sljit_ins r2 = ((sljit_ins)src & 0xf);
	SLJIT_ASSERT(have_eimm());
	return 0xb9830000 | r1 | r2;
}
936 
937 /* INSERT PROGRAM MASK */
SLJIT_S390X_INSTRUCTION(ipm,sljit_gpr dst)938 SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst)
939 {
940 	return 0xb2220000 | ((sljit_ins)(dst & 0xf) << 4);
941 }
942 
943 /* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */
SLJIT_S390X_INSTRUCTION(risbhgz,sljit_gpr dst,sljit_gpr src,sljit_u8 start,sljit_u8 end,sljit_u8 rot)944 SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot)
945 {
946 	return risbhg(dst, src, start, 0x8 | end, rot);
947 }
948 
949 #undef SLJIT_S390X_INSTRUCTION
950 
951 /* load condition code as needed to match type */
push_load_cc(struct sljit_compiler * compiler,sljit_s32 type)952 static sljit_s32 push_load_cc(struct sljit_compiler *compiler, sljit_s32 type)
953 {
954 	type &= ~SLJIT_I32_OP;
955 	switch (type) {
956 	case SLJIT_ZERO:
957 	case SLJIT_NOT_ZERO:
958 		return push_inst(compiler, cih(flag_r, 0));
959 		break;
960 	default:
961 		return push_inst(compiler, tmlh(flag_r, 0x3000));
962 		break;
963 	}
964 	return SLJIT_SUCCESS;
965 }
966 
/* Record `source` in the ZERO field (high 32 bits) of the flag register.
   32-bit operations (SLJIT_I32_OP set in op) only use the low word of
   source; 64-bit operations additionally OR in its high word. */
static sljit_s32 push_store_zero_flag(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr source)
{
	/* insert low 32-bits into high 32-bits of flag register */
	FAIL_IF(push_inst(compiler, risbhgz(flag_r, source, 0, 31, 32)));

	if (op & SLJIT_I32_OP)
		return SLJIT_SUCCESS;

	/* OR high 32-bits with high 32-bits of flag register */
	return push_inst(compiler, rosbg(flag_r, source, 0, 31, 0));
}
977 
978 /* load 64-bit immediate into register without clobbering flags */
push_load_imm_inst(struct sljit_compiler * compiler,sljit_gpr target,sljit_sw v)979 static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v)
980 {
981 	/* 4 byte instructions */
982 	if (is_s16(v))
983 		return push_inst(compiler, lghi(target, (sljit_s16)v));
984 
985 	if ((sljit_uw)v == (v & 0x000000000000ffffU))
986 		return push_inst(compiler, llill(target, (sljit_u16)v));
987 
988 	if ((sljit_uw)v == (v & 0x00000000ffff0000U))
989 		return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16)));
990 
991 	if ((sljit_uw)v == (v & 0x0000ffff00000000U))
992 		return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32)));
993 
994 	if ((sljit_uw)v == (v & 0xffff000000000000U))
995 		return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48)));
996 
997 	/* 6 byte instructions (requires extended immediate facility) */
998 	if (have_eimm()) {
999 		if (is_s32(v))
1000 			return push_inst(compiler, lgfi(target, (sljit_s32)v));
1001 
1002 		if ((sljit_uw)v == (v & 0x00000000ffffffffU))
1003 			return push_inst(compiler, llilf(target, (sljit_u32)v));
1004 
1005 		if ((sljit_uw)v == (v & 0xffffffff00000000U))
1006 			return push_inst(compiler, llihf(target, (sljit_u32)(v >> 32)));
1007 
1008 		FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));
1009 		return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));
1010 	}
1011 	/* TODO(mundaym): instruction sequences that don't use extended immediates */
1012 	abort();
1013 }
1014 
/* Memory operand split into base register, index register and displacement,
 * as filled in by make_addr_bxy() and make_addr_bx(). */
struct addr {
	sljit_gpr base; /* base register, r0 when the operand has none */
	sljit_gpr index; /* index register, r0 when the operand has none */
	sljit_sw  offset; /* displacement; 0 once it has been folded into index */
};
1020 
1021 /* transform memory operand into D(X,B) form with a signed 20-bit offset */
make_addr_bxy(struct sljit_compiler * compiler,struct addr * addr,sljit_s32 mem,sljit_sw off,sljit_gpr tmp)1022 static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler,
1023 	struct addr *addr, sljit_s32 mem, sljit_sw off,
1024 	sljit_gpr tmp /* clobbered, must not be r0 */)
1025 {
1026 	sljit_gpr base = r0;
1027 	sljit_gpr index = r0;
1028 
1029 	SLJIT_ASSERT(tmp != r0);
1030 	if (mem & REG_MASK)
1031 		base = gpr(mem & REG_MASK);
1032 
1033 	if (mem & OFFS_REG_MASK) {
1034 		index = gpr(OFFS_REG(mem));
1035 		if (off != 0) {
1036 			/* shift and put the result into tmp */
1037 			SLJIT_ASSERT(0 <= off && off < 64);
1038 			FAIL_IF(push_inst(compiler, sllg(tmp, index, off, 0)));
1039 			index = tmp;
1040 			off = 0; /* clear offset */
1041 		}
1042 	}
1043 	else if (!is_s20(off)) {
1044 		FAIL_IF(push_load_imm_inst(compiler, tmp, off));
1045 		index = tmp;
1046 		off = 0; /* clear offset */
1047 	}
1048 	addr->base = base;
1049 	addr->index = index;
1050 	addr->offset = off;
1051 	return SLJIT_SUCCESS;
1052 }
1053 
1054 /* transform memory operand into D(X,B) form with an unsigned 12-bit offset */
make_addr_bx(struct sljit_compiler * compiler,struct addr * addr,sljit_s32 mem,sljit_sw off,sljit_gpr tmp)1055 static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,
1056 	struct addr *addr, sljit_s32 mem, sljit_sw off,
1057 	sljit_gpr tmp /* clobbered, must not be r0 */)
1058 {
1059 	sljit_gpr base = r0;
1060 	sljit_gpr index = r0;
1061 
1062 	SLJIT_ASSERT(tmp != r0);
1063 	if (mem & REG_MASK)
1064 		base = gpr(mem & REG_MASK);
1065 
1066 	if (mem & OFFS_REG_MASK) {
1067 		index = gpr(OFFS_REG(mem));
1068 		if (off != 0) {
1069 			/* shift and put the result into tmp */
1070 			SLJIT_ASSERT(0 <= off && off < 64);
1071 			FAIL_IF(push_inst(compiler, sllg(tmp, index, off, 0)));
1072 			index = tmp;
1073 			off = 0; /* clear offset */
1074 		}
1075 	}
1076 	else if (!is_u12(off)) {
1077 		FAIL_IF(push_load_imm_inst(compiler, tmp, off));
1078 		index = tmp;
1079 		off = 0; /* clear offset */
1080 	}
1081 	addr->base = base;
1082 	addr->index = index;
1083 	addr->offset = off;
1084 	return SLJIT_SUCCESS;
1085 }
1086 
/* EVAL: invoke instruction builder `op` for register `r` with the offset,
 * index and base members of the struct addr value `addr`.
 * WHEN: pick builder `i1` when `cond` holds, `i2` otherwise, and EVAL it.
 * Both expansions are fully parenthesized so they can be used inside larger
 * expressions without operator precedence surprises. */
#define EVAL(op, r, addr) op((r), (addr).offset, (addr).index, (addr).base)
#define WHEN(cond, r, i1, i2, addr) \
	((cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr))
1090 
/* Load a word from the memory operand src/srcw into register `dst`.
 *
 * is_32bit selects a 32-bit load (l, or ly when the displacement is not an
 * unsigned 12-bit value); otherwise a 64-bit lg is emitted. `tmp` may be
 * clobbered while the address is formed. */
static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst,
		sljit_s32 src, sljit_sw srcw,
		sljit_gpr tmp /* clobbered */, sljit_s32 is_32bit)
{
	struct addr a;

	SLJIT_ASSERT(src & SLJIT_MEM);

	/* only a 32-bit access without long displacements needs the short form */
	if (!have_ldisp() && is_32bit)
		FAIL_IF(make_addr_bx(compiler, &a, src, srcw, tmp));
	else
		FAIL_IF(make_addr_bxy(compiler, &a, src, srcw, tmp));

	if (!is_32bit)
		return push_inst(compiler, lg(dst, a.offset, a.index, a.base));

	if (is_u12(a.offset))
		return push_inst(compiler, l(dst, a.offset, a.index, a.base));

	return push_inst(compiler, ly(dst, a.offset, a.index, a.base));
}
1111 
/* Store register `src` to the memory operand dst/dstw.
 *
 * is_32bit selects a 32-bit store (st, or sty when the displacement is not an
 * unsigned 12-bit value); otherwise a 64-bit stg is emitted. `tmp` may be
 * clobbered while the address is formed. */
static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src,
		sljit_s32 dst, sljit_sw dstw,
		sljit_gpr tmp /* clobbered */, sljit_s32 is_32bit)
{
	struct addr a;

	SLJIT_ASSERT(dst & SLJIT_MEM);

	/* only a 32-bit access without long displacements needs the short form */
	if (!have_ldisp() && is_32bit)
		FAIL_IF(make_addr_bx(compiler, &a, dst, dstw, tmp));
	else
		FAIL_IF(make_addr_bxy(compiler, &a, dst, dstw, tmp));

	if (!is_32bit)
		return push_inst(compiler, stg(src, a.offset, a.index, a.base));

	if (is_u12(a.offset))
		return push_inst(compiler, st(src, a.offset, a.index, a.base));

	return push_inst(compiler, sty(src, a.offset, a.index, a.base));
}
1132 
1133 #undef WHEN
1134 
/* Turn the buffered instruction stream into executable code.
 *
 * Works in two passes over compiler->buf:
 *   1. size pass: adds up the encoded size of every instruction, counts one
 *      literal pool slot per constant, per rewritable/absolute jump and per
 *      put_label, and rewrites each label->size from an instruction index to
 *      a byte offset;
 *   2. emit pass: encodes every instruction into the allocated buffer,
 *      filling pool slots and patching the relative (halfword) offsets.
 *
 * The literal pool is placed after the code, which is padded to an 8 byte
 * boundary. On success compiler->error is set to SLJIT_ERR_COMPILED and the
 * address of the code, adjusted by the executable offset, is returned.
 * Failure handling is delegated to CHECK_ERROR_PTR / CHECK_PTR /
 * PTR_FAIL_WITH_EXEC_IF (presumably returning NULL - defined elsewhere). */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_s390x_const *const_;
	struct sljit_put_label *put_label;
	sljit_sw executable_offset;
	sljit_uw ins_size = 0; /* instructions */
	sljit_uw pool_size = 0; /* literal pool */
	sljit_uw pad_size;
	sljit_uw i, j = 0; /* i: index inside one fragment, j: global instruction index */
	struct sljit_memory_fragment *buf;
	void *code, *code_ptr;
	sljit_uw *pool, *pool_ptr;

	sljit_uw source;
	sljit_sw offset; /* TODO(carenas): only need 32 bit */

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* branch handling */
	label = compiler->labels;
	jump = compiler->jumps;
	put_label = compiler->put_labels;

	/* TODO(carenas): compiler->executable_size could be calculated
	 *                before to avoid the following loop (except for
	 *                pool_size)
	 */
	/* calculate the size of the code */
	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
		sljit_uw len = buf->used_size / sizeof(sljit_ins);
		sljit_ins *ibuf = (sljit_ins *)buf->memory;
		for (i = 0; i < len; ++i, ++j) {
			sljit_ins ins = ibuf[i];

			/* TODO(carenas): instruction tag vs size/addr == j
			 * using instruction tags for const is creative
			 * but unlike all other architectures, and is not
			 * done consistently for all other objects.
			 * This might need reviewing later.
			 */
			if (ins & sljit_ins_const) {
				/* a tagged instruction needs one pool slot for its constant */
				pool_size += sizeof(*pool);
				ins &= ~sljit_ins_const;
			}
			if (label && label->size == j) {
				/* from here on label->size is a byte offset, not an index */
				label->size = ins_size;
				label = label->next;
			}
			if (jump && jump->addr == j) {
				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
					/* encoded: */
					/*   brasl %r14, <rel_addr> (or brcl <mask>, <rel_addr>) */
					/* replace with: */
					/*   lgrl %r1, <pool_addr> */
					/*   basr %r14, %r1 (or bcr <mask>, %r1) */
					/* the replacement pair is 2 bytes longer than the original */
					pool_size += sizeof(*pool);
					ins_size += 2;
				}
				jump = jump->next;
			}
			if (put_label && put_label->addr == j) {
				pool_size += sizeof(*pool);
				put_label = put_label->next;
			}
			ins_size += sizeof_ins(ins);
		}
	}

	/* emit trailing label */
	if (label && label->size == j) {
		label->size = ins_size;
		label = label->next;
	}

	/* every label, jump and put_label must have been matched by now */
	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!put_label);

	/* pad code size to 8 bytes so is accessible with half word offsets */
	/* the literal pool needs to be doubleword aligned */
	pad_size = ((ins_size + 7UL) & ~7UL) - ins_size;
	SLJIT_ASSERT(pad_size < 8UL);

	/* allocate target buffer */
	code = SLJIT_MALLOC_EXEC(ins_size + pad_size + pool_size,
					compiler->exec_allocator_data);
	PTR_FAIL_WITH_EXEC_IF(code);
	code_ptr = code;
	executable_offset = SLJIT_EXEC_OFFSET(code);

	/* TODO(carenas): pool is optional, and the ABI recommends it to
	 *                be created before the function code, instead of
	 *                globally; if generated code is too big could
	 *                need offsets bigger than 32bit words and assert()
	 */
	pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size);
	pool_ptr = pool;
	const_ = (struct sljit_s390x_const *)compiler->consts;

	/* update label addresses */
	label = compiler->labels;
	while (label) {
		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(
			(sljit_uw)code_ptr + label->size, executable_offset);
		label = label->next;
	}

	/* reset jumps */
	jump = compiler->jumps;
	put_label = compiler->put_labels;

	/* emit the code */
	j = 0;
	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
		sljit_uw len = buf->used_size / sizeof(sljit_ins);
		sljit_ins *ibuf = (sljit_ins *)buf->memory;
		for (i = 0; i < len; ++i, ++j) {
			sljit_ins ins = ibuf[i];
			if (ins & sljit_ins_const) {
				/* clear the const tag */
				ins &= ~sljit_ins_const;

				/* update instruction with relative address of constant */
				source = (sljit_uw)code_ptr;
				offset = (sljit_uw)pool_ptr - source;
				SLJIT_ASSERT(!(offset & 1));
				offset >>= 1; /* halfword (not byte) offset */
				SLJIT_ASSERT(is_s32(offset));
				ins |= (sljit_ins)offset & 0xffffffff;

				/* update address */
				const_->const_.addr = (sljit_uw)pool_ptr;

				/* store initial value into pool and update pool address */
				*(pool_ptr++) = const_->init_value;

				/* move to next constant */
				const_ = (struct sljit_s390x_const *)const_->const_.next;
			}
			if (jump && jump->addr == j) {
				sljit_sw target = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
					/* jump->addr now names the pool slot that holds the target */
					jump->addr = (sljit_uw)pool_ptr;

					/* load address into tmp1 */
					source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					offset = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
					SLJIT_ASSERT(!(offset & 1));
					offset >>= 1;
					SLJIT_ASSERT(is_s32(offset));
					encode_inst(&code_ptr,
						lgrl(tmp1, offset & 0xffffffff));

					/* store jump target into pool and update pool address */
					*(pool_ptr++) = target;

					/* branch to tmp1 */
					/* op selects the branch kind, arg is forwarded as its first operand */
					sljit_ins op = (ins >> 32) & 0xf;
					sljit_ins arg = (ins >> 36) & 0xf;
					switch (op) {
					case 4: /* brcl -> bcr */
						ins = bcr(arg, tmp1);
						break;
					case 5: /* brasl -> basr */
						ins = basr(arg, tmp1);
						break;
					default:
						abort();
					}
				}
				else {
					/* jump->addr is set 2 bytes past the start of the instruction */
					jump->addr = (sljit_uw)code_ptr + 2;
					source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					offset = target - source;

					/* offset must be halfword aligned */
					SLJIT_ASSERT(!(offset & 1));
					offset >>= 1;
					SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */

					/* patch jump target */
					ins |= (sljit_ins)offset & 0xffffffff;
				}
				jump = jump->next;
			}
			if (put_label && put_label->addr == j) {
				source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

				SLJIT_ASSERT(put_label->label);
				put_label->addr = (sljit_uw)code_ptr;

				/* store target into pool */
				*pool_ptr = put_label->label->addr;
				offset = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
				pool_ptr++;

				SLJIT_ASSERT(!(offset & 1));
				offset >>= 1;
				SLJIT_ASSERT(is_s32(offset));
				ins |= (sljit_ins)offset & 0xffffffff;

				put_label = put_label->next;
			}
			encode_inst(&code_ptr, ins);
		}
	}
	/* both passes must agree on the code size and the pool size */
	SLJIT_ASSERT((sljit_u8 *)code + ins_size == code_ptr);
	SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr);

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = ins_size;
	code = SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
	SLJIT_CACHE_FLUSH(code, code_ptr);
	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
	return code;
}
1357 
sljit_has_cpu_feature(sljit_s32 feature_type)1358 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
1359 {
1360 	/* TODO(mundaym): implement all */
1361 	switch (feature_type) {
1362 	case SLJIT_HAS_CLZ:
1363 		return have_eimm() ? 1 : 0; /* FLOGR instruction */
1364 	case SLJIT_HAS_CMOV:
1365 		return have_lscond1() ? 1 : 0;
1366 	case SLJIT_HAS_FPU:
1367 		return 0;
1368 	}
1369 	return 0;
1370 }
1371 
1372 /* --------------------------------------------------------------------- */
1373 /*  Entry, exit                                                          */
1374 /* --------------------------------------------------------------------- */
1375 
sljit_emit_enter(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1376 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
1377 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1378 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1379 {
1380 	sljit_s32 args = get_arg_count(arg_types);
1381 	sljit_sw frame_size;
1382 
1383 	CHECK_ERROR();
1384 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1385 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1386 
1387 	/* saved registers go in callee allocated save area */
1388 	compiler->local_size = (local_size + 0xf) & ~0xf;
1389 	frame_size = compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE;
1390 
1391 	FAIL_IF(push_inst(compiler, stmg(r6, r15, r6 * sizeof(sljit_sw), r15))); /* save registers TODO(MGM): optimize */
1392 	if (frame_size != 0) {
1393 		if (is_s16(-frame_size))
1394 			FAIL_IF(push_inst(compiler, aghi(r15, -((sljit_s16)frame_size))));
1395 		else if (is_s32(-frame_size))
1396 			FAIL_IF(push_inst(compiler, agfi(r15, -((sljit_s32)frame_size))));
1397 		else {
1398 			FAIL_IF(push_load_imm_inst(compiler, tmp1, -frame_size));
1399 			FAIL_IF(push_inst(compiler, la(r15, 0, tmp1, r15)));
1400 		}
1401 	}
1402 
1403 	if (args >= 1)
1404 		FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0), gpr(SLJIT_R0))));
1405 	if (args >= 2)
1406 		FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S1), gpr(SLJIT_R1))));
1407 	if (args >= 3)
1408 		FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S2), gpr(SLJIT_R2))));
1409 	SLJIT_ASSERT(args < 4);
1410 
1411 	return SLJIT_SUCCESS;
1412 }
1413 
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1414 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1415 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1416 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1417 {
1418 	CHECK_ERROR();
1419 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1420 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1421 
1422 	/* TODO(mundaym): stack space for saved floating point registers */
1423 	compiler->local_size = (local_size + 0xf) & ~0xf;
1424 	return SLJIT_SUCCESS;
1425 }
1426 
sljit_emit_return(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)1427 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
1428 {
1429 	sljit_sw size;
1430 	sljit_gpr end;
1431 
1432 	CHECK_ERROR();
1433 	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
1434 
1435 	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
1436 
1437 	size = compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + (r6 * sizeof(sljit_sw));
1438 	if (!is_s20(size)) {
1439 		FAIL_IF(push_load_imm_inst(compiler, tmp1, compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE));
1440 		FAIL_IF(push_inst(compiler, la(r15, 0, tmp1, r15)));
1441 		size = r6 * sizeof(sljit_sw);
1442 		end = r14; /* r15 has been restored already */
1443 	}
1444 	else
1445 		end = r15;
1446 
1447 	FAIL_IF(push_inst(compiler, lmg(r6, end, size, r15))); /* restore registers TODO(MGM): optimize */
1448 	FAIL_IF(push_inst(compiler, br(r14))); /* return */
1449 
1450 	return SLJIT_SUCCESS;
1451 }
1452 
1453 /* --------------------------------------------------------------------- */
1454 /*  Operators                                                            */
1455 /* --------------------------------------------------------------------- */
1456 
sljit_emit_op0(struct sljit_compiler * compiler,sljit_s32 op)1457 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1458 {
1459 	sljit_gpr arg0 = gpr(SLJIT_R0);
1460 	sljit_gpr arg1 = gpr(SLJIT_R1);
1461 
1462 	CHECK_ERROR();
1463 	CHECK(check_sljit_emit_op0(compiler, op));
1464 
1465 	op = GET_OPCODE(op) | (op & SLJIT_I32_OP);
1466 	switch (op) {
1467 	case SLJIT_BREAKPOINT:
1468 		/* TODO(mundaym): insert real breakpoint? */
1469 	case SLJIT_NOP:
1470 		return push_inst(compiler, 0x0700 /* 2-byte nop */);
1471 	case SLJIT_LMUL_UW:
1472 		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
1473 		break;
1474 	case SLJIT_LMUL_SW:
1475 		/* signed multiplication from: */
1476 		/* Hacker's Delight, Second Edition: Chapter 8-3. */
1477 		FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0)));
1478 		FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0)));
1479 		FAIL_IF(push_inst(compiler, ngr(tmp0, arg1)));
1480 		FAIL_IF(push_inst(compiler, ngr(tmp1, arg0)));
1481 
1482 		/* unsigned multiplication */
1483 		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
1484 
1485 		FAIL_IF(push_inst(compiler, sgr(arg0, tmp0)));
1486 		FAIL_IF(push_inst(compiler, sgr(arg0, tmp1)));
1487 		break;
1488 	case SLJIT_DIV_U32:
1489 	case SLJIT_DIVMOD_U32:
1490 		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
1491 		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
1492 		FAIL_IF(push_inst(compiler, dlr(tmp0, arg1)));
1493 		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
1494 		if (op == SLJIT_DIVMOD_U32)
1495 			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */
1496 
1497 		return SLJIT_SUCCESS;
1498 	case SLJIT_DIV_S32:
1499 	case SLJIT_DIVMOD_S32:
1500 		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
1501 		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
1502 		FAIL_IF(push_inst(compiler, dr(tmp0, arg1)));
1503 		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
1504 		if (op == SLJIT_DIVMOD_S32)
1505 			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */
1506 
1507 		return SLJIT_SUCCESS;
1508 	case SLJIT_DIV_UW:
1509 	case SLJIT_DIVMOD_UW:
1510 		FAIL_IF(push_inst(compiler, lghi(tmp0, 0)));
1511 		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
1512 		FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1)));
1513 		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
1514 		if (op == SLJIT_DIVMOD_UW)
1515 			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */
1516 
1517 		return SLJIT_SUCCESS;
1518 	case SLJIT_DIV_SW:
1519 	case SLJIT_DIVMOD_SW:
1520 		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
1521 		FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1)));
1522 		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
1523 		if (op == SLJIT_DIVMOD_SW)
1524 			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */
1525 
1526 		return SLJIT_SUCCESS;
1527 	case SLJIT_ENDBR:
1528 		return SLJIT_SUCCESS;
1529 	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1530 		return SLJIT_SUCCESS;
1531 	default:
1532 		SLJIT_UNREACHABLE();
1533 	}
1534 	/* swap result registers */
1535 	FAIL_IF(push_inst(compiler, lgr(tmp0, arg0)));
1536 	FAIL_IF(push_inst(compiler, lgr(arg0, arg1)));
1537 	return push_inst(compiler, lgr(arg1, tmp0));
1538 }
1539 
/* WHEN2 picks instruction builder `i1` when `cond` holds and `i2` otherwise,
 * applying LEVAL to the chosen one. LEVAL will be defined later with
 * different parameters as needed. The expansion is parenthesized so it can be
 * embedded in a larger expression. */
#define WHEN2(cond, i1, i2) ((cond) ? LEVAL(i1) : LEVAL(i2))
1542 
sljit_emit_op1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1543 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1544         sljit_s32 dst, sljit_sw dstw,
1545         sljit_s32 src, sljit_sw srcw)
1546 {
1547 	sljit_ins ins;
1548 	struct addr mem;
1549 	sljit_gpr dst_r;
1550 	sljit_gpr src_r;
1551 	sljit_s32 opcode = GET_OPCODE(op);
1552 
1553 	CHECK_ERROR();
1554 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1555 	ADJUST_LOCAL_OFFSET(dst, dstw);
1556 	ADJUST_LOCAL_OFFSET(src, srcw);
1557 
1558 	if ((dst == SLJIT_UNUSED) && !HAS_FLAGS(op)) {
1559 		/* TODO(carenas): implement prefetch? */
1560 		return SLJIT_SUCCESS;
1561 	}
1562 	if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) {
1563 		/* LOAD REGISTER */
1564 		if (FAST_IS_REG(dst) && FAST_IS_REG(src)) {
1565 			dst_r = gpr(dst);
1566 			src_r = gpr(src);
1567 			switch (opcode | (op & SLJIT_I32_OP)) {
1568 			/* 32-bit */
1569 			case SLJIT_MOV32_U8:
1570 				ins = llcr(dst_r, src_r);
1571 				break;
1572 			case SLJIT_MOV32_S8:
1573 				ins = lbr(dst_r, src_r);
1574 				break;
1575 			case SLJIT_MOV32_U16:
1576 				ins = llhr(dst_r, src_r);
1577 				break;
1578 			case SLJIT_MOV32_S16:
1579 				ins = lhr(dst_r, src_r);
1580 				break;
1581 			case SLJIT_MOV32:
1582 				ins = lr(dst_r, src_r);
1583 				break;
1584 			/* 64-bit */
1585 			case SLJIT_MOV_U8:
1586 				ins = llgcr(dst_r, src_r);
1587 				break;
1588 			case SLJIT_MOV_S8:
1589 				ins = lgbr(dst_r, src_r);
1590 				break;
1591 			case SLJIT_MOV_U16:
1592 				ins = llghr(dst_r, src_r);
1593 				break;
1594 			case SLJIT_MOV_S16:
1595 				ins = lghr(dst_r, src_r);
1596 				break;
1597 			case SLJIT_MOV_U32:
1598 				ins = llgfr(dst_r, src_r);
1599 				break;
1600 			case SLJIT_MOV_S32:
1601 				ins = lgfr(dst_r, src_r);
1602 				break;
1603 			case SLJIT_MOV:
1604 			case SLJIT_MOV_P:
1605 				ins = lgr(dst_r, src_r);
1606 				break;
1607 			default:
1608 				ins = 0;
1609 				SLJIT_UNREACHABLE();
1610 			}
1611 			FAIL_IF(push_inst(compiler, ins));
1612 			if (HAS_FLAGS(op)) {
1613 				/* only handle zero flag */
1614 				SLJIT_ASSERT(!(op & VARIABLE_FLAG_MASK));
1615 				return push_store_zero_flag(compiler, op, dst_r);
1616 			}
1617 			return SLJIT_SUCCESS;
1618 		}
1619 		/* LOAD IMMEDIATE */
1620 		if (FAST_IS_REG(dst) && (src & SLJIT_IMM)) {
1621 			switch (opcode) {
1622 			case SLJIT_MOV_U8:
1623 				srcw = (sljit_sw)((sljit_u8)(srcw));
1624 				break;
1625 			case SLJIT_MOV_S8:
1626 				srcw = (sljit_sw)((sljit_s8)(srcw));
1627 				break;
1628 			case SLJIT_MOV_U16:
1629 				srcw = (sljit_sw)((sljit_u16)(srcw));
1630 				break;
1631 			case SLJIT_MOV_S16:
1632 				srcw = (sljit_sw)((sljit_s16)(srcw));
1633 				break;
1634 			case SLJIT_MOV_U32:
1635 				srcw = (sljit_sw)((sljit_u32)(srcw));
1636 				break;
1637 			case SLJIT_MOV_S32:
1638 				srcw = (sljit_sw)((sljit_s32)(srcw));
1639 				break;
1640 			}
1641 			return push_load_imm_inst(compiler, gpr(dst), srcw);
1642 		}
1643 		/* LOAD */
1644 		/* TODO(carenas): avoid reg being defined later */
1645 		#define LEVAL(i) EVAL(i, reg, mem)
1646 		if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) {
1647 			sljit_gpr reg = gpr(dst);
1648 
1649 			FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
1650 			/* TODO(carenas): convert all calls below to LEVAL */
1651 			switch (opcode | (op & SLJIT_I32_OP)) {
1652 			case SLJIT_MOV32_U8:
1653 				ins = llc(reg, mem.offset, mem.index, mem.base);
1654 				break;
1655 			case SLJIT_MOV32_S8:
1656 				ins = lb(reg, mem.offset, mem.index, mem.base);
1657 				break;
1658 			case SLJIT_MOV32_U16:
1659 				ins = llh(reg, mem.offset, mem.index, mem.base);
1660 				break;
1661 			case SLJIT_MOV32_S16:
1662 				ins = WHEN2(is_u12(mem.offset), lh, lhy);
1663 				break;
1664 			case SLJIT_MOV32:
1665 				ins = WHEN2(is_u12(mem.offset), l, ly);
1666 				break;
1667 			case SLJIT_MOV_U8:
1668 				ins = LEVAL(llgc);
1669 				break;
1670 			case SLJIT_MOV_S8:
1671 				ins = lgb(reg, mem.offset, mem.index, mem.base);
1672 				break;
1673 			case SLJIT_MOV_U16:
1674 				ins = LEVAL(llgh);
1675 				break;
1676 			case SLJIT_MOV_S16:
1677 				ins = lgh(reg, mem.offset, mem.index, mem.base);
1678 				break;
1679 			case SLJIT_MOV_U32:
1680 				ins = LEVAL(llgf);
1681 				break;
1682 			case SLJIT_MOV_S32:
1683 				ins = lgf(reg, mem.offset, mem.index, mem.base);
1684 				break;
1685 			case SLJIT_MOV_P:
1686 			case SLJIT_MOV:
1687 				ins = lg(reg, mem.offset, mem.index, mem.base);
1688 				break;
1689 			default:
1690 				SLJIT_UNREACHABLE();
1691 			}
1692 			FAIL_IF(push_inst(compiler, ins));
1693 			if (HAS_FLAGS(op)) {
1694 				/* only handle zero flag */
1695 				SLJIT_ASSERT(!(op & VARIABLE_FLAG_MASK));
1696 				return push_store_zero_flag(compiler, op, reg);
1697 			}
1698 			return SLJIT_SUCCESS;
1699 		}
1700 		/* STORE and STORE IMMEDIATE */
1701 		if ((dst & SLJIT_MEM)
1702 			&& (FAST_IS_REG(src) || (src & SLJIT_IMM))) {
1703 			sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;
1704 			if (src & SLJIT_IMM) {
1705 				/* TODO(mundaym): MOVE IMMEDIATE? */
1706 				FAIL_IF(push_load_imm_inst(compiler, reg, srcw));
1707 			}
1708 			struct addr mem;
1709 			FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
1710 			switch (opcode) {
1711 			case SLJIT_MOV_U8:
1712 			case SLJIT_MOV_S8:
1713 				return push_inst(compiler,
1714 					WHEN2(is_u12(mem.offset), stc, stcy));
1715 			case SLJIT_MOV_U16:
1716 			case SLJIT_MOV_S16:
1717 				return push_inst(compiler,
1718 					WHEN2(is_u12(mem.offset), sth, sthy));
1719 			case SLJIT_MOV_U32:
1720 			case SLJIT_MOV_S32:
1721 				return push_inst(compiler,
1722 					WHEN2(is_u12(mem.offset), st, sty));
1723 			case SLJIT_MOV_P:
1724 			case SLJIT_MOV:
1725 				FAIL_IF(push_inst(compiler, LEVAL(stg)));
1726 				if (HAS_FLAGS(op)) {
1727 					/* only handle zero flag */
1728 					SLJIT_ASSERT(!(op & VARIABLE_FLAG_MASK));
1729 					return push_store_zero_flag(compiler, op, reg);
1730 				}
1731 				return SLJIT_SUCCESS;
1732 			default:
1733 				SLJIT_UNREACHABLE();
1734 			}
1735 		}
1736 		#undef LEVAL
1737 		/* MOVE CHARACTERS */
1738 		if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) {
1739 			struct addr mem;
1740 			FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
1741 			switch (opcode) {
1742 			case SLJIT_MOV_U8:
1743 			case SLJIT_MOV_S8:
1744 				FAIL_IF(push_inst(compiler,
1745 					EVAL(llgc, tmp0, mem)));
1746 				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
1747 				return push_inst(compiler,
1748 					EVAL(stcy, tmp0, mem));
1749 			case SLJIT_MOV_U16:
1750 			case SLJIT_MOV_S16:
1751 				FAIL_IF(push_inst(compiler,
1752 					EVAL(llgh, tmp0, mem)));
1753 				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
1754 				return push_inst(compiler,
1755 					EVAL(sthy, tmp0, mem));
1756 			case SLJIT_MOV_U32:
1757 			case SLJIT_MOV_S32:
1758 				FAIL_IF(push_inst(compiler,
1759 					EVAL(ly, tmp0, mem)));
1760 				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
1761 				return push_inst(compiler,
1762 					EVAL(sty, tmp0, mem));
1763 			case SLJIT_MOV_P:
1764 			case SLJIT_MOV:
1765 				FAIL_IF(push_inst(compiler,
1766 					EVAL(lg, tmp0, mem)));
1767 				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
1768 				FAIL_IF(push_inst(compiler,
1769 					EVAL(stg, tmp0, mem)));
1770 				if (HAS_FLAGS(op)) {
1771 					/* only handle zero flag */
1772 					SLJIT_ASSERT(!(op & VARIABLE_FLAG_MASK));
1773 					return push_store_zero_flag(compiler, op, tmp0);
1774 				}
1775 				return SLJIT_SUCCESS;
1776 			default:
1777 				SLJIT_UNREACHABLE();
1778 			}
1779 		}
1780 		SLJIT_UNREACHABLE();
1781 	}
1782 
1783 	SLJIT_ASSERT((src & SLJIT_IMM) == 0); /* no immediates */
1784 
1785 	dst_r = SLOW_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0;
1786 	src_r = FAST_IS_REG(src) ? gpr(REG_MASK & src) : tmp0;
1787 	if (src & SLJIT_MEM)
1788 		FAIL_IF(load_word(compiler, src_r, src, srcw, tmp1, src & SLJIT_I32_OP));
1789 
1790 	/* TODO(mundaym): optimize loads and stores */
1791 	switch (opcode | (op & SLJIT_I32_OP)) {
1792 	case SLJIT_NOT:
1793 		/* emulate ~x with x^-1 */
1794 		FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
1795 		if (src_r != dst_r)
1796 			FAIL_IF(push_inst(compiler, lgr(dst_r, src_r)));
1797 
1798 		FAIL_IF(push_inst(compiler, xgr(dst_r, tmp1)));
1799 		break;
1800 	case SLJIT_NOT32:
1801 		/* emulate ~x with x^-1 */
1802 		if (have_eimm())
1803 			FAIL_IF(push_inst(compiler, xilf(dst_r, -1)));
1804 		else {
1805 			FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
1806 			if (src_r != dst_r)
1807 				FAIL_IF(push_inst(compiler, lr(dst_r, src_r)));
1808 
1809 			FAIL_IF(push_inst(compiler, xr(dst_r, tmp1)));
1810 		}
1811 		break;
1812 	case SLJIT_NEG:
1813 		FAIL_IF(push_inst(compiler, lcgr(dst_r, src_r)));
1814 		break;
1815 	case SLJIT_NEG32:
1816 		FAIL_IF(push_inst(compiler, lcr(dst_r, src_r)));
1817 		break;
1818 	case SLJIT_CLZ:
1819 		if (have_eimm()) {
1820 			FAIL_IF(push_inst(compiler, flogr(tmp0, src_r))); /* clobbers tmp1 */
1821 			if (dst_r != tmp0)
1822 				FAIL_IF(push_inst(compiler, lgr(dst_r, tmp0)));
1823 		} else {
1824 			abort(); /* TODO(mundaym): no eimm (?) */
1825 		}
1826 		break;
1827 	case SLJIT_CLZ32:
1828 		if (have_eimm()) {
1829 			FAIL_IF(push_inst(compiler, sllg(tmp1, src_r, 32, 0)));
1830 			FAIL_IF(push_inst(compiler, iilf(tmp1, 0xffffffff)));
1831 			FAIL_IF(push_inst(compiler, flogr(tmp0, tmp1))); /* clobbers tmp1 */
1832 			if (dst_r != tmp0)
1833 				FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
1834 		} else {
1835 			abort(); /* TODO(mundaym): no eimm (?) */
1836 		}
1837 		break;
1838 	default:
1839 		SLJIT_UNREACHABLE();
1840 	}
1841 
1842 	/* write condition code to emulated flag register */
1843 	if (op & VARIABLE_FLAG_MASK)
1844 		FAIL_IF(push_inst(compiler, ipm(flag_r)));
1845 
1846 	/* write zero flag to emulated flag register */
1847 	if (op & SLJIT_SET_Z)
1848 		FAIL_IF(push_store_zero_flag(compiler, op, dst_r));
1849 
1850 	/* TODO(carenas): doesn't need FAIL_IF */
1851 	if ((dst != SLJIT_UNUSED) && (dst & SLJIT_MEM))
1852 		FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, op & SLJIT_I32_OP));
1853 
1854 	return SLJIT_SUCCESS;
1855 }
1856 
is_commutative(sljit_s32 op)1857 static SLJIT_INLINE int is_commutative(sljit_s32 op)
1858 {
1859 	switch (GET_OPCODE(op)) {
1860 	case SLJIT_ADD:
1861 	case SLJIT_ADDC:
1862 	case SLJIT_MUL:
1863 	case SLJIT_AND:
1864 	case SLJIT_OR:
1865 	case SLJIT_XOR:
1866 		return 1;
1867 	}
1868 	return 0;
1869 }
1870 
is_shift(sljit_s32 op)1871 static SLJIT_INLINE int is_shift(sljit_s32 op) {
1872 	sljit_s32 v = GET_OPCODE(op);
1873 	return (v == SLJIT_SHL || v == SLJIT_ASHR || v == SLJIT_LSHR) ? 1 : 0;
1874 }
1875 
sets_signed_flag(sljit_s32 op)1876 static SLJIT_INLINE int sets_signed_flag(sljit_s32 op)
1877 {
1878 	switch (GET_FLAG_TYPE(op)) {
1879 	case SLJIT_OVERFLOW:
1880 	case SLJIT_NOT_OVERFLOW:
1881 	case SLJIT_SIG_LESS:
1882 	case SLJIT_SIG_LESS_EQUAL:
1883 	case SLJIT_SIG_GREATER:
1884 	case SLJIT_SIG_GREATER_EQUAL:
1885 		return 1;
1886 	}
1887 	return 0;
1888 }
1889 
1890 /* Report whether we have an instruction for:
1891      op dst src imm
1892    where dst and src are separate registers. */
have_op_3_imm(sljit_s32 op,sljit_sw imm)1893 static int have_op_3_imm(sljit_s32 op, sljit_sw imm) {
1894 	return 0; /* TODO(mundaym): implement */
1895 }
1896 
1897 /* Report whether we have an instruction for:
1898      op reg imm
1899   where reg is both a source and the destination. */
have_op_2_imm(sljit_s32 op,sljit_sw imm)1900 static int have_op_2_imm(sljit_s32 op, sljit_sw imm) {
1901 	switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) {
1902 	case SLJIT_ADD32:
1903 	case SLJIT_ADD:
1904 		if (!HAS_FLAGS(op) || sets_signed_flag(op))
1905 			return have_eimm() ? is_s32(imm) : is_s16(imm);
1906 
1907 		return have_eimm() && is_u32(imm);
1908 	case SLJIT_MUL32:
1909 	case SLJIT_MUL:
1910 		/* TODO(mundaym): general extension check */
1911 		/* for ms{,g}fi */
1912 		if (op & VARIABLE_FLAG_MASK)
1913 			return 0;
1914 
1915 		return have_genext() && is_s16(imm);
1916 	case SLJIT_OR32:
1917 	case SLJIT_XOR32:
1918 	case SLJIT_AND32:
1919 		/* only use if have extended immediate facility */
1920 		/* this ensures flags are set correctly */
1921 		return have_eimm();
1922 	case SLJIT_AND:
1923 	case SLJIT_OR:
1924 	case SLJIT_XOR:
1925 		/* TODO(mundaym): make this more flexible */
1926 		/* avoid using immediate variations, flags */
1927 		/* won't be set correctly */
1928 		return 0;
1929 	case SLJIT_ADDC32:
1930 	case SLJIT_ADDC:
1931 		/* no ADD LOGICAL WITH CARRY IMMEDIATE */
1932 		return 0;
1933 	case SLJIT_SUB:
1934 	case SLJIT_SUB32:
1935 	case SLJIT_SUBC:
1936 	case SLJIT_SUBC32:
1937 		/* no SUBTRACT IMMEDIATE */
1938 		/* TODO(mundaym): SUBTRACT LOGICAL IMMEDIATE */
1939 		return 0;
1940 	}
1941 	return 0;
1942 }
1943 
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)1944 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
1945 	sljit_s32 dst, sljit_sw dstw,
1946 	sljit_s32 src1, sljit_sw src1w,
1947 	sljit_s32 src2, sljit_sw src2w)
1948 {
1949 	CHECK_ERROR();
1950 	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
1951 	ADJUST_LOCAL_OFFSET(dst, dstw);
1952 	ADJUST_LOCAL_OFFSET(src1, src1w);
1953 	ADJUST_LOCAL_OFFSET(src2, src2w);
1954 
1955 	if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
1956 		return SLJIT_SUCCESS;
1957 
1958 	sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
1959 
1960 	if (is_commutative(op)) {
1961 		#define SWAP_ARGS \
1962 		do {                         \
1963 			sljit_s32 t = src1;  \
1964 			sljit_sw tw = src1w; \
1965 			src1 = src2;         \
1966 			src1w = src2w;       \
1967 			src2 = t;            \
1968 			src2w = tw;          \
1969 		} while(0);
1970 
1971 		/* prefer immediate in src2 */
1972 		if (src1 & SLJIT_IMM) {
1973 			SWAP_ARGS
1974 		}
1975 
1976 		/* prefer to have src1 use same register as dst */
1977 		if (FAST_IS_REG(src2) && gpr(src2 & REG_MASK) == dst_r) {
1978 			SWAP_ARGS
1979 		}
1980 
1981 		/* prefer memory argument in src2 */
1982 		if (FAST_IS_REG(src2) && (src1 & SLJIT_MEM)) {
1983 			SWAP_ARGS
1984 		}
1985 		#undef SWAP_ARGS
1986 	}
1987 
1988 	/* src1 must be in a register */
1989 	sljit_gpr src1_r = FAST_IS_REG(src1) ? gpr(src1 & REG_MASK) : tmp0;
1990 	if (src1 & SLJIT_IMM)
1991 		FAIL_IF(push_load_imm_inst(compiler, src1_r, src1w));
1992 
1993 	if (src1 & SLJIT_MEM)
1994 		FAIL_IF(load_word(compiler, src1_r, src1, src1w, tmp1, op & SLJIT_I32_OP));
1995 
1996 	/* emit comparison before subtract */
1997 	if (GET_OPCODE(op) == SLJIT_SUB && (op & VARIABLE_FLAG_MASK)) {
1998 		sljit_sw cmp = 0;
1999 		switch (GET_FLAG_TYPE(op)) {
2000 		case SLJIT_LESS:
2001 		case SLJIT_LESS_EQUAL:
2002 		case SLJIT_GREATER:
2003 		case SLJIT_GREATER_EQUAL:
2004 			cmp = 1; /* unsigned */
2005 			break;
2006 		case SLJIT_EQUAL:
2007 		case SLJIT_SIG_LESS:
2008 		case SLJIT_SIG_LESS_EQUAL:
2009 		case SLJIT_SIG_GREATER:
2010 		case SLJIT_SIG_GREATER_EQUAL:
2011 			cmp = -1; /* signed */
2012 			break;
2013 		}
2014 		if (cmp) {
2015 			/* clear flags - no need to generate now */
2016 			op &= ~VARIABLE_FLAG_MASK;
2017 			sljit_gpr src2_r = FAST_IS_REG(src2) ? gpr(src2 & REG_MASK) : tmp1;
2018 			if (src2 & SLJIT_IMM) {
2019 				#define LEVAL(i) i(src1_r, src2w)
2020 				if (cmp > 0 && is_u32(src2w)) {
2021 					/* unsigned */
2022 					FAIL_IF(push_inst(compiler,
2023 					WHEN2(op & SLJIT_I32_OP, clfi, clgfi)));
2024 				}
2025 				else if (cmp < 0 && is_s16(src2w)) {
2026 					/* signed */
2027 					FAIL_IF(push_inst(compiler,
2028 					WHEN2(op & SLJIT_I32_OP, chi, cghi)));
2029 				}
2030 				else if (cmp < 0 && is_s32(src2w)) {
2031 					/* signed */
2032 					FAIL_IF(push_inst(compiler,
2033 					WHEN2(op & SLJIT_I32_OP, cfi, cgfi)));
2034 				}
2035 				#undef LEVAL
2036 				#define LEVAL(i) i(src1_r, src2_r)
2037 				else {
2038 					FAIL_IF(push_load_imm_inst(compiler, src2_r, src2w));
2039 					if (cmp > 0) {
2040 						/* unsigned */
2041 						FAIL_IF(push_inst(compiler,
2042 						WHEN2(op & SLJIT_I32_OP, clr, clgr)));
2043 					}
2044 					if (cmp < 0) {
2045 						/* signed */
2046 						FAIL_IF(push_inst(compiler,
2047 						WHEN2(op & SLJIT_I32_OP, cr, cgr)));
2048 					}
2049 				}
2050 			}
2051 			else {
2052 				if (src2 & SLJIT_MEM) {
2053 					/* TODO(mundaym): comparisons with memory */
2054 					/* load src2 into register */
2055 					FAIL_IF(load_word(compiler, src2_r, src2, src2w, tmp1, op & SLJIT_I32_OP));
2056 				}
2057 				if (cmp > 0) {
2058 					/* unsigned */
2059 					FAIL_IF(push_inst(compiler,
2060 						WHEN2(op & SLJIT_I32_OP, clr, clgr)));
2061 				}
2062 				if (cmp < 0) {
2063 					/* signed */
2064 					FAIL_IF(push_inst(compiler,
2065 						WHEN2(op & SLJIT_I32_OP, cr, cgr)));
2066 				}
2067 				#undef LEVAL
2068 			}
2069 			FAIL_IF(push_inst(compiler, ipm(flag_r)));
2070 		}
2071 	}
2072 
2073 	if (!HAS_FLAGS(op) && dst == SLJIT_UNUSED)
2074 		return SLJIT_SUCCESS;
2075 
2076 	/* need to specify signed or logical operation */
2077 	int signed_flags = sets_signed_flag(op);
2078 
2079 	if (is_shift(op)) {
2080 		/* handle shifts first, they have more constraints than other operations */
2081 		sljit_sw d = 0;
2082 		sljit_gpr b = FAST_IS_REG(src2) ? gpr(src2 & REG_MASK) : r0;
2083 		if (src2 & SLJIT_IMM)
2084 			d = src2w & ((op & SLJIT_I32_OP) ? 31 : 63);
2085 
2086 		if (src2 & SLJIT_MEM) {
2087 			/* shift amount (b) cannot be in r0 (i.e. tmp0) */
2088 			FAIL_IF(load_word(compiler, tmp1, src2, src2w, tmp1, op & SLJIT_I32_OP));
2089 			b = tmp1;
2090 		}
2091 		/* src1 and dst share the same register in the base 32-bit ISA */
2092 		/* TODO(mundaym): not needed when distinct-operand facility is available */
2093 		int workaround_alias = op & SLJIT_I32_OP && src1_r != dst_r;
2094 		if (workaround_alias) {
2095 			/* put src1 into tmp0 so we can overwrite it */
2096 			FAIL_IF(push_inst(compiler, lr(tmp0, src1_r)));
2097 			src1_r = tmp0;
2098 		}
2099 		switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) {
2100 		case SLJIT_SHL:
2101 			FAIL_IF(push_inst(compiler, sllg(dst_r, src1_r, d, b)));
2102 			break;
2103 		case SLJIT_SHL32:
2104 			FAIL_IF(push_inst(compiler, sll(src1_r, d, b)));
2105 			break;
2106 		case SLJIT_LSHR:
2107 			FAIL_IF(push_inst(compiler, srlg(dst_r, src1_r, d, b)));
2108 			break;
2109 		case SLJIT_LSHR32:
2110 			FAIL_IF(push_inst(compiler, srl(src1_r, d, b)));
2111 			break;
2112 		case SLJIT_ASHR:
2113 			FAIL_IF(push_inst(compiler, srag(dst_r, src1_r, d, b)));
2114 			break;
2115 		case SLJIT_ASHR32:
2116 			FAIL_IF(push_inst(compiler, sra(src1_r, d, b)));
2117 			break;
2118 		default:
2119 			SLJIT_UNREACHABLE();
2120 		}
2121 		if (workaround_alias && dst_r != src1_r)
2122 			FAIL_IF(push_inst(compiler, lr(dst_r, src1_r)));
2123 
2124 	}
2125 	else if ((GET_OPCODE(op) == SLJIT_MUL) && HAS_FLAGS(op)) {
2126 		/* multiply instructions do not generally set flags so we need to manually */
2127 		/* detect overflow conditions */
2128 		/* TODO(mundaym): 64-bit overflow */
2129 		SLJIT_ASSERT(GET_FLAG_TYPE(op) == SLJIT_MUL_OVERFLOW ||
2130 		             GET_FLAG_TYPE(op) == SLJIT_MUL_NOT_OVERFLOW);
2131 		sljit_gpr src2_r = FAST_IS_REG(src2) ? gpr(src2 & REG_MASK) : tmp1;
2132 		if (src2 & SLJIT_IMM) {
2133 			/* load src2 into register */
2134 			FAIL_IF(push_load_imm_inst(compiler, src2_r, src2w));
2135 		}
2136 		if (src2 & SLJIT_MEM) {
2137 			/* load src2 into register */
2138 			FAIL_IF(load_word(compiler, src2_r, src2, src2w, tmp1, op & SLJIT_I32_OP));
2139 		}
2140 		if (have_misc2()) {
2141 			#define LEVAL(i) i(dst_r, src1_r, src2_r)
2142 			FAIL_IF(push_inst(compiler,
2143 				WHEN2(op & SLJIT_I32_OP, msrkc, msgrkc)));
2144 			#undef LEVAL
2145 		}
2146 		else if (op & SLJIT_I32_OP) {
2147 			op &= ~VARIABLE_FLAG_MASK;
2148 			FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r)));
2149 			FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r)));
2150 			if (dst_r != tmp0) {
2151 				FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
2152 			}
2153 			FAIL_IF(push_inst(compiler, aih(tmp0, 1)));
2154 			FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U)));
2155 			FAIL_IF(push_inst(compiler, ipm(flag_r)));
2156 			FAIL_IF(push_inst(compiler, oilh(flag_r, 0x2000)));
2157 		}
2158 		else
2159 			return SLJIT_ERR_UNSUPPORTED;
2160 
2161 	}
2162 	else if ((GET_OPCODE(op) == SLJIT_SUB) && (op & SLJIT_SET_Z) && !signed_flags) {
2163 		/* subtract logical instructions do not set the right flags unfortunately */
2164 		/* instead, negate src2 and issue an add logical */
2165 		/* TODO(mundaym): distinct operand facility where needed */
2166 		if (src1_r != dst_r && src1_r != tmp0) {
2167 			#define LEVAL(i) i(tmp0, src1_r)
2168 			FAIL_IF(push_inst(compiler,
2169 				WHEN2(op & SLJIT_I32_OP, lr, lgr)));
2170 			src1_r = tmp0;
2171 			#undef LEVAL
2172 		}
2173 		sljit_gpr src2_r = FAST_IS_REG(src2) ? gpr(src2 & REG_MASK) : tmp1;
2174 		if (src2 & SLJIT_IMM) {
2175 			/* load src2 into register */
2176 			FAIL_IF(push_load_imm_inst(compiler, src2_r, src2w));
2177 		}
2178 		if (src2 & SLJIT_MEM) {
2179 			/* load src2 into register */
2180 			FAIL_IF(load_word(compiler, src2_r, src2, src2w, tmp1, op & SLJIT_I32_OP));
2181 		}
2182 		if (op & SLJIT_I32_OP) {
2183 			FAIL_IF(push_inst(compiler, lcr(tmp1, src2_r)));
2184 			FAIL_IF(push_inst(compiler, alr(src1_r, tmp1)));
2185 			if (src1_r != dst_r)
2186 				FAIL_IF(push_inst(compiler, lr(dst_r, src1_r)));
2187 		}
2188 		else {
2189 			FAIL_IF(push_inst(compiler, lcgr(tmp1, src2_r)));
2190 			FAIL_IF(push_inst(compiler, algr(src1_r, tmp1)));
2191 			if (src1_r != dst_r)
2192 				FAIL_IF(push_inst(compiler, lgr(dst_r, src1_r)));
2193 		}
2194 	}
2195 	else if ((src2 & SLJIT_IMM) && (src1_r == dst_r) && have_op_2_imm(op, src2w)) {
2196 		switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) {
2197 		#define LEVAL(i) i(dst_r, src2w)
2198 		case SLJIT_ADD:
2199 			if (!HAS_FLAGS(op) || signed_flags) {
2200 				FAIL_IF(push_inst(compiler,
2201 					WHEN2(is_s16(src2w), aghi, agfi)));
2202 			}
2203 			else
2204 				FAIL_IF(push_inst(compiler, LEVAL(algfi)));
2205 
2206 			break;
2207 		case SLJIT_ADD32:
2208 			if (!HAS_FLAGS(op) || signed_flags)
2209 				FAIL_IF(push_inst(compiler,
2210 					WHEN2(is_s16(src2w), ahi, afi)));
2211 			else
2212 				FAIL_IF(push_inst(compiler, LEVAL(alfi)));
2213 
2214 			break;
2215 		#undef LEVAL /* TODO(carenas): move down and refactor? */
2216 		case SLJIT_MUL:
2217 			FAIL_IF(push_inst(compiler, mhi(dst_r, src2w)));
2218 			break;
2219 		case SLJIT_MUL32:
2220 			FAIL_IF(push_inst(compiler, mghi(dst_r, src2w)));
2221 			break;
2222 		case SLJIT_OR32:
2223 			FAIL_IF(push_inst(compiler, oilf(dst_r, src2w)));
2224 			break;
2225 		case SLJIT_XOR32:
2226 			FAIL_IF(push_inst(compiler, xilf(dst_r, src2w)));
2227 			break;
2228 		case SLJIT_AND32:
2229 			FAIL_IF(push_inst(compiler, nilf(dst_r, src2w)));
2230 			break;
2231 		default:
2232 			SLJIT_UNREACHABLE();
2233 		}
2234 	}
2235 	else if ((src2 & SLJIT_IMM) && have_op_3_imm(op, src2w)) {
2236 		abort(); /* TODO(mundaym): implement */
2237 	}
2238 	else if ((src2 & SLJIT_MEM) && (dst_r == src1_r)) {
2239 		/* most 32-bit instructions can only handle 12-bit immediate offsets */
2240 		int need_u12 = !have_ldisp() &&
2241 			(op & SLJIT_I32_OP) &&
2242 			(GET_OPCODE(op) != SLJIT_ADDC) &&
2243 			(GET_OPCODE(op) != SLJIT_SUBC);
2244 		struct addr mem;
2245 		if (need_u12)
2246 			FAIL_IF(make_addr_bx(compiler, &mem, src2, src2w, tmp1));
2247 		else
2248 			FAIL_IF(make_addr_bxy(compiler, &mem, src2, src2w, tmp1));
2249 
2250 		int can_u12 = is_u12(mem.offset) ? 1 : 0;
2251 		sljit_ins ins = 0;
2252 		switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) {
2253 		/* 64-bit ops */
2254 		#define LEVAL(i) EVAL(i, dst_r, mem)
2255 		case SLJIT_ADD:
2256 			ins = WHEN2(signed_flags, ag, alg);
2257 			break;
2258 		case SLJIT_SUB:
2259 			ins = WHEN2(signed_flags, sg, slg);
2260 			break;
2261 		case SLJIT_ADDC:
2262 			ins = LEVAL(alcg);
2263 			break;
2264 		case SLJIT_SUBC:
2265 			ins = LEVAL(slbg);
2266 			break;
2267 		case SLJIT_MUL:
2268 			ins = LEVAL(msg);
2269 			break;
2270 		case SLJIT_OR:
2271 			ins = LEVAL(og);
2272 			break;
2273 		case SLJIT_XOR:
2274 			ins = LEVAL(xg);
2275 			break;
2276 		case SLJIT_AND:
2277 			ins = LEVAL(ng);
2278 			break;
2279 		/* 32-bit ops */
2280 		case SLJIT_ADD32:
2281 			if (signed_flags)
2282 				ins = WHEN2(can_u12, a, ay);
2283 			else
2284 				ins = WHEN2(can_u12, al, aly);
2285 			break;
2286 		case SLJIT_SUB32:
2287 			if (signed_flags)
2288 				ins = WHEN2(can_u12, s, sy);
2289 			else
2290 				ins = WHEN2(can_u12, sl, sly);
2291 			break;
2292 		case SLJIT_ADDC32:
2293 			ins = LEVAL(alc);
2294 			break;
2295 		case SLJIT_SUBC32:
2296 			ins = LEVAL(slb);
2297 			break;
2298 		case SLJIT_MUL32:
2299 			ins = WHEN2(can_u12, ms, msy);
2300 			break;
2301 		case SLJIT_OR32:
2302 			ins = WHEN2(can_u12, o, oy);
2303 			break;
2304 		case SLJIT_XOR32:
2305 			ins = WHEN2(can_u12, x, xy);
2306 			break;
2307 		case SLJIT_AND32:
2308 			ins = WHEN2(can_u12, n, ny);
2309 			break;
2310 		#undef LEVAL
2311 		default:
2312 			SLJIT_UNREACHABLE();
2313 		}
2314 		FAIL_IF(push_inst(compiler, ins));
2315 	}
2316 	else {
2317 		sljit_gpr src2_r = FAST_IS_REG(src2) ? gpr(src2 & REG_MASK) : tmp1;
2318 		if (src2 & SLJIT_IMM) {
2319 			/* load src2 into register */
2320 			FAIL_IF(push_load_imm_inst(compiler, src2_r, src2w));
2321 		}
2322 		if (src2 & SLJIT_MEM) {
2323 			/* load src2 into register */
2324 			FAIL_IF(load_word(compiler, src2_r, src2, src2w, tmp1, op & SLJIT_I32_OP));
2325 		}
2326 		/* TODO(mundaym): distinct operand facility where needed */
2327 		#define LEVAL(i) i(tmp0, src1_r)
2328 		if (src1_r != dst_r && src1_r != tmp0) {
2329 			FAIL_IF(push_inst(compiler,
2330 				WHEN2(op & SLJIT_I32_OP, lr, lgr)));
2331 			src1_r = tmp0;
2332 		}
2333 		#undef LEVAL
2334 		sljit_ins ins = 0;
2335 		switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) {
2336 		#define LEVAL(i) i(src1_r, src2_r)
2337 		/* 64-bit ops */
2338 		case SLJIT_ADD:
2339 			ins = WHEN2(signed_flags, agr, algr);
2340 			break;
2341 		case SLJIT_SUB:
2342 			ins = WHEN2(signed_flags, sgr, slgr);
2343 			break;
2344 		case SLJIT_ADDC:
2345 			ins = LEVAL(alcgr);
2346 			break;
2347 		case SLJIT_SUBC:
2348 			ins = LEVAL(slbgr);
2349 			break;
2350 		case SLJIT_MUL:
2351 			ins = LEVAL(msgr);
2352 			break;
2353 		case SLJIT_AND:
2354 			ins = LEVAL(ngr);
2355 			break;
2356 		case SLJIT_OR:
2357 			ins = LEVAL(ogr);
2358 			break;
2359 		case SLJIT_XOR:
2360 			ins = LEVAL(xgr);
2361 			break;
2362 		/* 32-bit ops */
2363 		case SLJIT_ADD32:
2364 			ins = WHEN2(signed_flags, ar, alr);
2365 			break;
2366 		case SLJIT_SUB32:
2367 			ins = WHEN2(signed_flags, sr, slr);
2368 			break;
2369 		case SLJIT_ADDC32:
2370 			ins = LEVAL(alcr);
2371 			break;
2372 		case SLJIT_SUBC32:
2373 			ins = LEVAL(slbr);
2374 			break;
2375 		case SLJIT_MUL32:
2376 			ins = LEVAL(msr);
2377 			break;
2378 		case SLJIT_AND32:
2379 			ins = LEVAL(nr);
2380 			break;
2381 		case SLJIT_OR32:
2382 			ins = LEVAL(or);
2383 			break;
2384 		case SLJIT_XOR32:
2385 			ins = LEVAL(xr);
2386 			break;
2387 		#undef LEVAL
2388 		default:
2389 			SLJIT_UNREACHABLE();
2390 		}
2391 		FAIL_IF(push_inst(compiler, ins));
2392 		#define LEVAL(i) i(dst_r, src1_r)
2393 		if (src1_r != dst_r)
2394 			FAIL_IF(push_inst(compiler,
2395 				WHEN2(op & SLJIT_I32_OP, lr, lgr)));
2396 		#undef LEVAL
2397 	}
2398 
2399 	/* write condition code to emulated flag register */
2400 	if (op & VARIABLE_FLAG_MASK)
2401 		FAIL_IF(push_inst(compiler, ipm(flag_r)));
2402 
2403 	/* write zero flag to emulated flag register */
2404 	if (op & SLJIT_SET_Z)
2405 		FAIL_IF(push_store_zero_flag(compiler, op, dst_r));
2406 
2407 	/* finally write the result to memory if required */
2408 	if (dst & SLJIT_MEM) {
2409 		SLJIT_ASSERT(dst_r != tmp1);
2410 		/* TODO(carenas): s/FAIL_IF/ return */
2411 		FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, op & SLJIT_I32_OP));
2412 	}
2413 
2414 	return SLJIT_SUCCESS;
2415 }
2416 
sljit_emit_op_src(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)2417 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(
2418 	struct sljit_compiler *compiler,
2419 	sljit_s32 op, sljit_s32 src, sljit_sw srcw)
2420 {
2421 	sljit_gpr src_r;
2422 
2423 	CHECK_ERROR();
2424 	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2425 	ADJUST_LOCAL_OFFSET(src, srcw);
2426 
2427 	switch (op) {
2428 	case SLJIT_FAST_RETURN:
2429 		src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
2430 		if (src & SLJIT_MEM)
2431 			FAIL_IF(load_word(compiler, tmp1, src, srcw, tmp1, 0));
2432 
2433 		return push_inst(compiler, br(src_r));
2434 	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2435 		/* TODO(carenas): implement? */
2436 		return SLJIT_SUCCESS;
2437 	case SLJIT_PREFETCH_L1:
2438 	case SLJIT_PREFETCH_L2:
2439 	case SLJIT_PREFETCH_L3:
2440 	case SLJIT_PREFETCH_ONCE:
2441 		/* TODO(carenas): implement */
2442 		return SLJIT_SUCCESS;
2443 	default:
2444                 /* TODO(carenas): probably should not success by default */
2445 		return SLJIT_SUCCESS;
2446 	}
2447 
2448 	return SLJIT_SUCCESS;
2449 }
2450 
sljit_get_register_index(sljit_s32 reg)2451 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
2452 {
2453 	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2454 	return gpr(reg);
2455 }
2456 
sljit_get_float_register_index(sljit_s32 reg)2457 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
2458 {
2459 	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
2460 	abort();
2461 }
2462 
sljit_emit_op_custom(struct sljit_compiler * compiler,void * instruction,sljit_s32 size)2463 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2464 	void *instruction, sljit_s32 size)
2465 {
2466 	sljit_ins ins = 0;
2467 
2468 	CHECK_ERROR();
2469 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2470 
2471 	memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, size);
2472 	return push_inst(compiler, ins);
2473 }
2474 
2475 /* --------------------------------------------------------------------- */
2476 /*  Floating point operators                                             */
2477 /* --------------------------------------------------------------------- */
2478 
sljit_emit_fop1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2479 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2480 	sljit_s32 dst, sljit_sw dstw,
2481 	sljit_s32 src, sljit_sw srcw)
2482 {
2483 	CHECK_ERROR();
2484 	abort();
2485 }
2486 
sljit_emit_fop2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2487 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2488 	sljit_s32 dst, sljit_sw dstw,
2489 	sljit_s32 src1, sljit_sw src1w,
2490 	sljit_s32 src2, sljit_sw src2w)
2491 {
2492 	CHECK_ERROR();
2493 	abort();
2494 }
2495 
2496 /* --------------------------------------------------------------------- */
2497 /*  Other instructions                                                   */
2498 /* --------------------------------------------------------------------- */
2499 
sljit_emit_fast_enter(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)2500 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
2501 {
2502 	CHECK_ERROR();
2503 	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
2504 	ADJUST_LOCAL_OFFSET(dst, dstw);
2505 
2506 	if (FAST_IS_REG(dst))
2507 		return push_inst(compiler, lgr(gpr(dst), fast_link_r));
2508 
2509 	/* memory */
2510 	return store_word(compiler, fast_link_r, dst, dstw, tmp1, 0);
2511 }
2512 
2513 /* --------------------------------------------------------------------- */
2514 /*  Conditional instructions                                             */
2515 /* --------------------------------------------------------------------- */
2516 
sljit_emit_label(struct sljit_compiler * compiler)2517 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2518 {
2519 	struct sljit_label *label;
2520 
2521 	CHECK_ERROR_PTR();
2522 	CHECK_PTR(check_sljit_emit_label(compiler));
2523 
2524 	if (compiler->last_label && compiler->last_label->size == compiler->size)
2525 		return compiler->last_label;
2526 
2527 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2528 	PTR_FAIL_IF(!label);
2529 	set_label(label, compiler);
2530 	return label;
2531 }
2532 
sljit_emit_jump(struct sljit_compiler * compiler,sljit_s32 type)2533 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2534 {
2535 	sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(type & 0xff) : 0xf;
2536 
2537 	CHECK_ERROR_PTR();
2538 	CHECK_PTR(check_sljit_emit_jump(compiler, type));
2539 
2540 	/* reload condition code */
2541 	if (mask != 0xf)
2542 		PTR_FAIL_IF(push_load_cc(compiler, type & 0xff));
2543 
2544 	/* record jump */
2545 	struct sljit_jump *jump = (struct sljit_jump *)
2546 		ensure_abuf(compiler, sizeof(struct sljit_jump));
2547 	PTR_FAIL_IF(!jump);
2548 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2549 	jump->addr = compiler->size;
2550 
2551 	/* emit jump instruction */
2552 	type &= 0xff;
2553 	if (type >= SLJIT_FAST_CALL)
2554 		PTR_FAIL_IF(push_inst(compiler, brasl(type == SLJIT_FAST_CALL ? fast_link_r : link_r, 0)));
2555 	else
2556 		PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));
2557 
2558 	return jump;
2559 }
2560 
sljit_emit_call(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types)2561 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
2562 	sljit_s32 arg_types)
2563 {
2564 	CHECK_ERROR_PTR();
2565 	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
2566 
2567 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2568 		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2569 	compiler->skip_checks = 1;
2570 #endif
2571 
2572 	return sljit_emit_jump(compiler, type);
2573 }
2574 
sljit_emit_ijump(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src,sljit_sw srcw)2575 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2576 {
2577 	sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
2578 
2579 	CHECK_ERROR();
2580 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2581 	ADJUST_LOCAL_OFFSET(src, srcw);
2582 
2583 	if (src & SLJIT_IMM) {
2584 		SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */
2585 		FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
2586 	}
2587 	else if (src & SLJIT_MEM)
2588 		FAIL_IF(load_word(compiler, src_r, src, srcw, tmp1, 0 /* 64-bit */));
2589 
2590 	/* emit jump instruction */
2591 	if (type >= SLJIT_FAST_CALL)
2592 		return push_inst(compiler, basr(type == SLJIT_FAST_CALL ? fast_link_r : link_r, src_r));
2593 
2594 	return push_inst(compiler, br(src_r));
2595 }
2596 
sljit_emit_icall(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types,sljit_s32 src,sljit_sw srcw)2597 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
2598 	sljit_s32 arg_types,
2599 	sljit_s32 src, sljit_sw srcw)
2600 {
2601 	CHECK_ERROR();
2602 	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
2603 
2604 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2605 		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2606 	compiler->skip_checks = 1;
2607 #endif
2608 
2609 	return sljit_emit_ijump(compiler, type, src, srcw);
2610 }
2611 
sljit_emit_op_flags(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 type)2612 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2613 	sljit_s32 dst, sljit_sw dstw,
2614 	sljit_s32 type)
2615 {
2616 	sljit_u8 mask = get_cc(type & 0xff);
2617 
2618 	CHECK_ERROR();
2619 	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
2620 
2621 	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2622 	sljit_gpr loc_r = tmp1;
2623 	switch (GET_OPCODE(op)) {
2624 	case SLJIT_AND:
2625 	case SLJIT_OR:
2626 	case SLJIT_XOR:
2627 		/* dst is also source operand */
2628 		if (dst & SLJIT_MEM)
2629 			FAIL_IF(load_word(compiler, dst_r, dst, dstw, tmp1, op & SLJIT_I32_OP));
2630 
2631 		break;
2632 	case SLJIT_MOV:
2633 	case (SLJIT_MOV32 & ~SLJIT_I32_OP):
2634 		/* can write straight into destination */
2635 		loc_r = dst_r;
2636 		break;
2637 	default:
2638 		SLJIT_UNREACHABLE();
2639 	}
2640 
2641 	if (mask != 0xf)
2642 		FAIL_IF(push_load_cc(compiler, type & 0xff));
2643 
2644 	/* TODO(mundaym): fold into cmov helper function? */
2645 	#define LEVAL(i) i(loc_r, 1, mask)
2646 	if (have_lscond2()) {
2647 		FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
2648 		FAIL_IF(push_inst(compiler,
2649 			WHEN2(op & SLJIT_I32_OP, lochi, locghi)));
2650 	} else {
2651 		/* TODO(mundaym): no load/store-on-condition 2 facility (ipm? branch-and-set?) */
2652 		abort();
2653 	}
2654 	#undef LEVAL
2655 
2656 	/* apply bitwise op and set condition codes */
2657 	switch (GET_OPCODE(op)) {
2658 	#define LEVAL(i) i(dst_r, loc_r)
2659 	case SLJIT_AND:
2660 		FAIL_IF(push_inst(compiler,
2661 			WHEN2(op & SLJIT_I32_OP, nr, ngr)));
2662 		break;
2663 	case SLJIT_OR:
2664 		FAIL_IF(push_inst(compiler,
2665 			WHEN2(op & SLJIT_I32_OP, or, ogr)));
2666 		break;
2667 	case SLJIT_XOR:
2668 		FAIL_IF(push_inst(compiler,
2669 			WHEN2(op & SLJIT_I32_OP, xr, xgr)));
2670 		break;
2671 	#undef LEVAL
2672 	}
2673 
2674 	/* set zero flag if needed */
2675 	if (op & SLJIT_SET_Z)
2676 		FAIL_IF(push_store_zero_flag(compiler, op, dst_r));
2677 
2678 	/* store result to memory if required */
2679 	/* TODO(carenas): s/FAIL_IF/ return */
2680 	if (dst & SLJIT_MEM)
2681 		FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, op & SLJIT_I32_OP));
2682 
2683 	return SLJIT_SUCCESS;
2684 }
2685 
sljit_emit_cmov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_reg,sljit_s32 src,sljit_sw srcw)2686 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
2687 	sljit_s32 dst_reg,
2688 	sljit_s32 src, sljit_sw srcw)
2689 {
2690 	sljit_u8 mask = get_cc(type & 0xff);
2691 	sljit_gpr dst_r = gpr(dst_reg & ~SLJIT_I32_OP);
2692 	sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;
2693 
2694 	CHECK_ERROR();
2695 	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
2696 
2697 	if (mask != 0xf)
2698 		FAIL_IF(push_load_cc(compiler, type & 0xff));
2699 
2700 	if (src & SLJIT_IMM) {
2701 		/* TODO(mundaym): fast path with lscond2 */
2702 		FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
2703 	}
2704 
2705 	#define LEVAL(i) i(dst_r, src_r, mask)
2706 	if (have_lscond1())
2707 		return push_inst(compiler,
2708 			WHEN2(dst_reg & SLJIT_I32_OP, locr, locgr));
2709 
2710 	#undef LEVAL
2711 
2712 	/* TODO(mundaym): implement */
2713 	return SLJIT_ERR_UNSUPPORTED;
2714 }
2715 
2716 /* --------------------------------------------------------------------- */
2717 /*  Other instructions                                                   */
2718 /* --------------------------------------------------------------------- */
2719 
2720 /* On s390x we build a literal pool to hold constants. This has two main
2721    advantages:
2722 
2723      1. we only need one instruction in the instruction stream (LGRL)
2724      2. we can store 64 bit addresses and use 32 bit offsets
2725 
2726    To retrofit the extra information needed to build the literal pool we
2727    add a new sljit_s390x_const struct that contains the initial value but
2728    can still be cast to a sljit_const. */
2729 
sljit_emit_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw,sljit_sw init_value)2730 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
2731 {
2732 	struct sljit_s390x_const *const_;
2733 	sljit_gpr dst_r;
2734 
2735 	CHECK_ERROR_PTR();
2736 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2737 
2738 	const_ = (struct sljit_s390x_const*)ensure_abuf(compiler,
2739 					sizeof(struct sljit_s390x_const));
2740 	PTR_FAIL_IF(!const_);
2741 	set_const((struct sljit_const*)const_, compiler);
2742 	const_->init_value = init_value;
2743 
2744 	dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2745 	if (have_genext())
2746 		PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | lgrl(dst_r, 0)));
2747 	else {
2748 		PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | larl(tmp1, 0)));
2749 		PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
2750 	}
2751 
2752 	if (dst & SLJIT_MEM)
2753 		PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, 0 /* always 64-bit */));
2754 
2755 	return (struct sljit_const*)const_;
2756 }
2757 
sljit_set_jump_addr(sljit_uw addr,sljit_uw new_target,sljit_sw executable_offset)2758 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
2759 {
2760 	/* Update the constant pool. */
2761 	sljit_uw *ptr = (sljit_uw *)addr;
2762 	SLJIT_UNUSED_ARG(executable_offset);
2763 
2764 	SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
2765 	*ptr = new_target;
2766 	SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
2767 	SLJIT_CACHE_FLUSH(ptr, ptr + 1);
2768 }
2769 
sljit_set_const(sljit_uw addr,sljit_sw new_constant,sljit_sw executable_offset)2770 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
2771 {
2772 	sljit_set_jump_addr(addr, new_constant, executable_offset);
2773 }
2774 
sljit_emit_put_label(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)2775 SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label(
2776 	struct sljit_compiler *compiler,
2777 	sljit_s32 dst, sljit_sw dstw)
2778 {
2779 	struct sljit_put_label *put_label;
2780 	sljit_gpr dst_r;
2781 
2782 	CHECK_ERROR_PTR();
2783 	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
2784 	ADJUST_LOCAL_OFFSET(dst, dstw);
2785 
2786 	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
2787 	PTR_FAIL_IF(!put_label);
2788 	set_put_label(put_label, compiler, 0);
2789 
2790 	dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2791 
2792 	if (have_genext())
2793 		PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));
2794 	else {
2795 		PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));
2796 		PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
2797 	}
2798 
2799 	if (dst & SLJIT_MEM)
2800 		PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, 0));
2801 
2802 	return put_label;
2803 }
2804 
2805 /* TODO(carenas): EVAL probably should move up or be refactored */
2806 #undef WHEN2
2807 #undef EVAL
2808 
2809 #undef tmp1
2810 #undef tmp0
2811 
2812 /* TODO(carenas): undef other macros that spill like is_u12? */
2813