/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "config.h"

#include <stdlib.h>
#include <stdio.h>

#include "qemu-common.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)
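
/* For example, CASE_OP_32_64(add) expands to the pair of labels
   "case INDEX_op_add_i32:" and "case INDEX_op_add_i64", so a single
   case body handles both operand widths. */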

typedef enum {
    TCG_TEMP_UNDEF = 0,
    TCG_TEMP_CONST,
    TCG_TEMP_COPY,
} tcg_temp_state;

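/* Analysis data for one temp.  prev_copy/next_copy splice temps that are
   known to hold the same value into a doubly linked circular list; val is
   meaningful only in the TCG_TEMP_CONST state; a zero bit in mask means
   the corresponding bit of the value is known to be zero. */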
struct tcg_temp_info {
    tcg_temp_state state;
    uint16_t prev_copy;
    uint16_t next_copy;
    tcg_target_ulong val;
    tcg_target_ulong mask;
};

static struct tcg_temp_info temps[TCG_MAX_TEMPS];

/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP had exactly one other
   copy, that remaining temp is no longer a copy of anything, so reset
   its state as well. */
static void reset_temp(TCGArg temp)
{
    if (temps[temp].state == TCG_TEMP_COPY) {
        if (temps[temp].prev_copy == temps[temp].next_copy) {
            temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
        } else {
            temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
            temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
        }
    }
    temps[temp].state = TCG_TEMP_UNDEF;
    temps[temp].mask = -1;
}

/* Reset all temporaries, given that there are NB_TEMPS of them. */
static void reset_all_temps(int nb_temps)
{
    int i;
    for (i = 0; i < nb_temps; i++) {
        temps[i].state = TCG_TEMP_UNDEF;
        temps[i].mask = -1;
    }
}

static int op_bits(TCGOpcode op)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    return def->flags & TCG_OPF_64BIT ? 64 : 32;
}

static TCGOpcode op_to_movi(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_movi_i32;
    case 64:
        return INDEX_op_movi_i64;
    default:
        fprintf(stderr, "op_to_movi: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
{
    TCGArg i;

    /* If this is already a global, we can't do better. */
    if (temp < s->nb_globals) {
        return temp;
    }

    /* Search for a global first. */
    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
        if (i < s->nb_globals) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local. */
    if (!s->temps[temp].temp_local) {
        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
            if (s->temps[i].temp_local) {
                return i;
            }
        }
    }

    /* Failed to find a better representation; return the same temp. */
    return temp;
}

static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
{
    TCGArg i;

    if (arg1 == arg2) {
        return true;
    }

    if (temps[arg1].state != TCG_TEMP_COPY
        || temps[arg2].state != TCG_TEMP_COPY) {
        return false;
    }

    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
        if (i == arg2) {
            return true;
        }
    }

    return false;
}

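/* Record that DST is now a copy of SRC: splice DST into SRC's copy ring
   immediately after SRC.  Copies are only tracked when both temps have
   the same type; the known-zero mask is propagated either way. */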
static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
                            TCGArg dst, TCGArg src)
{
    reset_temp(dst);
    temps[dst].mask = temps[src].mask;
    assert(temps[src].state != TCG_TEMP_CONST);

    if (s->temps[src].type == s->temps[dst].type) {
        if (temps[src].state != TCG_TEMP_COPY) {
            temps[src].state = TCG_TEMP_COPY;
            temps[src].next_copy = src;
            temps[src].prev_copy = src;
        }
        temps[dst].state = TCG_TEMP_COPY;
        temps[dst].next_copy = temps[src].next_copy;
        temps[dst].prev_copy = src;
        temps[temps[dst].next_copy].prev_copy = dst;
        temps[src].next_copy = dst;
    }

    gen_args[0] = dst;
    gen_args[1] = src;
}

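/* For a constant, the known-nonzero mask is the value itself: every zero
   bit of VAL is known to be zero in DST. */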
static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
{
    reset_temp(dst);
    temps[dst].state = TCG_TEMP_CONST;
    temps[dst].val = val;
    temps[dst].mask = val;
    gen_args[0] = dst;
    gen_args[1] = val;
}

static TCGOpcode op_to_mov(TCGOpcode op)
{
    switch (op_bits(op)) {
    case 32:
        return INDEX_op_mov_i32;
    case 64:
        return INDEX_op_mov_i64;
    default:
        fprintf(stderr, "op_to_mov: unexpected return value of "
                "function op_bits.\n");
        tcg_abort();
    }
}

static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (uint32_t)y;

    case INDEX_op_shl_i64:
        return (uint64_t)x << (uint64_t)y;

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (uint32_t)y;

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (uint64_t)y;

    case INDEX_op_sar_i32:
        return (int32_t)x >> (int32_t)y;

    case INDEX_op_sar_i64:
        return (int64_t)x >> (int64_t)y;

    case INDEX_op_rotr_i32:
        return ror32(x, y);

    case INDEX_op_rotr_i64:
        return ror64(x, y);

    case INDEX_op_rotl_i32:
        return rol32(x, y);

    case INDEX_op_rotl_i64:
        return rol64(x, y);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

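    /* Note: "y ? : 1" below is the GNU C conditional with an omitted
       middle operand, equivalent to "y ? y : 1". */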
    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    TCGArg res = do_constant_folding_2(op, x, y);
    if (op_bits(op) == 32) {
        res &= 0xffffffff;
    }
    return res;
}

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can. */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
        switch (op_bits(op)) {
        case 32:
            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
        case 64:
            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
        default:
            tcg_abort();
        }
    } else if (temps_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
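        /* Unsigned comparisons against zero have a fixed result:
           "x < 0" is never true and "x >= 0" always is. */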
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    } else {
        return 2;
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can. */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (temps[bl].state == TCG_TEMP_CONST
        && temps[bh].state == TCG_TEMP_CONST) {
        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;

        if (temps[al].state == TCG_TEMP_CONST
            && temps[ah].state == TCG_TEMP_CONST) {
            uint64_t a;
            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

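/* For example, given "add t0, t1, $5", sum is -1 and nothing happens; for
   "add t0, $5, t1", sum is +1 and the operands are swapped so the constant
   ends up second; for "add t0, t1, t0", dest == a2 forces a swap into the
   "op a, a, b" form. */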
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += temps[a1].state == TCG_TEMP_CONST;
    sum -= temps[a2].state == TCG_TEMP_CONST;

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += temps[p1[0]].state == TCG_TEMP_CONST;
    sum += temps[p1[1]].state == TCG_TEMP_CONST;
    sum -= temps[p2[0]].state == TCG_TEMP_CONST;
    sum -= temps[p2[1]].state == TCG_TEMP_CONST;
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/* Propagate constants and copies, fold constant expressions. */
static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                    TCGArg *args, TCGOpDef *tcg_op_defs)
{
    int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
    tcg_target_ulong mask, affected;
    TCGOpcode op;
    const TCGOpDef *def;
    TCGArg *gen_args;
    TCGArg tmp;

    /* The temps[] array has an element for each temp.
       If a temp holds a constant then its value is kept in the element's
       val field.  If a temp is a copy of other temps then the copies are
       available through the doubly linked circular list. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    reset_all_temps(nb_temps);

    nb_ops = tcg_opc_ptr - s->gen_opc_buf;
    gen_args = args;
    for (op_index = 0; op_index < nb_ops; op_index++) {
        op = s->gen_opc_buf[op_index];
        def = &tcg_op_defs[op];
        /* Do copy propagation */
        if (op == INDEX_op_call) {
            int nb_oargs = args[0] >> 16;
            int nb_iargs = args[0] & 0xffff;
            for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) {
                if (temps[args[i]].state == TCG_TEMP_COPY) {
                    args[i] = find_better_copy(s, args[i]);
                }
            }
        } else {
            for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
                if (temps[args[i]].state == TCG_TEMP_COPY) {
                    args[i] = find_better_copy(s, args[i]);
                }
            }
        }

        /* For commutative operations, make the constant the second argument. */
        switch (op) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(and):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(args[0], &args[1], &args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &args[0], &args[1])) {
                args[2] = tcg_swap_cond(args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(args[0], &args[1], &args[2])) {
                args[3] = tcg_swap_cond(args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &args[1], &args[2])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation. */
            if (swap_commutative(args[0], &args[4], &args[3])) {
                args[5] = tcg_invert_cond(args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(args[0], &args[2], &args[4]);
            swap_commutative(args[1], &args[3], &args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(args[0], &args[2], &args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&args[0], &args[2])) {
                args[4] = tcg_swap_cond(args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&args[1], &args[3])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0"
           and "sub r, 0, a => neg r, a" cases. */
        switch (op) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == 0) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0);
                args += 3;
                gen_args += 2;
                continue;
            }
            break;
        CASE_OP_32_64(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (temps[args[2]].state == TCG_TEMP_CONST) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (op == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                }
                if (!have_neg) {
                    break;
                }
                if (temps[args[1]].state == TCG_TEMP_CONST
                    && temps[args[1]].val == 0) {
                    s->gen_opc_buf[op_index] = neg_op;
                    reset_temp(args[0]);
                    gen_args[0] = args[0];
                    gen_args[1] = args[2];
                    args += 3;
                    gen_args += 2;
                    continue;
                }
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, 0 => mov r, a" cases */
        switch (op) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                /* Proceed with possible constant folding. */
                break;
            }
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0) {
                if (temps_are_copies(args[0], args[1])) {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                } else {
                    s->gen_opc_buf[op_index] = op_to_mov(op);
                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                    gen_args += 2;
                }
                args += 3;
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify using known-zero bits */
        mask = -1;
        affected = -1;
        switch (op) {
        CASE_OP_32_64(ext8s):
            if ((temps[args[1]].mask & 0x80) != 0) {
                break;
            }
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((temps[args[1]].mask & 0x8000) != 0) {
                break;
            }
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((temps[args[1]].mask & 0x80000000) != 0) {
                break;
            }
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = temps[args[2]].mask;
            if (temps[args[2]].state == TCG_TEMP_CONST) {
        and_const:
                affected = temps[args[1]].mask & ~mask;
            }
            mask = temps[args[1]].mask & mask;
            break;

        CASE_OP_32_64(sar):
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                mask = ((tcg_target_long)temps[args[1]].mask
                        >> temps[args[2]].val);
            }
            break;

        CASE_OP_32_64(shr):
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                mask = temps[args[1]].mask >> temps[args[2]].val;
            }
            break;

        CASE_OP_32_64(shl):
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                mask = temps[args[1]].mask << temps[args[2]].val;
            }
            break;

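        /* Example: a known-nonzero mask of 0b0110 means the value is 0, 2,
           4 or 6; -(0b0110 & -0b0110) = -0b0010 = ...11110, so after
           negation only the lowest bit is still known zero. */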
        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost set bit. */
            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
            break;

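        /* The deposit keeps args[1]'s known bits outside the len-bit
           (args[4]) field at position args[3] and inserts args[2]'s low
           bits into it, so the masks combine the same way. */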
        CASE_OP_32_64(deposit):
            tmp = ((1ull << args[4]) - 1);
            mask = ((temps[args[1]].mask & ~(tmp << args[3]))
                    | ((temps[args[2]].mask & tmp) << args[3]));
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = temps[args[1]].mask | temps[args[2]].mask;
            break;

        CASE_OP_32_64(setcond):
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = temps[args[3]].mask | temps[args[4]].mask;
            break;

        default:
            break;
        }

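        /* Every bit of the result is known zero: the op is a movi 0. */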
        if (mask == 0) {
            assert(def->nb_oargs == 1);
            s->gen_opc_buf[op_index] = op_to_movi(op);
            tcg_opt_gen_movi(gen_args, args[0], 0);
            args += def->nb_oargs + def->nb_iargs + def->nb_cargs;
            gen_args += 2;
            continue;
        }
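        /* None of the bits the operation would clear can be nonzero, so
           the result equals the input: emit a mov, or a movi when the
           input is a known constant. */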
        if (affected == 0) {
            assert(def->nb_oargs == 1);
            if (temps_are_copies(args[0], args[1])) {
                s->gen_opc_buf[op_index] = INDEX_op_nop;
            } else if (temps[args[1]].state != TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_mov(op);
                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                gen_args += 2;
            } else {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], temps[args[1]].val);
                gen_args += 2;
            }
            args += def->nb_iargs + 1;
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
        switch (op) {
        CASE_OP_32_64(and):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if ((temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0)) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0);
                args += 3;
                gen_args += 2;
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases */
        switch (op) {
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
            if (temps_are_copies(args[1], args[2])) {
                if (temps_are_copies(args[0], args[1])) {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                } else {
                    s->gen_opc_buf[op_index] = op_to_mov(op);
                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                    gen_args += 2;
                }
                args += 3;
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases */
        switch (op) {
        CASE_OP_32_64(sub):
        CASE_OP_32_64(xor):
            if (temps_are_copies(args[1], args[2])) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], 0);
                gen_args += 2;
                args += 3;
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted for arguments by the
           register allocator where needed and possible.  Also detect
           copies. */
        switch (op) {
        CASE_OP_32_64(mov):
            if (temps_are_copies(args[0], args[1])) {
                args += 2;
                s->gen_opc_buf[op_index] = INDEX_op_nop;
                break;
            }
            if (temps[args[1]].state != TCG_TEMP_CONST) {
                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                gen_args += 2;
                args += 2;
                break;
            }
            /* The source argument is constant.  Rewrite the operation and
               let the movi case handle it. */
            op = op_to_movi(op);
            s->gen_opc_buf[op_index] = op;
            args[1] = temps[args[1]].val;
            /* fallthrough */
        CASE_OP_32_64(movi):
            tcg_opt_gen_movi(gen_args, args[0], args[1]);
            gen_args += 2;
            args += 2;
            break;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding(op, temps[args[1]].val, 0);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 2;
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = do_constant_folding(op, temps[args[1]].val,
                                          temps[args[2]].val);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 3;
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tmp = ((1ull << args[4]) - 1);
                tmp = (temps[args[1]].val & ~(tmp << args[3]))
                      | ((temps[args[2]].val & tmp) << args[3]);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 5;
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
            if (tmp != 2) {
                s->gen_opc_buf[op_index] = op_to_movi(op);
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
                args += 4;
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
            if (tmp != 2) {
                if (tmp) {
                    reset_all_temps(nb_temps);
                    s->gen_opc_buf[op_index] = INDEX_op_br;
                    gen_args[0] = args[3];
                    gen_args += 1;
                } else {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                }
                args += 4;
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
            if (tmp != 2) {
                if (temps_are_copies(args[0], args[4-tmp])) {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
                    s->gen_opc_buf[op_index] = op_to_movi(op);
                    tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
                    gen_args += 2;
                } else {
                    s->gen_opc_buf[op_index] = op_to_mov(op);
                    tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
                    gen_args += 2;
                }
                args += 6;
                break;
            }
            goto do_default;

        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[3]].state == TCG_TEMP_CONST
                && temps[args[4]].state == TCG_TEMP_CONST
                && temps[args[5]].state == TCG_TEMP_CONST) {
                uint32_t al = temps[args[2]].val;
                uint32_t ah = temps[args[3]].val;
                uint32_t bl = temps[args[4]].val;
                uint32_t bh = temps[args[5]].val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;

                if (op == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                /* We emit the extra nop when we emit the add2/sub2. */
                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);

                rl = args[0];
                rh = args[1];
                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
                gen_args += 4;
                args += 6;
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[3]].state == TCG_TEMP_CONST) {
                uint32_t a = temps[args[2]].val;
                uint32_t b = temps[args[3]].val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;

                /* We emit the extra nop when we emit the mulu2. */
                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);

                rl = args[0];
                rh = args[1];
                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
                s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
                gen_args += 4;
                args += 4;
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
            if (tmp != 2) {
                if (tmp) {
                    reset_all_temps(nb_temps);
                    s->gen_opc_buf[op_index] = INDEX_op_br;
                    gen_args[0] = args[5];
                    gen_args += 1;
                } else {
                    s->gen_opc_buf[op_index] = INDEX_op_nop;
                }
            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
                       && temps[args[2]].state == TCG_TEMP_CONST
                       && temps[args[3]].state == TCG_TEMP_CONST
                       && temps[args[2]].val == 0
                       && temps[args[3]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
                reset_all_temps(nb_temps);
                s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
                gen_args[0] = args[1];
                gen_args[1] = args[3];
                gen_args[2] = args[4];
                gen_args[3] = args[5];
                gen_args += 4;
            } else {
                goto do_default;
            }
            args += 6;
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
            if (tmp != 2) {
                s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
                tcg_opt_gen_movi(gen_args, args[0], tmp);
                gen_args += 2;
            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
                       && temps[args[3]].state == TCG_TEMP_CONST
                       && temps[args[4]].state == TCG_TEMP_CONST
                       && temps[args[3]].val == 0
                       && temps[args[4]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
                s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
                reset_temp(args[0]);
                gen_args[0] = args[0];
                gen_args[1] = args[2];
                gen_args[2] = args[4];
                gen_args[3] = args[5];
                gen_args += 4;
            } else {
                goto do_default;
            }
            args += 6;
            break;

        case INDEX_op_call:
            nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
            if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS |
                                            TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    reset_temp(i);
                }
            }
            for (i = 0; i < (args[0] >> 16); i++) {
                reset_temp(args[i + 1]);
            }
            i = nb_call_args + 3;
            while (i) {
                *gen_args = *args;
                args++;
                gen_args++;
                i--;
            }
            break;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute its result), so no propagation is done.
               We trash everything if the operation is the end of a basic
               block, otherwise we only trash the output args.  "mask" is
               the non-zero bits mask for the first output arg. */
            if (def->flags & TCG_OPF_BB_END) {
                reset_all_temps(nb_temps);
            } else {
                for (i = 0; i < def->nb_oargs; i++) {
                    reset_temp(args[i]);
                }
            }
            for (i = 0; i < def->nb_args; i++) {
                gen_args[i] = args[i];
            }
            args += def->nb_args;
            gen_args += def->nb_args;
            break;
        }
    }

    return gen_args;
}

TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
                     TCGArg *args, TCGOpDef *tcg_op_defs)
{
    TCGArg *res;
    res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
    return res;
}