
/*---------------------------------------------------------------*/
/*--- begin                                          ir_opt.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2011 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "ir_opt.h"


/* Set to 1 for lots of debugging output. */
#define DEBUG_IROPT 0


/* What iropt does, 29 Dec 04.

   It takes an IRSB and produces a new one with the same meaning,
   defined thus:

   After execution of the new BB, all guest state and guest memory is
   the same as after execution of the original.  This is true
   regardless of how the block was exited (at the end vs side exit).

   In addition, parts of the guest state will be identical to that
   created by execution of the original at the following observation
   points:

   * In a dirty helper call, any parts of the guest state that the
     helper states that it reads or modifies will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as being read or modified by the helper cannot be
     assumed to be up-to-date at the point where the helper is called.

   * Immediately prior to any load or store, those parts of the guest
     state marked as requiring precise exceptions will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as requiring precise exceptions cannot be assumed to
     be up-to-date at the point of the load/store.

   The relative order of loads and stores (including loads/stores of
   guest memory done by dirty helpers annotated as such) is not
   changed.  However, the relative order of loads with no intervening
   stores/modifies may be changed.

   Transformation order
   ~~~~~~~~~~~~~~~~~~~~

   There are three levels of optimisation, controlled by
   vex_control.iropt_level.  Define first:

   "Cheap transformations" are the following sequence:
      * Redundant-Get removal
      * Redundant-Put removal
      * Constant propagation/folding
      * Dead code removal
      * Specialisation of clean helper functions
      * Dead code removal

   "Expensive transformations" are the following sequence:
      * CSE
      * Folding of add/sub chains
      * Redundant-GetI removal
      * Redundant-PutI removal
      * Dead code removal

   Then the transformations are as follows, as defined by
   vex_control.iropt_level:

   Level 0:
      * Flatten into atomic form.

   Level 1: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.

   Level 2: the following sequence
      * Flatten into atomic form.
      * Cheap transformations.
      * If block contains any floating or vector types, CSE.
      * If block contains GetI or PutI, Expensive transformations.
      * Try unrolling loops.  Three possible outcomes:
        - No effect: do nothing more.
        - Unrolled a loop, and block does not contain GetI or PutI:
          Do: * CSE
              * Dead code removal
        - Unrolled a loop, and block contains GetI or PutI:
          Do: * Expensive transformations
              * Cheap transformations
*/

/* Implementation notes, 29 Dec 04.

   TODO (important): I think rPutI removal ignores precise exceptions
   and is therefore, in a sense, wrong: PutIs are assumed not to
   write parts of the guest state that we need to have up-to-date at
   loads/stores.  So far on x86 guest that has not mattered, since
   indeed only the x87 FP registers and tags are accessed using
   GetI/PutI, and there is no need so far for them to be up to date
   at mem exception points.  The rPutI pass should be fixed.

   TODO: improve pessimistic handling of precise exceptions
     in the tree builder.

   TODO: check interaction of rGetI and dirty helpers.

   F64i constants are treated differently from other constants.
   They are not regarded as atoms, and instead lifted off and
   bound to temps.  This allows them to participate in CSE, which
   is important for getting good performance for x86 guest code.

   CSE up F64 literals (already doing F64is)

   CSE: consider carefully the requirement for precise exns
     prior to making CSE any more aggressive.  */


/*---------------------------------------------------------------*/
/*--- Finite mappery, of a sort                                ---*/
/*---------------------------------------------------------------*/

/* General map from HWord-sized thing to HWord-sized thing.  Could be
   by hashing, but it's not clear whether or not this would really be
   any faster. */

typedef
   struct {
      Bool*  inuse;
      HWord* key;
      HWord* val;
      Int    size;
      Int    used;
   }
   HashHW;

static HashHW* newHHW ( void )
{
   HashHW* h = LibVEX_Alloc(sizeof(HashHW));
   h->size  = 8;
   h->used  = 0;
   h->inuse = LibVEX_Alloc(h->size * sizeof(Bool));
   h->key   = LibVEX_Alloc(h->size * sizeof(HWord));
   h->val   = LibVEX_Alloc(h->size * sizeof(HWord));
   return h;
}


/* Look up key in the map. */

static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
{
   Int i;
   /* vex_printf("lookupHHW(%llx)\n", key ); */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         if (val)
            *val = h->val[i];
         return True;
      }
   }
   return False;
}


/* Add key->val to the map.  Replaces any existing binding for key. */

static void addToHHW ( HashHW* h, HWord key, HWord val )
{
   Int i, j;
   /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */

   /* Find and replace existing binding, if any. */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         h->val[i] = val;
         return;
      }
   }

   /* Ensure a space is available. */
   if (h->used == h->size) {
      /* Copy into arrays twice the size. */
      Bool*  inuse2 = LibVEX_Alloc(2 * h->size * sizeof(Bool));
      HWord* key2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
      HWord* val2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
      for (i = j = 0; i < h->size; i++) {
         if (!h->inuse[i]) continue;
         inuse2[j] = True;
         key2[j]   = h->key[i];
         val2[j]   = h->val[i];
         j++;
      }
      h->used = j;
      h->size *= 2;
      h->inuse = inuse2;
      h->key   = key2;
      h->val   = val2;
   }

   /* Finally, add it. */
   vassert(h->used < h->size);
   h->inuse[h->used] = True;
   h->key[h->used]   = key;
   h->val[h->used]   = val;
   h->used++;
}

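
/* An illustrative (not compiled in) sketch of how the map is used;
   the key/value numbers below are invented for this example.  Keys
   and values are both HWord-sized, so temps, pointers and small
   integers can be stored in it directly, as the passes below do. */
#if 0
static void exampleHHWUsage ( void )
{
   HWord   v;
   HashHW* map = newHHW();
   addToHHW(map, (HWord)0x10, (HWord)0xAA);     /* bind 0x10 :-> 0xAA */
   addToHHW(map, (HWord)0x10, (HWord)0xBB);     /* rebind 0x10 :-> 0xBB */
   if (lookupHHW(map, &v, (HWord)0x10))
      vex_printf("found %llx\n", (ULong)v);     /* v == 0xBB */
   vassert(!lookupHHW(map, NULL, (HWord)0x20)); /* 0x20 was never bound */
}
#endif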

/*---------------------------------------------------------------*/
/*--- Flattening out a BB into atomic SSA form                 ---*/
/*---------------------------------------------------------------*/

/* Non-critical helper, heuristic for reducing the number of tmp-tmp
   copies made by flattening.  If in doubt return False. */

static Bool isFlat ( IRExpr* e )
{
   if (e->tag == Iex_Get)
      return True;
   if (e->tag == Iex_Binop)
      return toBool( isIRAtom(e->Iex.Binop.arg1)
                     && isIRAtom(e->Iex.Binop.arg2) );
   if (e->tag == Iex_Load)
      return isIRAtom(e->Iex.Load.addr);
   return False;
}

/* Flatten out 'ex' so it is atomic, returning a new expression with
   the same value, after having appended extra IRTemp assignments to
   the end of 'bb'. */

static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   switch (ex->tag) {

      case Iex_GetI:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(ex->Iex.Qop.op,
                       flatten_Expr(bb, ex->Iex.Qop.arg1),
                       flatten_Expr(bb, ex->Iex.Qop.arg2),
                       flatten_Expr(bb, ex->Iex.Qop.arg3),
                       flatten_Expr(bb, ex->Iex.Qop.arg4))));
         return IRExpr_RdTmp(t1);

      case Iex_Triop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(ex->Iex.Triop.op,
                         flatten_Expr(bb, ex->Iex.Triop.arg1),
                         flatten_Expr(bb, ex->Iex.Triop.arg2),
                         flatten_Expr(bb, ex->Iex.Triop.arg3))));
         return IRExpr_RdTmp(t1);

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_Mux0X:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Mux0X(flatten_Expr(bb, ex->Iex.Mux0X.cond),
                         flatten_Expr(bb, ex->Iex.Mux0X.expr0),
                         flatten_Expr(bb, ex->Iex.Mux0X.exprX))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         return ex;

      default:
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}


/* Append a completely flattened form of 'st' to the end of 'bb'. */

static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr  *e1, *e2, *e3, *e4, *e5;
   IRDirty *d,  *d2;
   IRCAS   *cas, *cas2;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         e1 = flatten_Expr(bb, st->Ist.PutI.ix);
         e2 = flatten_Expr(bb, st->Ist.PutI.data);
         addStmtToIRSB(bb, IRStmt_PutI(st->Ist.PutI.descr,
                                       e1,
                                       st->Ist.PutI.bias,
                                       e2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
         break;
      case Ist_CAS:
         cas  = st->Ist.CAS.details;
         e1   = flatten_Expr(bb, cas->addr);
         e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3   = flatten_Expr(bb, cas->expdLo);
         e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5   = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++)
            d2->args[i] = flatten_Expr(bb, d2->args[i]);
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst));
         break;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}


static IRSB* flatten_BB ( IRSB* in )
{
   Int   i;
   IRSB* out;
   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   for (i = 0; i < in->stmts_used; i++)
      if (in->stmts[i])
         flatten_Stmt( out, in->stmts[i] );
   out->next     = flatten_Expr( out, in->next );
   out->jumpkind = in->jumpkind;
   return out;
}

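
/* To illustrate (a sketch; the temp numbers are invented): flattening
   rewrites the non-atomic statement

      t5 = Add32(GET:I32(0),0x1:I32)

   into

      t6 = GET:I32(0)
      t5 = Add32(t6,0x1:I32)

   so that every operator argument is an atom (an IRTemp or a
   constant), which is what the passes below rely on. */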

/*---------------------------------------------------------------*/
/*--- In-place removal of redundant GETs                       ---*/
/*---------------------------------------------------------------*/

/* Scan forwards, building up an environment binding (min offset, max
   offset) pairs to values, which will either be temps or constants.

   On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
   env and if it matches, replace the Get with the stored value.  If
   there is no match, add a (minoff,maxoff) :-> t binding.

   On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
   any binding which fully or partially overlaps with (minoff,maxoff).
   Then add a new (minoff,maxoff) :-> t or c binding. */

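/* A worked example (a sketch; the offsets and temp numbers are
   invented):

      t1 = GET:I32(0)    -- miss: binds (0,3) :-> t1
      t2 = GET:I32(0)    -- hit: rewritten as  t2 = t1
      PUT(2) = t9        -- invalidates (0,3), then binds (2,5) :-> t9
      t3 = GET:I32(0)    -- miss again: binds (0,3) :-> t3
*/
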
/* Extract the min/max offsets from a guest state array descriptor. */

inline
static void getArrayBounds ( IRRegArray* descr,
                             UInt* minoff, UInt* maxoff )
{
   *minoff = descr->base;
   *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
   vassert((*minoff & ~0xFFFF) == 0);
   vassert((*maxoff & ~0xFFFF) == 0);
   vassert(*minoff <= *maxoff);
}

/* Create keys, of the form ((minoffset << 16) | maxoffset). */

static UInt mk_key_GetPut ( Int offset, IRType ty )
{
   /* offset should fit in 16 bits. */
   UInt minoff = offset;
   UInt maxoff = minoff + sizeofIRType(ty) - 1;
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}
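
/* For example, a 4-byte access at offset 16 gives (minoff,maxoff)
   = (16,19) and hence the key (0x10 << 16) | 0x13 = 0x100013.  The
   vasserts above check the assumption that offsets fit in 16 bits. */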

static UInt mk_key_GetIPutI ( IRRegArray* descr )
{
   UInt minoff, maxoff;
   getArrayBounds( descr, &minoff, &maxoff );
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}

/* Supposing h has keys of the form generated by mk_key_GetPut and
   mk_key_GetIPutI, invalidate any key which overlaps (k_lo
   .. k_hi).
*/
static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
{
   Int  j;
   UInt e_lo, e_hi;
   vassert(k_lo <= k_hi);
   /* invalidate any env entries which in any way overlap (k_lo
      .. k_hi) */
   /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */

   for (j = 0; j < h->used; j++) {
      if (!h->inuse[j])
         continue;
      e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
      e_hi = ((UInt)h->key[j]) & 0xFFFF;
      vassert(e_lo <= e_hi);
      if (e_hi < k_lo || k_hi < e_lo)
         continue; /* no overlap possible */
      else
         /* overlap; invalidate */
         h->inuse[j] = False;
   }
}


static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */
   Int     i, j;
   HWord   val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf("  ->  "); ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                           (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
               writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}


/*---------------------------------------------------------------*/
/*--- In-place removal of redundant PUTs                       ---*/
/*---------------------------------------------------------------*/

/* Find any Get uses in st and invalidate any partially or fully
   overlapping ranges listed in env.  Due to the flattening phase, the
   only stmt kind we expect to find a Get on is IRStmt_WrTmp. */

static void handle_gets_Stmt (
               HashHW* env,
               IRStmt* st,
               Bool (*preciseMemExnsFn)(Int,Int)
            )
{
   Int     j;
   UInt    key = 0; /* keep gcc -O happy */
   Bool    isGet;
   Bool    memRW = False;
   IRExpr* e;

   switch (st->tag) {

      /* This is the only interesting case.  Deal with Gets in the RHS
         expression. */
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
               isGet = True;
               key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
               break;
            case Iex_GetI:
               isGet = True;
               key = mk_key_GetIPutI ( e->Iex.GetI.descr );
               break;
            case Iex_Load:
               isGet = False;
               memRW = True;
               break;
            default:
               isGet = False;
         }
         if (isGet) {
            UInt k_lo, k_hi;
            k_lo = (key >> 16) & 0xFFFF;
            k_hi = key & 0xFFFF;
            invalidateOverlaps(env, k_lo, k_hi);
         }
         break;

      /* Be very conservative for dirty helper calls; dump the entire
         environment.  The helper might read guest state, in which
         case it needs to be flushed first.  Also, the helper might
         access guest memory, in which case all parts of the guest
         state requiring precise exceptions need to be flushed.  The
         crude solution is just to flush everything; we could easily
         enough do a lot better if needed. */
      /* Probably also overly-conservative, but also dump everything
         if we hit a memory bus event (fence, lock, unlock).  Ditto
         AbiHints, CASs, LLs and SCs. */
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         /* fall through */
      case Ist_MBE:
      case Ist_Dirty:
      case Ist_CAS:
      case Ist_LLSC:
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         break;

      /* all other cases are boring. */
      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         memRW = True;
         break;

      case Ist_Exit:
         vassert(isIRAtom(st->Ist.Exit.guard));
         break;

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.ix));
         vassert(isIRAtom(st->Ist.PutI.data));
         break;

      case Ist_NoOp:
      case Ist_IMark:
         break;

      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("handle_gets_Stmt");
   }

   if (memRW) {
      /* This statement accesses memory.  So we need to dump all parts
         of the environment corresponding to guest state that may not
         be reordered with respect to memory references.  That means
         at least the stack pointer. */
      for (j = 0; j < env->used; j++) {
         if (!env->inuse[j])
            continue;
         if (vex_control.iropt_precise_memory_exns) {
            /* Precise exceptions required.  Flush all guest state. */
            env->inuse[j] = False;
         } else {
            /* Just flush the minimal amount required, as computed by
               preciseMemExnsFn. */
            HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
            HWord k_hi = env->key[j] & 0xFFFF;
            if (preciseMemExnsFn( k_lo, k_hi ))
               env->inuse[j] = False;
         }
      }
   } /* if (memRW) */

}


/* Scan backwards, building up a set of (min offset, max
   offset) pairs, indicating those parts of the guest state
   for which the next event is a write.

   On seeing a conditional exit, empty the set.

   On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
   completely within the set, remove the Put.  Otherwise, add
   (minoff,maxoff) to the set.

   On seeing 'Get (minoff,maxoff)', remove any part of the set
   overlapping (minoff,maxoff).  The same has to happen for any events
   which implicitly read parts of the guest state: dirty helper calls
   and loads/stores.
*/

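/* A worked example (a sketch; the offsets are invented).  Remember
   the scan visits statements from the bottom up:

      PUT(0) = t1        -- (0,3) is in the set: made a NoOp
      t2 = GET:I32(8)    -- removes (8,11) from the set
      PUT(0) = t3        -- adds (0,3) to the set
      PUT(8) = t4        -- adds (8,11) to the set (visited first)
*/
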
static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int)
            )
{
   Int     i, j;
   Bool    isPut;
   IRStmt* st;
   UInt    key = 0; /* keep gcc -O happy */

   HashHW* env = newHHW();
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         /* Since control may not get beyond this point, we must empty
            out the set, since we can no longer claim that the next
            event for any part of the guest state is definitely a
            write. */
         vassert(isIRAtom(st->Ist.Exit.guard));
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         continue;
      }

      /* Deal with Puts */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.descr );
            vassert(isIRAtom(st->Ist.PutI.ix));
            vassert(isIRAtom(st->Ist.PutI.data));
            break;
         default:
            isPut = False;
      }
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add it
               to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn );
   }
}


/*---------------------------------------------------------------*/
/*--- Constant propagation and folding                         ---*/
/*---------------------------------------------------------------*/

/* The env in this section is a map from IRTemp to IRExpr*,
   that is, an array indexed by IRTemp. */

/* Are both expressions simply the same IRTemp ? */
static Bool sameIRTemps ( IRExpr* e1, IRExpr* e2 )
{
   return toBool( e1->tag == Iex_RdTmp
                  && e2->tag == Iex_RdTmp
                  && e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp );
}

static Bool sameIcoU32s ( IRExpr* e1, IRExpr* e2 )
{
   return toBool( e1->tag == Iex_Const
                  && e2->tag == Iex_Const
                  && e1->Iex.Const.con->tag == Ico_U32
                  && e2->Iex.Const.con->tag == Ico_U32
                  && e1->Iex.Const.con->Ico.U32
                     == e2->Iex.Const.con->Ico.U32 );
}

/* Are both expressions either the same IRTemp or IRConst-U32s ?  If
   in doubt, say No. */
static Bool sameIRTempsOrIcoU32s ( IRExpr* e1, IRExpr* e2 )
{
   switch (e1->tag) {
      case Iex_RdTmp:
         return sameIRTemps(e1, e2);
      case Iex_Const:
         return sameIcoU32s(e1, e2);
      default:
         return False;
   }
}

/* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
static Bool isZeroU32 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U32
                  && e->Iex.Const.con->Ico.U32 == 0);
}

/* Is this literally IRExpr_Const(IRConst_U64(0)) ? */
static Bool isZeroU64 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U64
                  && e->Iex.Const.con->Ico.U64 == 0);
}

static Bool notBool ( Bool b )
{
   if (b == True) return False;
   if (b == False) return True;
   vpanic("notBool");
}

/* Make a zero which has the same type as the result of the given
   primop. */
static IRExpr* mkZeroOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
      case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
      case Iop_Sub32:
      case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
      case Iop_Sub64:
      case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
      case Iop_XorV128: return IRExpr_Const(IRConst_V128(0));
      default: vpanic("mkZeroOfPrimopResultType: bad primop");
   }
}

/* Make a value containing all 1-bits, which has the same type as the
   result of the given primop. */
static IRExpr* mkOnesOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_CmpEQ64:
         return IRExpr_Const(IRConst_U1(toBool(1)));
      case Iop_CmpEQ8x8:
         return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
      case Iop_CmpEQ8x16:
         return IRExpr_Const(IRConst_V128(0xFFFF));
      default:
         vpanic("mkOnesOfPrimopResultType: bad primop");
   }
}

/* Helpers for folding Clz32/64. */
static UInt fold_Clz64 ( ULong value )
{
   UInt i;
   vassert(value != 0ULL); /* no defined semantics for arg==0 */
   for (i = 0; i < 64; ++i) {
      if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
   }
   vassert(0);
   /*NOTREACHED*/
   return 0;
}

static UInt fold_Clz32 ( UInt value )
{
   UInt i;
   vassert(value != 0); /* no defined semantics for arg==0 */
   for (i = 0; i < 32; ++i) {
      if (0 != (value & (((UInt)1) << (31 - i)))) return i;
   }
   vassert(0);
   /*NOTREACHED*/
   return 0;
}
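
/* For example, fold_Clz32(0x00010000) returns 15, since bit 16 is
   the highest bit set and there are 31 - 16 = 15 zero bits above it.
   The arg==0 case is deliberately left unfolded by the callers below,
   since IR gives Clz no defined semantics there. */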


static IRExpr* fold_Expr ( IRExpr* e )
{
   Int     shift;
   IRExpr* e2 = e; /* e2 is the result of folding e, if possible */

   /* UNARY ops */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.arg->tag == Iex_Const) {
      switch (e->Iex.Unop.op) {
         case Iop_1Uto8:
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 1 : 0)));
            break;
         case Iop_1Uto32:
            e2 = IRExpr_Const(IRConst_U32(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 1 : 0));
            break;
         case Iop_1Uto64:
            e2 = IRExpr_Const(IRConst_U64(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 1 : 0));
            break;

         case Iop_1Sto8:
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 0xFF : 0)));
            break;
         case Iop_1Sto16:
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 0xFFFF : 0)));
            break;
         case Iop_1Sto32:
            e2 = IRExpr_Const(IRConst_U32(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 0xFFFFFFFF : 0));
            break;
         case Iop_1Sto64:
            e2 = IRExpr_Const(IRConst_U64(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 0xFFFFFFFFFFFFFFFFULL : 0));
            break;

         case Iop_8Sto32: {
            /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
            s32 <<= 24;
            s32 >>= 24;
            e2 = IRExpr_Const(IRConst_U32((UInt)s32));
            break;
         }
         case Iop_16Sto32: {
            /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
            s32 <<= 16;
            s32 >>= 16;
            e2 = IRExpr_Const(IRConst_U32( (UInt)s32) );
            break;
         }
         case Iop_8Uto64:
            e2 = IRExpr_Const(IRConst_U64(
                    0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
            break;
         case Iop_16Uto64:
            e2 = IRExpr_Const(IRConst_U64(
                    0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
            break;
         case Iop_8Uto32:
            e2 = IRExpr_Const(IRConst_U32(
                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
            break;
         case Iop_8Sto16: {
            /* signed */ Short s16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
            s16 <<= 8;
            s16 >>= 8;
            e2 = IRExpr_Const(IRConst_U16( (UShort)s16) );
            break;
         }
         case Iop_8Uto16:
            e2 = IRExpr_Const(IRConst_U16(
                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
            break;
         case Iop_16Uto32:
            e2 = IRExpr_Const(IRConst_U32(
                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
            break;
         case Iop_32to16:
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
            break;
         case Iop_32to8:
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
            break;
         case Iop_32to1:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                 )));
            break;
         case Iop_64to1:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
                 )));
            break;

         case Iop_Not64:
            e2 = IRExpr_Const(IRConst_U64(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
            break;
         case Iop_Not32:
            e2 = IRExpr_Const(IRConst_U32(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
            break;
         case Iop_Not16:
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
            break;
         case Iop_Not8:
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
            break;

         case Iop_Not1:
            e2 = IRExpr_Const(IRConst_U1(
                    notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
            break;

         case Iop_64to8: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 &= 0xFFULL;
            e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
            break;
         }
         case Iop_64to16: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 &= 0xFFFFULL;
            e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
            break;
         }
         case Iop_64to32: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 &= 0x00000000FFFFFFFFULL;
            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
            break;
         }
         case Iop_64HIto32: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 >>= 32;
            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
            break;
         }
         case Iop_32Uto64:
            e2 = IRExpr_Const(IRConst_U64(
                    0xFFFFFFFFULL
                    & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
            break;
         case Iop_16Sto64: {
            /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
            s64 <<= 48;
            s64 >>= 48;
            e2 = IRExpr_Const(IRConst_U64((ULong)s64));
            break;
         }
         case Iop_32Sto64: {
            /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            s64 <<= 32;
            s64 >>= 32;
            e2 = IRExpr_Const(IRConst_U64((ULong)s64));
            break;
         }

         case Iop_16to8: {
            UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
            w16 &= 0xFF;
            e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
            break;
         }
         case Iop_16HIto8: {
            UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
            w16 >>= 8;
            w16 &= 0xFF;
            e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
            break;
         }

         case Iop_CmpNEZ8:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    0 !=
                    (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
                 )));
            break;
         case Iop_CmpNEZ32:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    0 !=
                    (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                 )));
            break;
         case Iop_CmpNEZ64:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
                 )));
            break;

         case Iop_CmpwNEZ32: {
            UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            if (w32 == 0)
               e2 = IRExpr_Const(IRConst_U32( 0 ));
            else
               e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
            break;
         }
         case Iop_CmpwNEZ64: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            if (w64 == 0)
               e2 = IRExpr_Const(IRConst_U64( 0 ));
            else
               e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
            break;
         }

         case Iop_Left32: {
            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            Int  s32 = (Int)(u32 & 0xFFFFFFFF);
            s32 = (s32 | (-s32));
            e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
            break;
         }

         case Iop_Left64: {
            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            Long  s64 = (Long)u64;
            s64 = (s64 | (-s64));
            e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
            break;
         }

         case Iop_Clz32: {
            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            if (u32 != 0)
               e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
            break;
         }
         case Iop_Clz64: {
            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            if (u64 != 0ULL)
               e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
            break;
         }

         default:
            goto unhandled;
      }
   }

   /* BINARY ops */
   if (e->tag == Iex_Binop) {
      if (e->Iex.Binop.arg1->tag == Iex_Const
          && e->Iex.Binop.arg2->tag == Iex_Const) {
         /* cases where both args are consts */
         switch (e->Iex.Binop.op) {

            /* -- Or -- */
            case Iop_Or8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Or16:
               e2 = IRExpr_Const(IRConst_U16(toUShort(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
               break;
            case Iop_Or32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Or64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Xor -- */
            case Iop_Xor8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Xor16:
               e2 = IRExpr_Const(IRConst_U16(toUShort(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
               break;
            case Iop_Xor32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Xor64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- And -- */
            case Iop_And8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_And16:
               e2 = IRExpr_Const(IRConst_U16(toUShort(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
               break;
            case Iop_And32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_And64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Add -- */
            case Iop_Add8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Add32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Add64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Sub -- */
            case Iop_Sub8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Sub32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Sub64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Max32U -- */
            case Iop_Max32U: {
               UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
               UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
               UInt res  = u32a > u32b ? u32a : u32b;
               e2 = IRExpr_Const(IRConst_U32(res));
               break;
            }

            /* -- Mul -- */
            case Iop_Mul32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Mul64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            case Iop_MullS32: {
               /* very paranoid */
               UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
               UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
               Int   s32a = (Int)u32a;
               Int   s32b = (Int)u32b;
               Long  s64a = (Long)s32a;
               Long  s64b = (Long)s32b;
               Long  sres = s64a * s64b;
               ULong ures = (ULong)sres;
               e2 = IRExpr_Const(IRConst_U64(ures));
               break;
            }

            /* -- Shl -- */
            case Iop_Shl32:
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 31)
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           << shift)));
               break;
            case Iop_Shl64:
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 63)
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           << shift)));
               break;

            /* -- Sar -- */
            case Iop_Sar32: {
               /* paranoid ... */
               /*signed*/ Int s32;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 31) {
                  s32 >>=/*signed*/ shift;
                  e2 = IRExpr_Const(IRConst_U32((UInt)s32));
               }
               break;
            }
            case Iop_Sar64: {
               /* paranoid ... */
               /*signed*/ Long s64;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 63) {
                  s64 >>=/*signed*/ shift;
                  e2 = IRExpr_Const(IRConst_U64((ULong)s64));
               }
               break;
            }

            /* -- Shr -- */
            case Iop_Shr32: {
               /* paranoid ... */
               /*unsigned*/ UInt u32;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 31) {
                  u32 >>=/*unsigned*/ shift;
                  e2 = IRExpr_Const(IRConst_U32(u32));
               }
               break;
            }
            case Iop_Shr64: {
               /* paranoid ... */
               /*unsigned*/ ULong u64;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 63) {
                  u64 >>=/*unsigned*/ shift;
                  e2 = IRExpr_Const(IRConst_U64(u64));
               }
               break;
            }

            /* -- CmpEQ -- */
            case Iop_CmpEQ32:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
               break;
            case Iop_CmpEQ64:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
               break;

            /* -- CmpNE -- */
            case Iop_CmpNE8:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
                        != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
               break;
            case Iop_CmpNE32:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
               break;
            case Iop_CmpNE64:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
               break;

            /* -- CmpLEU -- */
            case Iop_CmpLE32U:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;
            case Iop_CmpLE64U:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                        <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
               break;

            /* -- CmpLES -- */
            case Iop_CmpLE32S:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;
            case Iop_CmpLE64S:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                        <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
               break;

            /* -- CmpLTS -- */
            case Iop_CmpLT32S:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;
            case Iop_CmpLT64S:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                        < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
               break;

            /* -- CmpLTU -- */
            case Iop_CmpLT32U:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;
            case Iop_CmpLT64U:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                        < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
               break;

            /* -- CmpORD -- */
            case Iop_CmpORD32S: {
               /* very paranoid */
               UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
               UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
               Int  s32a = (Int)u32a;
               Int  s32b = (Int)u32b;
               Int  r = 0x2; /* EQ */
               if (s32a < s32b) {
                  r = 0x8; /* LT */
               }
               else if (s32a > s32b) {
                  r = 0x4; /* GT */
               }
               e2 = IRExpr_Const(IRConst_U32(r));
               break;
            }

            /* -- nHLto2n -- */
            case Iop_32HLto64:
               e2 = IRExpr_Const(IRConst_U64(
                       (((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)) << 32)
                       | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
                    ));
               break;
            case Iop_64HLto128:
               /* We can't fold this, because there is no way to
                  express the result in IR, but at least pretend to
                  handle it, so as to stop getting blasted with
                  no-rule-for-this-primop messages. */
               break;

            default:
               goto unhandled;
         }

      } else {

         /* other cases (identities, etc) */

         /* Shl64/Shr64(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Shl64 || e->Iex.Binop.op == Iop_Shr64)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Shl32/Shr32(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Shl32 || e->Iex.Binop.op == Iop_Shr32)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Or8(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Or8)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Or16(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Or16)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U16 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Or32/Add32/Max32U(x,0) ==> x
            Or32/Add32/Max32U(0,x) ==> x */
         if (e->Iex.Binop.op == Iop_Add32
             || e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U) {
            if (isZeroU32(e->Iex.Binop.arg2))
               e2 = e->Iex.Binop.arg1;
            else if (isZeroU32(e->Iex.Binop.arg1))
               e2 = e->Iex.Binop.arg2;
         } else

         /* Sub64(x,0) ==> x */
         if (e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg2)) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Add32(t,t) ==> t << 1.  Memcheck doesn't understand that
            x+x produces a defined least significant bit, and it seems
            simplest just to get rid of the problem by rewriting it
            out, since the opportunity to do so exists. */
         if (e->Iex.Binop.op == Iop_Add32
             && e->Iex.Binop.arg1->tag == Iex_RdTmp
             && e->Iex.Binop.arg2->tag == Iex_RdTmp
             && e->Iex.Binop.arg1->Iex.RdTmp.tmp
                == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
            e2 = IRExpr_Binop(Iop_Shl32,
                              e->Iex.Binop.arg1,
                              IRExpr_Const(IRConst_U8(1)));
         } else

         /* Add64(t,t) ==> t << 1;  rationale as for Add32(t,t) above. */
         if (e->Iex.Binop.op == Iop_Add64
             && e->Iex.Binop.arg1->tag == Iex_RdTmp
             && e->Iex.Binop.arg2->tag == Iex_RdTmp
             && e->Iex.Binop.arg1->Iex.RdTmp.tmp
                == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
            e2 = IRExpr_Binop(Iop_Shl64,
                              e->Iex.Binop.arg1,
                              IRExpr_Const(IRConst_U8(1)));
         } else

         /* Add8(t,t) ==> t << 1;  rationale as for Add32(t,t) above. */
         if (e->Iex.Binop.op == Iop_Add8
             && e->Iex.Binop.arg1->tag == Iex_RdTmp
             && e->Iex.Binop.arg2->tag == Iex_RdTmp
             && e->Iex.Binop.arg1->Iex.RdTmp.tmp
                == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
            e2 = IRExpr_Binop(Iop_Shl8,
                              e->Iex.Binop.arg1,
                              IRExpr_Const(IRConst_U8(1)));
         } else
         /* NB no Add16(t,t) case yet as no known test case exists */

         /* Or64/Add64(x,0) ==> x
            Or64/Add64(0,x) ==> x */
         if (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Or64) {
            if (isZeroU64(e->Iex.Binop.arg2))
               e2 = e->Iex.Binop.arg1;
            else if (isZeroU64(e->Iex.Binop.arg1))
               e2 = e->Iex.Binop.arg2;
         } else

         /* And32(x,0xFFFFFFFF) ==> x */
         if (e->Iex.Binop.op == Iop_And32
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0xFFFFFFFF) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* And32(x,0) ==> 0 */
         if (e->Iex.Binop.op == Iop_And32
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
            e2 = IRExpr_Const(IRConst_U32(0));
         } else

         /* And32/Shl32(0,x) ==> 0 */
         if ((e->Iex.Binop.op == Iop_And32 || e->Iex.Binop.op == Iop_Shl32)
             && e->Iex.Binop.arg1->tag == Iex_Const
             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
            e2 = IRExpr_Const(IRConst_U32(0));
         } else

         /* Or8(0,x) ==> x */
         if (e->Iex.Binop.op == Iop_Or8
             && e->Iex.Binop.arg1->tag == Iex_Const
             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 == 0) {
            e2 = e->Iex.Binop.arg2;
         } else

         /* Or8/16/32/64/V128(t,t) ==> t, for some IRTemp t */
         /* And8/16/32/64(t,t) ==> t, for some IRTemp t */
         /* Max32U(t,t) ==> t, for some IRTemp t */
         switch (e->Iex.Binop.op) {
            case Iop_And64: case Iop_And32:
            case Iop_And16: case Iop_And8:
            case Iop_Or64: case Iop_Or32:
            case Iop_Or16: case Iop_Or8: case Iop_OrV128:
            case Iop_Max32U:
               if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
                  e2 = e->Iex.Binop.arg1;
               break;
            default:
               break;
         }

         /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
         /* Sub32/64(t,t) ==> 0, for some IRTemp t */
         switch (e->Iex.Binop.op) {
            case Iop_Xor64: case Iop_Xor32:
            case Iop_Xor16: case Iop_Xor8:
            case Iop_XorV128:
            case Iop_Sub64: case Iop_Sub32:
               if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
               break;
            default:
               break;
         }

         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ64:
            case Iop_CmpEQ8x8:
            case Iop_CmpEQ8x16:
               if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
                  e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
               break;
            default:
               break;
         }

      }
   }

   /* Mux0X */
   if (e->tag == Iex_Mux0X) {
      /* is the discriminant a constant? */
      if (e->Iex.Mux0X.cond->tag == Iex_Const) {
         Bool zero;
         /* assured by the IR type rules */
         vassert(e->Iex.Mux0X.cond->Iex.Const.con->tag == Ico_U8);
         zero = toBool(0 == (0xFF & e->Iex.Mux0X.cond
                                     ->Iex.Const.con->Ico.U8));
         e2 = zero ? e->Iex.Mux0X.expr0 : e->Iex.Mux0X.exprX;
      }
      else
      /* are the arms identical? (pretty weedy test) */
      if (sameIRTempsOrIcoU32s(e->Iex.Mux0X.expr0,
                               e->Iex.Mux0X.exprX)) {
         e2 = e->Iex.Mux0X.expr0;
      }
   }

   /* Show cases where we've found but not folded 'op(t,t)'. */
   if (0 && e == e2 && e->tag == Iex_Binop
       && sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
      vex_printf("IDENT: ");
      ppIRExpr(e); vex_printf("\n");
   }

   /* Show the overall results of folding. */
   if (DEBUG_IROPT && e2 != e) {
      vex_printf("FOLD: ");
      ppIRExpr(e); vex_printf("  ->  ");
      ppIRExpr(e2); vex_printf("\n");
   }

   return e2;

  unhandled:
# if 0
   vex_printf("\n\n");
   ppIRExpr(e);
   vpanic("fold_Expr: no rule for the above");
# else
   if (vex_control.iropt_verbosity > 0) {
      vex_printf("vex iropt: fold_Expr: no rule for: ");
      ppIRExpr(e);
      vex_printf("\n");
   }
   return e2;
# endif
}

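/* Some of the above rules in action (a sketch; t7 is an invented
   temp): Add32(0x2:I32,0x3:I32) folds to 0x5:I32, Shr64(t7,0x0:I8)
   folds to t7, and Add32(t7,t7) is rewritten as Shl32(t7,0x1:I8),
   which Memcheck can reason about more precisely. */
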

/* Apply the subst to a simple 1-level expression -- guaranteed to be
   1-level due to previous flattening pass. */

static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
{
   switch (ex->tag) {
      case Iex_RdTmp:
         if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
            return env[(Int)ex->Iex.RdTmp.tmp];
         } else {
            /* not bound in env */
            return ex;
         }

      case Iex_Const:
      case Iex_Get:
         return ex;

      case Iex_GetI:
         vassert(isIRAtom(ex->Iex.GetI.ix));
         return IRExpr_GetI(
            ex->Iex.GetI.descr,
            subst_Expr(env, ex->Iex.GetI.ix),
            ex->Iex.GetI.bias
         );

      case Iex_Qop:
         vassert(isIRAtom(ex->Iex.Qop.arg1));
         vassert(isIRAtom(ex->Iex.Qop.arg2));
         vassert(isIRAtom(ex->Iex.Qop.arg3));
         vassert(isIRAtom(ex->Iex.Qop.arg4));
         return IRExpr_Qop(
            ex->Iex.Qop.op,
            subst_Expr(env, ex->Iex.Qop.arg1),
            subst_Expr(env, ex->Iex.Qop.arg2),
            subst_Expr(env, ex->Iex.Qop.arg3),
            subst_Expr(env, ex->Iex.Qop.arg4)
         );

      case Iex_Triop:
         vassert(isIRAtom(ex->Iex.Triop.arg1));
         vassert(isIRAtom(ex->Iex.Triop.arg2));
         vassert(isIRAtom(ex->Iex.Triop.arg3));
         return IRExpr_Triop(
            ex->Iex.Triop.op,
            subst_Expr(env, ex->Iex.Triop.arg1),
            subst_Expr(env, ex->Iex.Triop.arg2),
            subst_Expr(env, ex->Iex.Triop.arg3)
         );

      case Iex_Binop:
         vassert(isIRAtom(ex->Iex.Binop.arg1));
         vassert(isIRAtom(ex->Iex.Binop.arg2));
         return IRExpr_Binop(
            ex->Iex.Binop.op,
            subst_Expr(env, ex->Iex.Binop.arg1),
            subst_Expr(env, ex->Iex.Binop.arg2)
         );

      case Iex_Unop:
         vassert(isIRAtom(ex->Iex.Unop.arg));
         return IRExpr_Unop(
            ex->Iex.Unop.op,
            subst_Expr(env, ex->Iex.Unop.arg)
         );

      case Iex_Load:
         vassert(isIRAtom(ex->Iex.Load.addr));
         return IRExpr_Load(
            ex->Iex.Load.end,
            ex->Iex.Load.ty,
            subst_Expr(env, ex->Iex.Load.addr)
         );

      case Iex_CCall: {
         Int      i;
         IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; args2[i]; i++) {
            vassert(isIRAtom(args2[i]));
            args2[i] = subst_Expr(env, args2[i]);
         }
         return IRExpr_CCall(
            ex->Iex.CCall.cee,
            ex->Iex.CCall.retty,
            args2
         );
      }

      case Iex_Mux0X:
         vassert(isIRAtom(ex->Iex.Mux0X.cond));
         vassert(isIRAtom(ex->Iex.Mux0X.expr0));
         vassert(isIRAtom(ex->Iex.Mux0X.exprX));
         return IRExpr_Mux0X(
            subst_Expr(env, ex->Iex.Mux0X.cond),
            subst_Expr(env, ex->Iex.Mux0X.expr0),
            subst_Expr(env, ex->Iex.Mux0X.exprX)
         );

      default:
         vex_printf("\n\n"); ppIRExpr(ex);
         vpanic("subst_Expr");

   }
}

1902
1903 /* Apply the subst to stmt, then fold the result as much as possible.
1904 Much simplified due to stmt being previously flattened. As a
1905 result of this, the stmt may wind up being turned into a no-op.
1906 */
1907 static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
1908 {
1909 # if 0
1910 vex_printf("\nsubst and fold stmt\n");
1911 ppIRStmt(st);
1912 vex_printf("\n");
1913 # endif
1914
1915 switch (st->tag) {
1916 case Ist_AbiHint:
1917 vassert(isIRAtom(st->Ist.AbiHint.base));
1918 vassert(isIRAtom(st->Ist.AbiHint.nia));
1919 return IRStmt_AbiHint(
1920 fold_Expr(subst_Expr(env, st->Ist.AbiHint.base)),
1921 st->Ist.AbiHint.len,
1922 fold_Expr(subst_Expr(env, st->Ist.AbiHint.nia))
1923 );
1924 case Ist_Put:
1925 vassert(isIRAtom(st->Ist.Put.data));
1926 return IRStmt_Put(
1927 st->Ist.Put.offset,
1928 fold_Expr(subst_Expr(env, st->Ist.Put.data))
1929 );
1930
1931 case Ist_PutI:
1932 vassert(isIRAtom(st->Ist.PutI.ix));
1933 vassert(isIRAtom(st->Ist.PutI.data));
1934 return IRStmt_PutI(
1935 st->Ist.PutI.descr,
1936 fold_Expr(subst_Expr(env, st->Ist.PutI.ix)),
1937 st->Ist.PutI.bias,
1938 fold_Expr(subst_Expr(env, st->Ist.PutI.data))
1939 );
1940
1941 case Ist_WrTmp:
1942 /* This is the one place where an expr (st->Ist.WrTmp.data) is
1943 allowed to be more than just a constant or a tmp. */
1944 return IRStmt_WrTmp(
1945 st->Ist.WrTmp.tmp,
1946 fold_Expr(subst_Expr(env, st->Ist.WrTmp.data))
1947 );
1948
1949 case Ist_Store:
1950 vassert(isIRAtom(st->Ist.Store.addr));
1951 vassert(isIRAtom(st->Ist.Store.data));
1952 return IRStmt_Store(
1953 st->Ist.Store.end,
1954 fold_Expr(subst_Expr(env, st->Ist.Store.addr)),
1955 fold_Expr(subst_Expr(env, st->Ist.Store.data))
1956 );
1957
1958 case Ist_CAS: {
1959 IRCAS *cas, *cas2;
1960 cas = st->Ist.CAS.details;
1961 vassert(isIRAtom(cas->addr));
1962 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
1963 vassert(isIRAtom(cas->expdLo));
1964 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
1965 vassert(isIRAtom(cas->dataLo));
1966 cas2 = mkIRCAS(
1967 cas->oldHi, cas->oldLo, cas->end,
1968 fold_Expr(subst_Expr(env, cas->addr)),
1969 cas->expdHi ? fold_Expr(subst_Expr(env, cas->expdHi)) : NULL,
1970 fold_Expr(subst_Expr(env, cas->expdLo)),
1971 cas->dataHi ? fold_Expr(subst_Expr(env, cas->dataHi)) : NULL,
1972 fold_Expr(subst_Expr(env, cas->dataLo))
1973 );
1974 return IRStmt_CAS(cas2);
1975 }
1976
1977 case Ist_LLSC:
1978 vassert(isIRAtom(st->Ist.LLSC.addr));
1979 if (st->Ist.LLSC.storedata)
1980 vassert(isIRAtom(st->Ist.LLSC.storedata));
1981 return IRStmt_LLSC(
1982 st->Ist.LLSC.end,
1983 st->Ist.LLSC.result,
1984 fold_Expr(subst_Expr(env, st->Ist.LLSC.addr)),
1985 st->Ist.LLSC.storedata
1986 ? fold_Expr(subst_Expr(env, st->Ist.LLSC.storedata))
1987 : NULL
1988 );
1989
1990 case Ist_Dirty: {
1991 Int i;
1992 IRDirty *d, *d2;
1993 d = st->Ist.Dirty.details;
1994 d2 = emptyIRDirty();
1995 *d2 = *d;
1996 d2->args = shallowCopyIRExprVec(d2->args);
1997 if (d2->mFx != Ifx_None) {
1998 vassert(isIRAtom(d2->mAddr));
1999 d2->mAddr = fold_Expr(subst_Expr(env, d2->mAddr));
2000 }
2001 vassert(isIRAtom(d2->guard));
2002 d2->guard = fold_Expr(subst_Expr(env, d2->guard));
2003 for (i = 0; d2->args[i]; i++) {
2004 vassert(isIRAtom(d2->args[i]));
2005 d2->args[i] = fold_Expr(subst_Expr(env, d2->args[i]));
2006 }
2007 return IRStmt_Dirty(d2);
2008 }
2009
2010 case Ist_IMark:
2011 return IRStmt_IMark(st->Ist.IMark.addr,
2012 st->Ist.IMark.len,
2013 st->Ist.IMark.delta);
2014
2015 case Ist_NoOp:
2016 return IRStmt_NoOp();
2017
2018 case Ist_MBE:
2019 return IRStmt_MBE(st->Ist.MBE.event);
2020
2021 case Ist_Exit: {
2022 IRExpr* fcond;
2023 vassert(isIRAtom(st->Ist.Exit.guard));
2024 fcond = fold_Expr(subst_Expr(env, st->Ist.Exit.guard));
2025 if (fcond->tag == Iex_Const) {
2026 /* Interesting. The condition on this exit has folded down to
2027 a constant. */
2028 vassert(fcond->Iex.Const.con->tag == Ico_U1);
2029 vassert(fcond->Iex.Const.con->Ico.U1 == False
2030 || fcond->Iex.Const.con->Ico.U1 == True);
2031 if (fcond->Iex.Const.con->Ico.U1 == False) {
2032 /* exit is never going to happen, so dump the statement. */
2033 return IRStmt_NoOp();
2034 } else {
2035 vassert(fcond->Iex.Const.con->Ico.U1 == True);
2036 /* Hmmm. The exit has become unconditional. Leave it
2037 as it is for now, since we'd have to truncate the BB
2038 at this point, which is tricky. Such truncation is
2039 done later by the dead-code elimination pass. */
2040 /* fall out into the reconstruct-the-exit code. */
2041 if (vex_control.iropt_verbosity > 0)
2042 /* really a misuse of vex_control.iropt_verbosity */
2043 vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
2044 }
2045 }
2046 return IRStmt_Exit(fcond, st->Ist.Exit.jk, st->Ist.Exit.dst);
2047 }
2048
2049 default:
2050 vex_printf("\n"); ppIRStmt(st);
2051 vpanic("subst_and_fold_Stmt");
2052 }
2053 }
2054
2055
2056 IRSB* cprop_BB ( IRSB* in )
2057 {
2058 Int i;
2059 IRSB* out;
2060 IRStmt* st2;
2061 Int n_tmps = in->tyenv->types_used;
2062 IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*));
2063
2064 out = emptyIRSB();
2065 out->tyenv = deepCopyIRTypeEnv( in->tyenv );
2066
2067 /* Set up the env, which travels forward with the scan. This
2068 holds a substitution, mapping IRTemps to atoms, that is, IRExprs
2069 which are either IRTemps or IRConsts. Thus, copy and constant
2070 propagation is done. The environment is applied as we
2071 move along. Keys are IRTemps. Values are IRExpr*s.
2072 */
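/* For example (schematically): given the flattened input
      t2 = t1
      t3 = Add32(t2,0x4:I32)
   the first stmt merely adds the binding t2 -> t1 to env; the
   second is then rewritten to t3 = Add32(t1,0x4:I32), and the
   copy 't2 = t1' never reaches the output block. */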
2073 for (i = 0; i < n_tmps; i++)
2074 env[i] = NULL;
2075
2076 /* For each original SSA-form stmt ... */
2077 for (i = 0; i < in->stmts_used; i++) {
2078
2079 /* First apply the substitution to the current stmt. This
2080 propagates in any constants and tmp-tmp assignments
2081 accumulated prior to this point. As part of the subst_Stmt
2082 call, also then fold any constant expressions resulting. */
2083
2084 st2 = in->stmts[i];
2085
2086 /* perhaps st2 is already a no-op? */
2087 if (st2->tag == Ist_NoOp) continue;
2088
2089 st2 = subst_and_fold_Stmt( env, st2 );
2090
2091 /* If the statement has been folded into a no-op, forget it. */
2092 if (st2->tag == Ist_NoOp) continue;
2093
2094 /* Now consider what the stmt looks like. If it's of the form
2095 't = const' or 't1 = t2', add it to the running environment
2096 and not to the output BB. Otherwise, add it to the output
2097 BB. Note, we choose not to propagate const when const is an
2098 F64i, so that F64i literals can be CSE'd later. This helps
2099 x86 floating point code generation. */
2100
2101 if (st2->tag == Ist_WrTmp
2102 && st2->Ist.WrTmp.data->tag == Iex_Const
2103 && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
2104 /* 't = const' -- add to env.
2105 The pair (IRTemp, IRExpr*) is added. */
2106 env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
2107 }
2108 else
2109 if (st2->tag == Ist_WrTmp && st2->Ist.WrTmp.data->tag == Iex_RdTmp) {
2110 /* 't1 = t2' -- add to env.
2111 The pair (IRTemp, IRExpr*) is added. */
2112 env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
2113 }
2114 else {
2115 /* Not interesting, copy st2 into the output block. */
2116 addStmtToIRSB( out, st2 );
2117 }
2118 }
2119
2120 out->next = subst_Expr( env, in->next );
2121 out->jumpkind = in->jumpkind;
2122 return out;
2123 }
2124
2125
2126 /*---------------------------------------------------------------*/
2127 /*--- Dead code (t = E) removal ---*/
2128 /*---------------------------------------------------------------*/
2129
2130 /* As a side effect, also removes all code following an unconditional
2131 side exit. */
2132
2133 /* The 'set' used below is conceptually a map from IRTemp to nothing
2134 -- really just representing a set of IRTemps, as a Bool array.
2135 */
2136
2137 inline
2138 static void addUses_Temp ( Bool* set, IRTemp tmp )
2139 {
2140 set[(Int)tmp] = True;
2141 }
2142
2143 static void addUses_Expr ( Bool* set, IRExpr* e )
2144 {
2145 Int i;
2146 switch (e->tag) {
2147 case Iex_GetI:
2148 addUses_Expr(set, e->Iex.GetI.ix);
2149 return;
2150 case Iex_Mux0X:
2151 addUses_Expr(set, e->Iex.Mux0X.cond);
2152 addUses_Expr(set, e->Iex.Mux0X.expr0);
2153 addUses_Expr(set, e->Iex.Mux0X.exprX);
2154 return;
2155 case Iex_CCall:
2156 for (i = 0; e->Iex.CCall.args[i]; i++)
2157 addUses_Expr(set, e->Iex.CCall.args[i]);
2158 return;
2159 case Iex_Load:
2160 addUses_Expr(set, e->Iex.Load.addr);
2161 return;
2162 case Iex_Qop:
2163 addUses_Expr(set, e->Iex.Qop.arg1);
2164 addUses_Expr(set, e->Iex.Qop.arg2);
2165 addUses_Expr(set, e->Iex.Qop.arg3);
2166 addUses_Expr(set, e->Iex.Qop.arg4);
2167 return;
2168 case Iex_Triop:
2169 addUses_Expr(set, e->Iex.Triop.arg1);
2170 addUses_Expr(set, e->Iex.Triop.arg2);
2171 addUses_Expr(set, e->Iex.Triop.arg3);
2172 return;
2173 case Iex_Binop:
2174 addUses_Expr(set, e->Iex.Binop.arg1);
2175 addUses_Expr(set, e->Iex.Binop.arg2);
2176 return;
2177 case Iex_Unop:
2178 addUses_Expr(set, e->Iex.Unop.arg);
2179 return;
2180 case Iex_RdTmp:
2181 addUses_Temp(set, e->Iex.RdTmp.tmp);
2182 return;
2183 case Iex_Const:
2184 case Iex_Get:
2185 return;
2186 default:
2187 vex_printf("\n");
2188 ppIRExpr(e);
2189 vpanic("addUses_Expr");
2190 }
2191 }
2192
2193 static void addUses_Stmt ( Bool* set, IRStmt* st )
2194 {
2195 Int i;
2196 IRDirty* d;
2197 IRCAS* cas;
2198 switch (st->tag) {
2199 case Ist_AbiHint:
2200 addUses_Expr(set, st->Ist.AbiHint.base);
2201 addUses_Expr(set, st->Ist.AbiHint.nia);
2202 return;
2203 case Ist_PutI:
2204 addUses_Expr(set, st->Ist.PutI.ix);
2205 addUses_Expr(set, st->Ist.PutI.data);
2206 return;
2207 case Ist_WrTmp:
2208 addUses_Expr(set, st->Ist.WrTmp.data);
2209 return;
2210 case Ist_Put:
2211 addUses_Expr(set, st->Ist.Put.data);
2212 return;
2213 case Ist_Store:
2214 addUses_Expr(set, st->Ist.Store.addr);
2215 addUses_Expr(set, st->Ist.Store.data);
2216 return;
2217 case Ist_CAS:
2218 cas = st->Ist.CAS.details;
2219 addUses_Expr(set, cas->addr);
2220 if (cas->expdHi)
2221 addUses_Expr(set, cas->expdHi);
2222 addUses_Expr(set, cas->expdLo);
2223 if (cas->dataHi)
2224 addUses_Expr(set, cas->dataHi);
2225 addUses_Expr(set, cas->dataLo);
2226 return;
2227 case Ist_LLSC:
2228 addUses_Expr(set, st->Ist.LLSC.addr);
2229 if (st->Ist.LLSC.storedata)
2230 addUses_Expr(set, st->Ist.LLSC.storedata);
2231 return;
2232 case Ist_Dirty:
2233 d = st->Ist.Dirty.details;
2234 if (d->mFx != Ifx_None)
2235 addUses_Expr(set, d->mAddr);
2236 addUses_Expr(set, d->guard);
2237 for (i = 0; d->args[i] != NULL; i++)
2238 addUses_Expr(set, d->args[i]);
2239 return;
2240 case Ist_NoOp:
2241 case Ist_IMark:
2242 case Ist_MBE:
2243 return;
2244 case Ist_Exit:
2245 addUses_Expr(set, st->Ist.Exit.guard);
2246 return;
2247 default:
2248 vex_printf("\n");
2249 ppIRStmt(st);
2250 vpanic("addUses_Stmt");
2251 }
2252 }
2253
2254
2255 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
2256 static Bool isZeroU1 ( IRExpr* e )
2257 {
2258 return toBool( e->tag == Iex_Const
2259 && e->Iex.Const.con->tag == Ico_U1
2260 && e->Iex.Const.con->Ico.U1 == False );
2261 }
2262
2263 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
2264 static Bool isOneU1 ( IRExpr* e )
2265 {
2266 return toBool( e->tag == Iex_Const
2267 && e->Iex.Const.con->tag == Ico_U1
2268 && e->Iex.Const.con->Ico.U1 == True );
2269 }
2270
2271
2272 /* Note, this destructively modifies the given IRSB. */
2273
2274 /* Scan backwards through statements, carrying a set of IRTemps which
2275 are known to be used after the current point. On encountering 't =
2276 E', delete the binding if it is not used. Otherwise, add any temp
2277 uses to the set and keep on moving backwards.
2278
2279 As an enhancement, the first (backwards) pass searches for IR exits
2280 with always-taken conditions and notes the location of the earliest
2281 one in the block. If any such are found, a second pass copies the
2282 exit destination and jump kind to the bb-end. Then, the exit and
2283 all statements following it are turned into no-ops.
2284 */
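/* For example (schematically): in
      t1 = GET:I32(0)
      t2 = Add32(t1,0x4:I32)
      PUT(0) = t1
   t2 is never used, so the backwards scan turns the middle stmt
   into a no-op; t1 survives because the Put uses it. */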
2285
2286 /* notstatic */ void do_deadcode_BB ( IRSB* bb )
2287 {
2288 Int i, i_unconditional_exit;
2289 Int n_tmps = bb->tyenv->types_used;
2290 Bool* set = LibVEX_Alloc(n_tmps * sizeof(Bool));
2291 IRStmt* st;
2292
2293 for (i = 0; i < n_tmps; i++)
2294 set[i] = False;
2295
2296 /* start off by recording IRTemp uses in the next field. */
2297 addUses_Expr(set, bb->next);
2298
2299 /* First pass */
2300
2301 /* Work backwards through the stmts */
2302 i_unconditional_exit = -1;
2303 for (i = bb->stmts_used-1; i >= 0; i--) {
2304 st = bb->stmts[i];
2305 if (st->tag == Ist_NoOp)
2306 continue;
2307 /* take note of any unconditional exits */
2308 if (st->tag == Ist_Exit
2309 && isOneU1(st->Ist.Exit.guard))
2310 i_unconditional_exit = i;
2311 if (st->tag == Ist_WrTmp
2312 && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
2313 /* it's an IRTemp which never got used. Delete it. */
2314 if (DEBUG_IROPT) {
2315 vex_printf("DEAD: ");
2316 ppIRStmt(st);
2317 vex_printf("\n");
2318 }
2319 bb->stmts[i] = IRStmt_NoOp();
2320 }
2321 else
2322 if (st->tag == Ist_Dirty
2323 && st->Ist.Dirty.details->guard
2324 && isZeroU1(st->Ist.Dirty.details->guard)) {
2325 /* This is a dirty helper which will never get called.
2326 Delete it. */
2327 bb->stmts[i] = IRStmt_NoOp();
2328 }
2329 else {
2330 /* Note any IRTemp uses made by the current statement. */
2331 addUses_Stmt(set, st);
2332 }
2333 }
2334
2335 /* Optional second pass: if any unconditional exits were found,
2336 delete them and all following statements. */
2337
2338 if (i_unconditional_exit != -1) {
2339 if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
2340 i_unconditional_exit);
2341 vassert(i_unconditional_exit >= 0
2342 && i_unconditional_exit < bb->stmts_used);
2343 bb->next
2344 = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
2345 bb->jumpkind
2346 = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
2347 for (i = i_unconditional_exit; i < bb->stmts_used; i++)
2348 bb->stmts[i] = IRStmt_NoOp();
2349 }
2350 }
2351
2352
2353 /*---------------------------------------------------------------*/
2354 /*--- Specialisation of helper function calls, in ---*/
2355 /*--- collaboration with the front end ---*/
2356 /*---------------------------------------------------------------*/
2357
2358 static
2359 IRSB* spec_helpers_BB(
2360 IRSB* bb,
2361 IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int)
2362 )
2363 {
2364 Int i;
2365 IRStmt* st;
2366 IRExpr* ex;
2367 Bool any = False;
2368
2369 for (i = bb->stmts_used-1; i >= 0; i--) {
2370 st = bb->stmts[i];
2371
2372 if (st->tag != Ist_WrTmp
2373 || st->Ist.WrTmp.data->tag != Iex_CCall)
2374 continue;
2375
2376 ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
2377 st->Ist.WrTmp.data->Iex.CCall.args,
2378 &bb->stmts[0], i );
2379 if (!ex)
2380 /* the front end can't think of a suitable replacement */
2381 continue;
2382
2383 /* We got something better. Install it in the bb. */
2384 any = True;
2385 bb->stmts[i]
2386 = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);
2387
2388 if (0) {
2389 vex_printf("SPEC: ");
2390 ppIRExpr(st->Ist.WrTmp.data);
2391 vex_printf(" --> ");
2392 ppIRExpr(ex);
2393 vex_printf("\n");
2394 }
2395 }
2396
2397 if (any)
2398 bb = flatten_BB(bb);
2399 return bb;
2400 }
2401
2402
2403 /*---------------------------------------------------------------*/
2404 /*--- Determination of guest state aliasing relationships ---*/
2405 /*---------------------------------------------------------------*/
2406
2407 /* These are helper functions for CSE and GetI/PutI transformations.
2408
2409 Determine, to the extent possible, the relationship between two
2410 guest state accesses. The possible outcomes are:
2411
2412 * Exact alias. These two accesses denote precisely the same
2413 piece of the guest state.
2414
2415 * Definitely no alias. These two accesses are guaranteed not to
2416 overlap any part of the guest state.
2417
2418 * Unknown -- if neither of the above can be established.
2419
2420 If in doubt, return Unknown. */
2421
2422 typedef
2423 enum { ExactAlias, NoAlias, UnknownAlias }
2424 GSAliasing;
2425
2426
2427 /* Produces the alias relation between an indexed guest
2428 state access and a non-indexed access. */
2429
2430 static
2431 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
2432 Int offset2, IRType ty2 )
2433 {
2434 UInt minoff1, maxoff1, minoff2, maxoff2;
2435
2436 getArrayBounds( descr1, &minoff1, &maxoff1 );
2437 minoff2 = offset2;
2438 maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
2439
2440 if (maxoff1 < minoff2 || maxoff2 < minoff1)
2441 return NoAlias;
2442
2443 /* Could probably do better here if required. For the moment
2444 however just claim not to know anything more. */
2445 return UnknownAlias;
2446 }
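/* Worked example: if descr1 covers, say, guest offsets 64 .. 127
   and the fixed access is 4 bytes at offset 128 (so 128 .. 131),
   then maxoff1 (127) < minoff2 (128) and the result is NoAlias;
   a 4-byte access at offset 124 (124 .. 127) overlaps the array,
   so the result is UnknownAlias. */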
2447
2448
2449 /* Produces the alias relation between two indexed guest state
2450 accesses. */
2451
2452 static
2453 GSAliasing getAliasingRelation_II (
2454 IRRegArray* descr1, IRExpr* ix1, Int bias1,
2455 IRRegArray* descr2, IRExpr* ix2, Int bias2
2456 )
2457 {
2458 UInt minoff1, maxoff1, minoff2, maxoff2;
2459 Int iters;
2460
2461 /* First try hard to show they don't alias. */
2462 getArrayBounds( descr1, &minoff1, &maxoff1 );
2463 getArrayBounds( descr2, &minoff2, &maxoff2 );
2464 if (maxoff1 < minoff2 || maxoff2 < minoff1)
2465 return NoAlias;
2466
2467 /* So the two arrays at least partially overlap. To get any
2468 further we'll have to be sure that the descriptors are
2469 identical. */
2470 if (!eqIRRegArray(descr1, descr2))
2471 return UnknownAlias;
2472
2473 /* The descriptors are identical. Now the only difference can be
2474 in the index expressions. If they cannot be shown to be
2475 identical, we have to say we don't know what the aliasing
2476 relation will be. Now, since the IR is flattened, the index
2477 expressions should be atoms -- either consts or tmps. So that
2478 makes the comparison simple. */
2479 vassert(isIRAtom(ix1));
2480 vassert(isIRAtom(ix2));
2481 if (!eqIRAtom(ix1,ix2))
2482 return UnknownAlias;
2483
2484 /* Ok, the index expressions are identical. So now the only way
2485 they can be different is in the bias. Normalise this
2486 paranoidly, to reliably establish equality/non-equality. */
2487
2488 /* So now we know that the GetI and PutI index the same array
2489 with the same base. Are the offsets the same, modulo the
2490 array size? Do this paranoidly. */
2491 vassert(descr1->nElems == descr2->nElems);
2492 vassert(descr1->elemTy == descr2->elemTy);
2493 vassert(descr1->base == descr2->base);
2494 iters = 0;
2495 while (bias1 < 0 || bias2 < 0) {
2496 bias1 += descr1->nElems;
2497 bias2 += descr1->nElems;
2498 iters++;
2499 if (iters > 10)
2500 vpanic("getAliasingRelation: iters");
2501 }
2502 vassert(bias1 >= 0 && bias2 >= 0);
2503 bias1 %= descr1->nElems;
2504 bias2 %= descr1->nElems;
2505 vassert(bias1 >= 0 && bias1 < descr1->nElems);
2506 vassert(bias2 >= 0 && bias2 < descr1->nElems);
2507
2508 /* Finally, bias1 and bias2 are normalised into the range
2509 0 .. descr1->nElems - 1. And so we can establish
2510 equality/non-equality. */
2511
2512 return bias1==bias2 ? ExactAlias : NoAlias;
2513 }
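/* Worked example: with nElems == 8, biases -3 and 13 become 5 and
   21 after the loop (8 is added to both), hence 5 and 5 after the
   mod, giving ExactAlias; biases -3 and 12 end up as 5 and 4,
   giving NoAlias. */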
2514
2515
2516 /*---------------------------------------------------------------*/
2517 /*--- Common Subexpression Elimination ---*/
2518 /*---------------------------------------------------------------*/
2519
2520 /* Expensive in time and space. */
2521
2522 /* Uses two environments:
2523 an IRTemp -> IRTemp mapping
2524 a mapping from AvailExpr* to IRTemp
2525 */
2526
2527 typedef
2528 struct {
2529 enum { Ut, Btt, Btc, Bct, Cf64i, Mttt, GetIt } tag;
2530 union {
2531 /* unop(tmp) */
2532 struct {
2533 IROp op;
2534 IRTemp arg;
2535 } Ut;
2536 /* binop(tmp,tmp) */
2537 struct {
2538 IROp op;
2539 IRTemp arg1;
2540 IRTemp arg2;
2541 } Btt;
2542 /* binop(tmp,const) */
2543 struct {
2544 IROp op;
2545 IRTemp arg1;
2546 IRConst con2;
2547 } Btc;
2548 /* binop(const,tmp) */
2549 struct {
2550 IROp op;
2551 IRConst con1;
2552 IRTemp arg2;
2553 } Bct;
2554 /* F64i-style const */
2555 struct {
2556 ULong f64i;
2557 } Cf64i;
2558 /* Mux0X(tmp,tmp,tmp) */
2559 struct {
2560 IRTemp co;
2561 IRTemp e0;
2562 IRTemp eX;
2563 } Mttt;
2564 /* GetI(descr,tmp,bias)*/
2565 struct {
2566 IRRegArray* descr;
2567 IRTemp ix;
2568 Int bias;
2569 } GetIt;
2570 } u;
2571 }
2572 AvailExpr;
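/* For example, the flattened expression Add32(t1,t2) is captured
   as a Btt (op Iop_Add32, args t1 and t2), and Add32(t1,0x4:I32)
   as a Btc. An expression matching none of the seven forms is
   simply not a CSE candidate: irExpr_to_AvailExpr below returns
   NULL for it. */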
2573
2574 static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
2575 {
2576 if (a1->tag != a2->tag)
2577 return False;
2578 switch (a1->tag) {
2579 case Ut:
2580 return toBool(
2581 a1->u.Ut.op == a2->u.Ut.op
2582 && a1->u.Ut.arg == a2->u.Ut.arg);
2583 case Btt:
2584 return toBool(
2585 a1->u.Btt.op == a2->u.Btt.op
2586 && a1->u.Btt.arg1 == a2->u.Btt.arg1
2587 && a1->u.Btt.arg2 == a2->u.Btt.arg2);
2588 case Btc:
2589 return toBool(
2590 a1->u.Btc.op == a2->u.Btc.op
2591 && a1->u.Btc.arg1 == a2->u.Btc.arg1
2592 && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
2593 case Bct:
2594 return toBool(
2595 a1->u.Bct.op == a2->u.Bct.op
2596 && a1->u.Bct.arg2 == a2->u.Bct.arg2
2597 && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
2598 case Cf64i:
2599 return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
2600 case Mttt:
2601 return toBool(a1->u.Mttt.co == a2->u.Mttt.co
2602 && a1->u.Mttt.e0 == a2->u.Mttt.e0
2603 && a1->u.Mttt.eX == a2->u.Mttt.eX);
2604 case GetIt:
2605 return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
2606 && a1->u.GetIt.ix == a2->u.GetIt.ix
2607 && a1->u.GetIt.bias == a2->u.GetIt.bias);
2608 default: vpanic("eq_AvailExpr");
2609 }
2610 }
2611
2612 static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
2613 {
2614 IRConst* con;
2615 switch (ae->tag) {
2616 case Ut:
2617 return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
2618 case Btt:
2619 return IRExpr_Binop( ae->u.Btt.op,
2620 IRExpr_RdTmp(ae->u.Btt.arg1),
2621 IRExpr_RdTmp(ae->u.Btt.arg2) );
2622 case Btc:
2623 con = LibVEX_Alloc(sizeof(IRConst));
2624 *con = ae->u.Btc.con2;
2625 return IRExpr_Binop( ae->u.Btc.op,
2626 IRExpr_RdTmp(ae->u.Btc.arg1),
2627 IRExpr_Const(con) );
2628 case Bct:
2629 con = LibVEX_Alloc(sizeof(IRConst));
2630 *con = ae->u.Bct.con1;
2631 return IRExpr_Binop( ae->u.Bct.op,
2632 IRExpr_Const(con),
2633 IRExpr_RdTmp(ae->u.Bct.arg2) );
2634 case Cf64i:
2635 return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
2636 case Mttt:
2637 return IRExpr_Mux0X(IRExpr_RdTmp(ae->u.Mttt.co),
2638 IRExpr_RdTmp(ae->u.Mttt.e0),
2639 IRExpr_RdTmp(ae->u.Mttt.eX));
2640 case GetIt:
2641 return IRExpr_GetI(ae->u.GetIt.descr,
2642 IRExpr_RdTmp(ae->u.GetIt.ix),
2643 ae->u.GetIt.bias);
2644 default:
2645 vpanic("availExpr_to_IRExpr");
2646 }
2647 }
2648
2649 inline
2650 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
2651 {
2652 HWord res;
2653 /* env :: IRTemp -> IRTemp */
2654 if (lookupHHW( env, &res, (HWord)tmp ))
2655 return (IRTemp)res;
2656 else
2657 return tmp;
2658 }
2659
2660 static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
2661 {
2662 /* env :: IRTemp -> IRTemp */
2663 switch (ae->tag) {
2664 case Ut:
2665 ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
2666 break;
2667 case Btt:
2668 ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
2669 ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
2670 break;
2671 case Btc:
2672 ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
2673 break;
2674 case Bct:
2675 ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
2676 break;
2677 case Cf64i:
2678 break;
2679 case Mttt:
2680 ae->u.Mttt.co = subst_AvailExpr_Temp( env, ae->u.Mttt.co );
2681 ae->u.Mttt.e0 = subst_AvailExpr_Temp( env, ae->u.Mttt.e0 );
2682 ae->u.Mttt.eX = subst_AvailExpr_Temp( env, ae->u.Mttt.eX );
2683 break;
2684 case GetIt:
2685 ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
2686 break;
2687 default:
2688 vpanic("subst_AvailExpr");
2689 }
2690 }
2691
2692 static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e )
2693 {
2694 AvailExpr* ae;
2695
2696 if (e->tag == Iex_Unop
2697 && e->Iex.Unop.arg->tag == Iex_RdTmp) {
2698 ae = LibVEX_Alloc(sizeof(AvailExpr));
2699 ae->tag = Ut;
2700 ae->u.Ut.op = e->Iex.Unop.op;
2701 ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
2702 return ae;
2703 }
2704
2705 if (e->tag == Iex_Binop
2706 && e->Iex.Binop.arg1->tag == Iex_RdTmp
2707 && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
2708 ae = LibVEX_Alloc(sizeof(AvailExpr));
2709 ae->tag = Btt;
2710 ae->u.Btt.op = e->Iex.Binop.op;
2711 ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
2712 ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
2713 return ae;
2714 }
2715
2716 if (e->tag == Iex_Binop
2717 && e->Iex.Binop.arg1->tag == Iex_RdTmp
2718 && e->Iex.Binop.arg2->tag == Iex_Const) {
2719 ae = LibVEX_Alloc(sizeof(AvailExpr));
2720 ae->tag = Btc;
2721 ae->u.Btc.op = e->Iex.Binop.op;
2722 ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
2723 ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
2724 return ae;
2725 }
2726
2727 if (e->tag == Iex_Binop
2728 && e->Iex.Binop.arg1->tag == Iex_Const
2729 && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
2730 ae = LibVEX_Alloc(sizeof(AvailExpr));
2731 ae->tag = Bct;
2732 ae->u.Bct.op = e->Iex.Binop.op;
2733 ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
2734 ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
2735 return ae;
2736 }
2737
2738 if (e->tag == Iex_Const
2739 && e->Iex.Const.con->tag == Ico_F64i) {
2740 ae = LibVEX_Alloc(sizeof(AvailExpr));
2741 ae->tag = Cf64i;
2742 ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
2743 return ae;
2744 }
2745
2746 if (e->tag == Iex_Mux0X
2747 && e->Iex.Mux0X.cond->tag == Iex_RdTmp
2748 && e->Iex.Mux0X.expr0->tag == Iex_RdTmp
2749 && e->Iex.Mux0X.exprX->tag == Iex_RdTmp) {
2750 ae = LibVEX_Alloc(sizeof(AvailExpr));
2751 ae->tag = Mttt;
2752 ae->u.Mttt.co = e->Iex.Mux0X.cond->Iex.RdTmp.tmp;
2753 ae->u.Mttt.e0 = e->Iex.Mux0X.expr0->Iex.RdTmp.tmp;
2754 ae->u.Mttt.eX = e->Iex.Mux0X.exprX->Iex.RdTmp.tmp;
2755 return ae;
2756 }
2757
2758 if (e->tag == Iex_GetI
2759 && e->Iex.GetI.ix->tag == Iex_RdTmp) {
2760 ae = LibVEX_Alloc(sizeof(AvailExpr));
2761 ae->tag = GetIt;
2762 ae->u.GetIt.descr = e->Iex.GetI.descr;
2763 ae->u.GetIt.ix = e->Iex.GetI.ix->Iex.RdTmp.tmp;
2764 ae->u.GetIt.bias = e->Iex.GetI.bias;
2765 return ae;
2766 }
2767
2768 return NULL;
2769 }
2770
2771
2772 /* The BB is modified in-place. Returns True if any changes were
2773 made. */
2774
2775 static Bool do_cse_BB ( IRSB* bb )
2776 {
2777 Int i, j, paranoia;
2778 IRTemp t, q;
2779 IRStmt* st;
2780 AvailExpr* eprime;
2781 AvailExpr* ae;
2782 Bool invalidate;
2783 Bool anyDone = False;
2784
2785 HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
2786 HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */
2787
2788 vassert(sizeof(IRTemp) <= sizeof(HWord));
2789
2790 if (0) { ppIRSB(bb); vex_printf("\n\n"); }
2791
2792 /* Iterate forwards over the stmts.
2793 On seeing "t = E", where E is one of the 7 AvailExpr forms:
2794 let E' = apply tenv substitution to E
2795 search aenv for E'
2796 if a mapping E' -> q is found,
2797 replace this stmt by "t = q"
2798 and add binding t -> q to tenv
2799 else
2800 add binding E' -> t to aenv
2801 replace this stmt by "t = E'"
2802
2803 Other statements are only interesting to the extent that they
2804 might invalidate some of the expressions in aenv. So there is
2805 an invalidate-bindings check for each statement seen.
2806 */
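/* For example (schematically): given
      t3 = Add32(t1,t2)
      t4 = Add32(t1,t2)
   the first stmt installs the binding Add32(t1,t2) -> t3 in aenv;
   the second then hits that binding, is rewritten to 't4 = t3',
   and t4 -> t3 is recorded in tenv so that later uses of t4
   resolve to t3. */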
2807 for (i = 0; i < bb->stmts_used; i++) {
2808 st = bb->stmts[i];
2809
2810 /* ------ BEGIN invalidate aenv bindings ------ */
2811 /* This is critical: remove from aenv any E' -> .. bindings
2812 which might be invalidated by this statement. The only
2813 vulnerable kind of bindings are the GetI kind.
2814 Dirty call - dump (paranoia level -> 2)
2815 Store - dump (ditto)
2816 Put, PutI - dump unless no-overlap is proven (.. -> 1)
2817 Uses getAliasingRelation_IC and getAliasingRelation_II
2818 to do the no-overlap assessments needed for Put/PutI.
2819 */
2820 switch (st->tag) {
2821 case Ist_Dirty: case Ist_Store: case Ist_MBE:
2822 case Ist_CAS: case Ist_LLSC:
2823 paranoia = 2; break;
2824 case Ist_Put: case Ist_PutI:
2825 paranoia = 1; break;
2826 case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
2827 case Ist_WrTmp: case Ist_Exit:
2828 paranoia = 0; break;
2829 default:
2830 vpanic("do_cse_BB(1)");
2831 }
2832
2833 if (paranoia > 0) {
2834 for (j = 0; j < aenv->used; j++) {
2835 if (!aenv->inuse[j])
2836 continue;
2837 ae = (AvailExpr*)aenv->key[j];
2838 if (ae->tag != GetIt)
2839 continue;
2840 invalidate = False;
2841 if (paranoia >= 2) {
2842 invalidate = True;
2843 } else {
2844 vassert(paranoia == 1);
2845 if (st->tag == Ist_Put) {
2846 if (getAliasingRelation_IC(
2847 ae->u.GetIt.descr,
2848 IRExpr_RdTmp(ae->u.GetIt.ix),
2849 st->Ist.Put.offset,
2850 typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
2851 ) != NoAlias)
2852 invalidate = True;
2853 }
2854 else
2855 if (st->tag == Ist_PutI) {
2856 if (getAliasingRelation_II(
2857 ae->u.GetIt.descr,
2858 IRExpr_RdTmp(ae->u.GetIt.ix),
2859 ae->u.GetIt.bias,
2860 st->Ist.PutI.descr,
2861 st->Ist.PutI.ix,
2862 st->Ist.PutI.bias
2863 ) != NoAlias)
2864 invalidate = True;
2865 }
2866 else
2867 vpanic("do_cse_BB(2)");
2868 }
2869
2870 if (invalidate) {
2871 aenv->inuse[j] = False;
2872 aenv->key[j] = (HWord)NULL; /* be sure */
2873 }
2874 } /* for j */
2875 } /* paranoia > 0 */
2876
2877 /* ------ END invalidate aenv bindings ------ */
2878
2879 /* ignore not-interestings */
2880 if (st->tag != Ist_WrTmp)
2881 continue;
2882
2883 t = st->Ist.WrTmp.tmp;
2884 eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data);
2885 /* ignore if not of AvailExpr form */
2886 if (!eprime)
2887 continue;
2888
2889 /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
2890
2891 /* apply tenv */
2892 subst_AvailExpr( tenv, eprime );
2893
2894 /* search aenv for eprime, unfortunately the hard way */
2895 for (j = 0; j < aenv->used; j++)
2896 if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
2897 break;
2898
2899 if (j < aenv->used) {
2900 /* A binding E' -> q was found. Replace stmt by "t = q" and
2901 note the t->q binding in tenv. */
2902 /* (this is the core of the CSE action) */
2903 q = (IRTemp)aenv->val[j];
2904 bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
2905 addToHHW( tenv, (HWord)t, (HWord)q );
2906 anyDone = True;
2907 } else {
2908 /* No binding was found, so instead we add E' -> t to our
2909 collection of available expressions, replace this stmt
2910 with "t = E'", and move on. */
2911 bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
2912 addToHHW( aenv, (HWord)eprime, (HWord)t );
2913 }
2914 }
2915
2916 /*
2917 ppIRSB(bb);
2918 sanityCheckIRSB(bb, Ity_I32);
2919 vex_printf("\n\n");
2920 */
2921 return anyDone;
2922 }
2923
2924
2925 /*---------------------------------------------------------------*/
2926 /*--- Add32/Sub32 chain collapsing ---*/
2927 /*---------------------------------------------------------------*/
2928
2929 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
2930
2931 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ? If
2932 yes, set *tmp and *i32 appropriately. *i32 is set as if the
2933 root node is Add32, not Sub32. */
2934
2935 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
2936 {
2937 if (e->tag != Iex_Binop)
2938 return False;
2939 if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
2940 return False;
2941 if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
2942 return False;
2943 if (e->Iex.Binop.arg2->tag != Iex_Const)
2944 return False;
2945 *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
2946 *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
2947 if (e->Iex.Binop.op == Iop_Sub32)
2948 *i32 = -*i32;
2949 return True;
2950 }
2951
2952
2953 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
2954 other tmp2. Scan backwards from the specified start point -- an
2955 optimisation. */
2956
2957 static Bool collapseChain ( IRSB* bb, Int startHere,
2958 IRTemp tmp,
2959 IRTemp* tmp2, Int* i32 )
2960 {
2961 Int j, ii;
2962 IRTemp vv;
2963 IRStmt* st;
2964 IRExpr* e;
2965
2966 /* the (var, con) pair contains the current 'representation' for
2967 'tmp'. We start with 'tmp + 0'. */
2968 IRTemp var = tmp;
2969 Int con = 0;
2970
2971 /* Scan backwards to see if tmp can be replaced by some other tmp
2972 +/- a constant. */
2973 for (j = startHere; j >= 0; j--) {
2974 st = bb->stmts[j];
2975 if (st->tag != Ist_WrTmp)
2976 continue;
2977 if (st->Ist.WrTmp.tmp != var)
2978 continue;
2979 e = st->Ist.WrTmp.data;
2980 if (!isAdd32OrSub32(e, &vv, &ii))
2981 break;
2982 var = vv;
2983 con += ii;
2984 }
2985 if (j == -1)
2986 /* no earlier binding for var .. ill-formed IR */
2987 vpanic("collapseChain");
2988
2989 /* so, did we find anything interesting? */
2990 if (var == tmp)
2991 return False; /* no .. */
2992
2993 *tmp2 = var;
2994 *i32 = con;
2995 return True;
2996 }
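/* Worked example: given
      t2 = Add32(t1,0x4:I32)
      t3 = Sub32(t2,0x1:I32)
   collapseChain starting from t3 accumulates t3 == t1 + 3, so the
   caller can rewrite the definition of t3 as Add32(t1,0x3:I32). */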
2997
2998
2999 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
3000
3001 static void collapse_AddSub_chains_BB ( IRSB* bb )
3002 {
3003 IRStmt *st;
3004 IRTemp var, var2;
3005 Int i, con, con2;
3006
3007 for (i = bb->stmts_used-1; i >= 0; i--) {
3008 st = bb->stmts[i];
3009 if (st->tag == Ist_NoOp)
3010 continue;
3011
3012 /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
3013
3014 if (st->tag == Ist_WrTmp
3015 && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
3016
3017 /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
3018 Find out if var can be expressed as var2 + con2. */
3019 if (collapseChain(bb, i-1, var, &var2, &con2)) {
3020 if (DEBUG_IROPT) {
3021 vex_printf("replacing1 ");
3022 ppIRStmt(st);
3023 vex_printf(" with ");
3024 }
3025 con2 += con;
3026 bb->stmts[i]
3027 = IRStmt_WrTmp(
3028 st->Ist.WrTmp.tmp,
3029 (con2 >= 0)
3030 ? IRExpr_Binop(Iop_Add32,
3031 IRExpr_RdTmp(var2),
3032 IRExpr_Const(IRConst_U32(con2)))
3033 : IRExpr_Binop(Iop_Sub32,
3034 IRExpr_RdTmp(var2),
3035 IRExpr_Const(IRConst_U32(-con2)))
3036 );
3037 if (DEBUG_IROPT) {
3038 ppIRStmt(bb->stmts[i]);
3039 vex_printf("\n");
3040 }
3041 }
3042
3043 continue;
3044 }
3045
3046 /* Try to collapse 't1 = GetI[t2, con]'. */
3047
3048 if (st->tag == Ist_WrTmp
3049 && st->Ist.WrTmp.data->tag == Iex_GetI
3050 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
3051 && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
3052 ->Iex.RdTmp.tmp, &var2, &con2)) {
3053 if (DEBUG_IROPT) {
3054 vex_printf("replacing3 ");
3055 ppIRStmt(st);
3056 vex_printf(" with ");
3057 }
3058 con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
3059 bb->stmts[i]
3060 = IRStmt_WrTmp(
3061 st->Ist.WrTmp.tmp,
3062 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
3063 IRExpr_RdTmp(var2),
3064 con2));
3065 if (DEBUG_IROPT) {
3066 ppIRStmt(bb->stmts[i]);
3067 vex_printf("\n");
3068 }
3069 continue;
3070 }
3071
3072 /* Perhaps st is PutI[t, con] ? */
3073
3074 if (st->tag == Ist_PutI
3075 && st->Ist.PutI.ix->tag == Iex_RdTmp
3076 && collapseChain(bb, i-1, st->Ist.PutI.ix->Iex.RdTmp.tmp,
3077 &var2, &con2)) {
3078 if (DEBUG_IROPT) {
3079 vex_printf("replacing2 ");
3080 ppIRStmt(st);
3081 vex_printf(" with ");
3082 }
3083 con2 += st->Ist.PutI.bias;
3084 bb->stmts[i]
3085 = IRStmt_PutI(st->Ist.PutI.descr,
3086 IRExpr_RdTmp(var2),
3087 con2,
3088 st->Ist.PutI.data);
3089 if (DEBUG_IROPT) {
3090 ppIRStmt(bb->stmts[i]);
3091 vex_printf("\n");
3092 }
3093 continue;
3094 }
3095
3096 } /* for */
3097 }
3098
3099
3100 /*---------------------------------------------------------------*/
3101 /*--- PutI/GetI transformations ---*/
3102 /*---------------------------------------------------------------*/
3103
3104 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
3105 the given starting point to find, if any, a PutI which writes
3106 exactly the same piece of guest state, and so return the expression
3107 that the PutI writes. This is the core of PutI-GetI forwarding. */
3108
3109 static
3110 IRExpr* findPutI ( IRSB* bb, Int startHere,
3111 IRRegArray* descrG, IRExpr* ixG, Int biasG )
3112 {
3113 Int j;
3114 IRStmt* st;
3115 GSAliasing relation;
3116
3117 if (0) {
3118 vex_printf("\nfindPutI ");
3119 ppIRRegArray(descrG);
3120 vex_printf(" ");
3121 ppIRExpr(ixG);
3122 vex_printf(" %d\n", biasG);
3123 }
3124
3125 /* Scan backwards in bb from startHere to find a suitable PutI
3126 binding for (descrG, ixG, biasG), if any. */
3127
3128 for (j = startHere; j >= 0; j--) {
3129 st = bb->stmts[j];
3130 if (st->tag == Ist_NoOp)
3131 continue;
3132
3133 if (st->tag == Ist_Put) {
3134 /* Non-indexed Put. This can't give a binding, but we do
3135 need to check it doesn't invalidate the search by
3136 overlapping any part of the indexed guest state. */
3137
3138 relation
3139 = getAliasingRelation_IC(
3140 descrG, ixG,
3141 st->Ist.Put.offset,
3142 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
3143
3144 if (relation == NoAlias) {
3145 /* we're OK; keep going */
3146 continue;
3147 } else {
3148 /* relation == UnknownAlias || relation == ExactAlias */
3149 /* If this assertion fails, we've found a Put which writes
3150 an area of guest state which is read by a GetI. Which
3151 is unlikely (although not per se wrong). */
3152 vassert(relation != ExactAlias);
3153 /* This Put potentially writes guest state that the GetI
3154 reads; we must fail. */
3155 return NULL;
3156 }
3157 }
3158
3159 if (st->tag == Ist_PutI) {
3160
3161 relation = getAliasingRelation_II(
3162 descrG, ixG, biasG,
3163 st->Ist.PutI.descr,
3164 st->Ist.PutI.ix,
3165 st->Ist.PutI.bias
3166 );
3167
3168 if (relation == NoAlias) {
3169 /* This PutI definitely doesn't overlap. Ignore it and
3170 keep going. */
3171 continue; /* the for j loop */
3172 }
3173
3174 if (relation == UnknownAlias) {
3175 /* We don't know if this PutI writes to the same guest
3176 state as the GetI reads, or not. So we have to give up. */
3177 return NULL;
3178 }
3179
3180 /* Otherwise, we've found what we're looking for. */
3181 vassert(relation == ExactAlias);
3182 return st->Ist.PutI.data;
3183
3184 } /* if (st->tag == Ist_PutI) */
3185
3186 if (st->tag == Ist_Dirty) {
3187 /* Be conservative. If the dirty call has any guest effects at
3188 all, give up. We could do better -- only give up if there
3189 are any guest writes/modifies. */
3190 if (st->Ist.Dirty.details->nFxState > 0)
3191 return NULL;
3192 }
3193
3194 } /* for */
3195
3196 /* No valid replacement was found. */
3197 return NULL;
3198 }
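/* For example (schematically): given
      PUTI(descr)[t1,7] = t9
      ...
      t5 = GETI(descr)[t1,7]
   with nothing in between that could write the same slot, findPutI
   returns t9, and the caller can replace the GetI so that the
   stmt becomes 't5 = t9'. */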
3199
3200
3201
3202 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
3203 that it writes exactly the same piece of guest state) ? Safe
3204 answer: False. */
3205
3206 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
3207 {
3208 vassert(pi->tag == Ist_PutI);
3209 if (s2->tag != Ist_PutI)
3210 return False;
3211
3212 return toBool(
3213 getAliasingRelation_II(
3214 pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
3215 s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
3216 )
3217 == ExactAlias
3218 );
3219 }
3220
3221
3222 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
3223 overlap it? Safe answer: True. Note, we could do a lot better
3224 than this if needed. */
3225
3226 static
3227 Bool guestAccessWhichMightOverlapPutI (
3228 IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
3229 )
3230 {
3231 GSAliasing relation;
3232 UInt minoffP, maxoffP;
3233
3234 vassert(pi->tag == Ist_PutI);
3235 getArrayBounds(pi->Ist.PutI.descr, &minoffP, &maxoffP);
3236 switch (s2->tag) {
3237
3238 case Ist_NoOp:
3239 case Ist_IMark:
3240 return False;
3241
3242 case Ist_MBE:
3243 case Ist_AbiHint:
3244 /* just be paranoid ... these should be rare. */
3245 return True;
3246
3247 case Ist_CAS:
3248 /* This is unbelievably lame, but it's probably not
3249 significant from a performance point of view. Really, a
3250 CAS is a load-store op, so it should be safe to say False.
3251 However .. */
3252 return True;
3253
3254 case Ist_Dirty:
3255 /* If the dirty call has any guest effects at all, give up.
3256 Probably could do better. */
3257 if (s2->Ist.Dirty.details->nFxState > 0)
3258 return True;
3259 return False;
3260
3261 case Ist_Put:
3262 vassert(isIRAtom(s2->Ist.Put.data));
3263 relation
3264 = getAliasingRelation_IC(
3265 pi->Ist.PutI.descr, pi->Ist.PutI.ix,
3266 s2->Ist.Put.offset,
3267 typeOfIRExpr(tyenv,s2->Ist.Put.data)
3268 );
3269 goto have_relation;
3270
3271 case Ist_PutI:
3272 vassert(isIRAtom(s2->Ist.PutI.ix));
3273 vassert(isIRAtom(s2->Ist.PutI.data));
3274 relation
3275 = getAliasingRelation_II(
3276 pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
3277 s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
3278 );
3279 goto have_relation;
3280
3281 case Ist_WrTmp:
3282 if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
3283 relation
3284 = getAliasingRelation_II(
3285 pi->Ist.PutI.descr, pi->Ist.PutI.ix,
3286 pi->Ist.PutI.bias,
3287 s2->Ist.WrTmp.data->Iex.GetI.descr,
3288 s2->Ist.WrTmp.data->Iex.GetI.ix,
3289 s2->Ist.WrTmp.data->Iex.GetI.bias
3290 );
3291 goto have_relation;
3292 }
3293 if (s2->Ist.WrTmp.data->tag == Iex_Get) {
3294 relation
3295 = getAliasingRelation_IC(
3296 pi->Ist.PutI.descr, pi->Ist.PutI.ix,
3297 s2->Ist.WrTmp.data->Iex.Get.offset,
3298 s2->Ist.WrTmp.data->Iex.Get.ty
3299 );
3300 goto have_relation;
3301 }
3302 return False;
3303
3304 case Ist_Store:
3305 vassert(isIRAtom(s2->Ist.Store.addr));
3306 vassert(isIRAtom(s2->Ist.Store.data));
3307 return False;
3308
3309 default:
3310 vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
3311 vpanic("guestAccessWhichMightOverlapPutI");
3312 }
3313
3314 have_relation:
3315 if (relation == NoAlias)
3316 return False;
3317 else
3318 return True; /* ExactAlias or UnknownAlias */
3319 }
3320
3321
3322
3323 /* ---------- PutI/GetI transformations main functions --------- */
3324
3325 /* Remove redundant GetIs, to the extent that they can be detected.
3326 bb is modified in-place. */
3327
3328 static
3329 void do_redundant_GetI_elimination ( IRSB* bb )
3330 {
3331 Int i;
3332 IRStmt* st;
3333
3334 for (i = bb->stmts_used-1; i >= 0; i--) {
3335 st = bb->stmts[i];
3336 if (st->tag == Ist_NoOp)
3337 continue;
3338
3339 if (st->tag == Ist_WrTmp
3340 && st->Ist.WrTmp.data->tag == Iex_GetI
3341 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
3342 IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
3343 IRExpr* ix = st->Ist.WrTmp.data->Iex.GetI.ix;
3344 Int bias = st->Ist.WrTmp.data->Iex.GetI.bias;
3345 IRExpr* replacement = findPutI(bb, i-1, descr, ix, bias);
3346 if (replacement
3347 && isIRAtom(replacement)
3348 /* Make sure we're doing a type-safe transformation! */
3349 && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
3350 if (DEBUG_IROPT) {
3351 vex_printf("rGI: ");
3352 ppIRExpr(st->Ist.WrTmp.data);
3353 vex_printf(" -> ");
3354 ppIRExpr(replacement);
3355 vex_printf("\n");
3356 }
3357 bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
3358 }
3359 }
3360 }
3361
3362 }
3363
3364
3365 /* Remove redundant PutIs, to the extent that they can be detected.
3366 bb is modified in-place. */
3367
3368 static
3369 void do_redundant_PutI_elimination ( IRSB* bb )
3370 {
3371 Int i, j;
3372 Bool delete;
3373 IRStmt *st, *stj;
3374
3375 for (i = 0; i < bb->stmts_used; i++) {
3376 st = bb->stmts[i];
3377 if (st->tag != Ist_PutI)
3378 continue;
3379 /* Ok, search forwards from here to see if we can find another
3380 PutI which makes this one redundant, while dodging various
3381 hazards. Search forwards:
3382 * If conditional exit, give up (because anything after that
3383 does not postdominate this put).
3384 * If a Get which might overlap, give up (because this PutI
3385 is not necessarily dead).
3386 * If a Put which is identical, stop with success.
3387 * If a Put which might overlap, but is not identical, give up.
3388 * If a dirty helper call which might write guest state, give up.
3389 * If a Put which definitely doesn't overlap, or any other
3390 kind of stmt, continue.
3391 */
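/* For illustration: PUTI(d)[t1,5] = t2 followed later by
   PUTI(d)[t1,5] = t3, with no intervening exit and nothing that
   might read the slot, makes the first PutI redundant. */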
3392 delete = False;
3393 for (j = i+1; j < bb->stmts_used; j++) {
3394 stj = bb->stmts[j];
3395 if (stj->tag == Ist_NoOp)
3396 continue;
3397 if (identicalPutIs(st, stj)) {
3398 /* success! */
3399 delete = True;
3400 break;
3401 }
3402 if (stj->tag == Ist_Exit)
3403 /* give up */
3404 break;
3405 if (stj->tag == Ist_Dirty)
3406 /* give up; could do better here */
3407 break;
3408 if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
3409 /* give up */
3410 break;
3411 }
3412
3413 if (delete) {
3414 if (DEBUG_IROPT) {
3415 vex_printf("rPI: ");
3416 ppIRStmt(st);
3417 vex_printf("\n");
3418 }
3419 bb->stmts[i] = IRStmt_NoOp();
3420 }
3421
3422 }
3423 }
3424
3425
3426 /*---------------------------------------------------------------*/
3427 /*--- Loop unrolling ---*/
3428 /*---------------------------------------------------------------*/
3429
3430 /* Adjust all tmp values (names) in e by delta. e is destructively
3431 modified. */
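/* For example, deltaIRExpr with delta == 10 rewrites
   Add32(t3,t7) in place to Add32(t13,t17). */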
3432
3433 static void deltaIRExpr ( IRExpr* e, Int delta )
3434 {
3435 Int i;
3436 switch (e->tag) {
3437 case Iex_RdTmp:
3438 e->Iex.RdTmp.tmp += delta;
3439 break;
3440 case Iex_Get:
3441 case Iex_Const:
3442 break;
3443 case Iex_GetI:
3444 deltaIRExpr(e->Iex.GetI.ix, delta);
3445 break;
3446 case Iex_Qop:
3447 deltaIRExpr(e->Iex.Qop.arg1, delta);
3448 deltaIRExpr(e->Iex.Qop.arg2, delta);
3449 deltaIRExpr(e->Iex.Qop.arg3, delta);
3450 deltaIRExpr(e->Iex.Qop.arg4, delta);
3451 break;
3452 case Iex_Triop:
3453 deltaIRExpr(e->Iex.Triop.arg1, delta);
3454 deltaIRExpr(e->Iex.Triop.arg2, delta);
3455 deltaIRExpr(e->Iex.Triop.arg3, delta);
3456 break;
3457 case Iex_Binop:
3458 deltaIRExpr(e->Iex.Binop.arg1, delta);
3459 deltaIRExpr(e->Iex.Binop.arg2, delta);
3460 break;
3461 case Iex_Unop:
3462 deltaIRExpr(e->Iex.Unop.arg, delta);
3463 break;
3464 case Iex_Load:
3465 deltaIRExpr(e->Iex.Load.addr, delta);
3466 break;
3467 case Iex_CCall:
3468 for (i = 0; e->Iex.CCall.args[i]; i++)
3469 deltaIRExpr(e->Iex.CCall.args[i], delta);
3470 break;
3471 case Iex_Mux0X:
3472 deltaIRExpr(e->Iex.Mux0X.cond, delta);
3473 deltaIRExpr(e->Iex.Mux0X.expr0, delta);
3474 deltaIRExpr(e->Iex.Mux0X.exprX, delta);
3475 break;
3476 default:
3477 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3478 vpanic("deltaIRExpr");
3479 }
3480 }
3481
3482 /* Adjust all tmp values (names) in st by delta. st is destructively
3483 modified. */
3484
3485 static void deltaIRStmt ( IRStmt* st, Int delta )
3486 {
3487 Int i;
3488 IRDirty* d;
3489 switch (st->tag) {
3490 case Ist_NoOp:
3491 case Ist_IMark:
3492 case Ist_MBE:
3493 break;
3494 case Ist_AbiHint:
3495 deltaIRExpr(st->Ist.AbiHint.base, delta);
3496 deltaIRExpr(st->Ist.AbiHint.nia, delta);
3497 break;
3498 case Ist_Put:
3499 deltaIRExpr(st->Ist.Put.data, delta);
3500 break;
3501 case Ist_PutI:
3502 deltaIRExpr(st->Ist.PutI.ix, delta);
3503 deltaIRExpr(st->Ist.PutI.data, delta);
3504 break;
3505 case Ist_WrTmp:
3506 st->Ist.WrTmp.tmp += delta;
3507 deltaIRExpr(st->Ist.WrTmp.data, delta);
3508 break;
3509 case Ist_Exit:
3510 deltaIRExpr(st->Ist.Exit.guard, delta);
3511 break;
3512 case Ist_Store:
3513 deltaIRExpr(st->Ist.Store.addr, delta);
3514 deltaIRExpr(st->Ist.Store.data, delta);
3515 break;
3516 case Ist_CAS:
3517 if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
3518 st->Ist.CAS.details->oldHi += delta;
3519 st->Ist.CAS.details->oldLo += delta;
3520 deltaIRExpr(st->Ist.CAS.details->addr, delta);
3521 if (st->Ist.CAS.details->expdHi)
3522 deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
3523 deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
3524 if (st->Ist.CAS.details->dataHi)
3525 deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
3526 deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
3527 break;
3528 case Ist_LLSC:
3529 st->Ist.LLSC.result += delta;
3530 deltaIRExpr(st->Ist.LLSC.addr, delta);
3531 if (st->Ist.LLSC.storedata)
3532 deltaIRExpr(st->Ist.LLSC.storedata, delta);
3533 break;
3534 case Ist_Dirty:
3535 d = st->Ist.Dirty.details;
3536 deltaIRExpr(d->guard, delta);
3537 for (i = 0; d->args[i]; i++)
3538 deltaIRExpr(d->args[i], delta);
3539 if (d->tmp != IRTemp_INVALID)
3540 d->tmp += delta;
3541 if (d->mAddr)
3542 deltaIRExpr(d->mAddr, delta);
3543 break;
3544 default:
3545 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
3546 vpanic("deltaIRStmt");
3547 }
3548 }
3549
3550
3551 /* If possible, return a loop-unrolled version of bb0. The original
3552 is changed. If not possible, return NULL. */
3553
3554 /* The two schemas considered are:
3555
3556 X: BODY; goto X
3557
3558 which unrolls to (eg) X: BODY;BODY; goto X
3559
3560 and
3561
3562 X: BODY; if (c) goto X; goto Y
3563 which trivially transforms to
3564 X: BODY; if (!c) goto Y; goto X;
3565 so it falls in the scope of the first case.
3566
3567 X and Y must be literal (guest) addresses.
3568 */
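/* For example, with unroll_factor == 2, idiom #1 becomes
      X: BODY; BODY'; goto X
   where BODY' is a copy of BODY whose IRTemps have all been
   shifted up by the original number of temps (see deltaIRStmt
   above), so the temps of the two copies do not clash. */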
3569
3570 static Int calc_unroll_factor( IRSB* bb )
3571 {
3572 Int n_stmts, i;
3573
3574 n_stmts = 0;
3575 for (i = 0; i < bb->stmts_used; i++) {
3576 if (bb->stmts[i]->tag != Ist_NoOp)
3577 n_stmts++;
3578 }
3579
3580 if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
3581 if (vex_control.iropt_verbosity > 0)
3582 vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
3583 n_stmts, 8* n_stmts);
3584 return 8;
3585 }
3586 if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
3587 if (vex_control.iropt_verbosity > 0)
3588 vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
3589 n_stmts, 4* n_stmts);
3590 return 4;
3591 }
3592
3593 if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
3594 if (vex_control.iropt_verbosity > 0)
3595 vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
3596 n_stmts, 2* n_stmts);
3597 return 2;
3598 }
3599
3600 if (vex_control.iropt_verbosity > 0)
3601 vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
3602
3603 return 1;
3604 }
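/* Worked example: with iropt_unroll_thresh at, say, 120, a block
   of 14 non-no-op stmts satisfies 14 <= 120/8 and gets factor 8,
   whereas a block of 50 stmts only satisfies 50 <= 120/2 and
   gets factor 2. */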
3605
3606
3607 static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr64 my_addr )
3608 {
3609 Int i, j, jmax, n_vars;
3610 Bool xxx_known;
3611 Addr64 xxx_value, yyy_value;
3612 IRExpr* udst;
3613 IRStmt* st;
3614 IRConst* con;
3615 IRSB *bb1, *bb2;
3616 Int unroll_factor;
3617
3618 if (vex_control.iropt_unroll_thresh <= 0)
3619 return NULL;
3620
3621 /* First off, figure out if we can unroll this loop. Do this
3622 without modifying bb0. */
3623
3624 if (bb0->jumpkind != Ijk_Boring)
3625 return NULL;
3626
3627 xxx_known = False;
3628 xxx_value = 0;
3629
3630 /* Extract the next-guest address. If it isn't a literal, we
3631 have to give up. */
3632
3633 udst = bb0->next;
3634 if (udst->tag == Iex_Const
3635 && (udst->Iex.Const.con->tag == Ico_U32
3636 || udst->Iex.Const.con->tag == Ico_U64)) {
3637 /* The BB ends in a jump to a literal location. */
3638 xxx_known = True;
3639 xxx_value = udst->Iex.Const.con->tag == Ico_U64
3640 ? udst->Iex.Const.con->Ico.U64
3641 : (Addr64)(udst->Iex.Const.con->Ico.U32);
3642 }
3643
3644 if (!xxx_known)
3645 return NULL;
3646
3647 /* Now we know the BB ends in a jump to a literal location. If
3648 it's a jump to itself (viz, idiom #1), move directly to the
3649 unrolling stage, first cloning the bb so the original isn't
3650 modified. */
3651 if (xxx_value == my_addr) {
3652 unroll_factor = calc_unroll_factor( bb0 );
3653 if (unroll_factor < 2)
3654 return NULL;
3655 bb1 = deepCopyIRSB( bb0 );
3656 bb0 = NULL;
3657 udst = NULL; /* is now invalid */
3658 goto do_unroll;
3659 }
3660
3661 /* Search for the second idiomatic form:
3662 X: BODY; if (c) goto X; goto Y
3663 We know Y, but need to establish that the last stmt
3664 is 'if (c) goto X'.
3665 */
3666 yyy_value = xxx_value;
3667 for (i = bb0->stmts_used-1; i >= 0; i--)
3668 if (bb0->stmts[i])
3669 break;
3670
3671 if (i < 0)
3672 return NULL; /* block with no stmts. Strange. */
3673
3674 st = bb0->stmts[i];
3675 if (st->tag != Ist_Exit)
3676 return NULL;
3677 if (st->Ist.Exit.jk != Ijk_Boring)
3678 return NULL;
3679
3680 con = st->Ist.Exit.dst;
3681 vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
3682
3683 xxx_value = con->tag == Ico_U64
3684 ? st->Ist.Exit.dst->Ico.U64
3685 : (Addr64)(st->Ist.Exit.dst->Ico.U32);
3686
3687 /* If this assertion fails, we have some kind of type error. */
3688 vassert(con->tag == udst->Iex.Const.con->tag);
3689
3690 if (xxx_value != my_addr)
3691 /* We didn't find either idiom. Give up. */
3692 return NULL;
3693
3694 /* Ok, we found idiom #2. Copy the BB, switch around the xxx and
3695 yyy values (which makes it look like idiom #1), and go into
3696 unrolling proper. This means finding (again) the last stmt, in
3697 the copied BB. */
3698
3699 unroll_factor = calc_unroll_factor( bb0 );
3700 if (unroll_factor < 2)
3701 return NULL;
3702
3703 bb1 = deepCopyIRSB( bb0 );
3704 bb0 = NULL;
3705 udst = NULL; /* is now invalid */
3706 for (i = bb1->stmts_used-1; i >= 0; i--)
3707 if (bb1->stmts[i])
3708 break;
3709
3710 /* The next bunch of assertions should be true since we already
3711 found and checked the last stmt in the original bb. */
3712
3713 vassert(i >= 0);
3714
3715 st = bb1->stmts[i];
3716 vassert(st->tag == Ist_Exit);
3717
3718 con = st->Ist.Exit.dst;
3719 vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
3720
3721 udst = bb1->next;
3722 vassert(udst->tag == Iex_Const);
3723 vassert(udst->Iex.Const.con->tag == Ico_U32
3724 || udst->Iex.Const.con->tag == Ico_U64);
3725 vassert(con->tag == udst->Iex.Const.con->tag);
3726
3727 /* switch the xxx and yyy fields around */
3728 if (con->tag == Ico_U64) {
3729 udst->Iex.Const.con->Ico.U64 = xxx_value;
3730 con->Ico.U64 = yyy_value;
3731 } else {
3732 udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
3733 con->Ico.U32 = (UInt)yyy_value;
3734 }
3735
3736 /* negate the test condition */
3737 st->Ist.Exit.guard
3738 = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));
3739
3740 /* --- The unroller proper. Both idioms have by now --- */
3741 /* --- been converted to idiom 1. --- */
3742
3743 do_unroll:
3744
3745 vassert(unroll_factor == 2
3746 || unroll_factor == 4
3747 || unroll_factor == 8);
3748
3749 jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
3750 for (j = 1; j <= jmax; j++) {
3751
3752 n_vars = bb1->tyenv->types_used;
3753
3754 bb2 = deepCopyIRSB(bb1);
3755 for (i = 0; i < n_vars; i++)
3756 (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);
3757
3758 for (i = 0; i < bb2->stmts_used; i++) {
3759 /* deltaIRStmt destructively modifies the stmt, but
3760 that's OK since bb2 is a complete fresh copy of bb1. */
3761 deltaIRStmt(bb2->stmts[i], n_vars);
3762 addStmtToIRSB(bb1, bb2->stmts[i]);
3763 }
3764 }
3765
3766 if (DEBUG_IROPT) {
3767 vex_printf("\nUNROLLED (%llx)\n", my_addr);
3768 ppIRSB(bb1);
3769 vex_printf("\n");
3770 }
3771
3772 /* Flattening; sigh. The unroller succeeds in breaking flatness
3773 by negating the test condition. This should be fixed properly.
3774 For the moment use this shotgun approach. */
3775 return flatten_BB(bb1);
3776 }
3777
3778
3779 /*---------------------------------------------------------------*/
3780 /*--- The tree builder ---*/
3781 /*---------------------------------------------------------------*/
3782
3783 /* This isn't part of IR optimisation. Really it's a pass done prior
3784 to instruction selection, which improves the code that the
3785 instruction selector can produce. */
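/* A purely illustrative example (hypothetical temps): tree building
   turns flat code such as

      t1 = GET:I32(0)
      t2 = Add32(t1,t3)
      PUT(0) = t2

   into the single statement

      PUT(0) = Add32(GET:I32(0),t3)

   provided t1 and t2 are each used exactly once. The instruction
   selector then sees whole expression trees rather than chains of
   one-operation temporaries, and can produce better code. */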
3786
3787 /* --- The 'tmp' environment is the central data structure here --- */
3788
3789 /* The number of outstanding bindings we're prepared to track.
3790 The number of times the env becomes full and we have to dump
3791 the oldest binding (hence reducing code quality) falls very
3792 rapidly as the env size increases. 8 gives reasonable performance
3793 under most circumstances. */
3794 #define A_NENV 10
3795
3796 /* bindee == NULL === slot is not in use
3797 bindee != NULL === slot is in use
3798 */
3799 typedef
3800 struct {
3801 IRTemp binder;
3802 IRExpr* bindee;
3803 Bool doesLoad;
3804 Bool doesGet;
3805 }
3806 ATmpInfo;
3807
3808 __attribute__((unused))
3809 static void ppAEnv ( ATmpInfo* env )
3810 {
3811 Int i;
3812 for (i = 0; i < A_NENV; i++) {
3813 vex_printf("%d tmp %d val ", i, (Int)env[i].binder);
3814 if (env[i].bindee)
3815 ppIRExpr(env[i].bindee);
3816 else
3817 vex_printf("(null)");
3818 vex_printf("\n");
3819 }
3820 }
3821
3822 /* --- Tree-traversal fns --- */
3823
3824 /* Traverse an expr, and detect if any part of it reads memory or does
3825 a Get. Be careful ... this really controls how much the
3826 tree-builder can reorder the code, so getting it right is critical.
3827 */
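/* For instance (a hypothetical expression): applied to
   Add32(LDle:I32(t1),GET:I32(16)) this sets both *doesLoad and
   *doesGet to True. Note the flags are only ever set, never
   cleared, so callers must initialise them to False. */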
3828 static void setHints_Expr (Bool* doesLoad, Bool* doesGet, IRExpr* e )
3829 {
3830 Int i;
3831 switch (e->tag) {
3832 case Iex_CCall:
3833 for (i = 0; e->Iex.CCall.args[i]; i++)
3834 setHints_Expr(doesLoad, doesGet, e->Iex.CCall.args[i]);
3835 return;
3836 case Iex_Mux0X:
3837 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.cond);
3838 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.expr0);
3839 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.exprX);
3840 return;
3841 case Iex_Qop:
3842 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg1);
3843 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg2);
3844 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg3);
3845 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg4);
3846 return;
3847 case Iex_Triop:
3848 setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg1);
3849 setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg2);
3850 setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg3);
3851 return;
3852 case Iex_Binop:
3853 setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg1);
3854 setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg2);
3855 return;
3856 case Iex_Unop:
3857 setHints_Expr(doesLoad, doesGet, e->Iex.Unop.arg);
3858 return;
3859 case Iex_Load:
3860 *doesLoad = True;
3861 setHints_Expr(doesLoad, doesGet, e->Iex.Load.addr);
3862 return;
3863 case Iex_Get:
3864 *doesGet = True;
3865 return;
3866 case Iex_GetI:
3867 *doesGet = True;
3868 setHints_Expr(doesLoad, doesGet, e->Iex.GetI.ix);
3869 return;
3870 case Iex_RdTmp:
3871 case Iex_Const:
3872 return;
3873 default:
3874 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3875 vpanic("setHints_Expr");
3876 }
3877 }
3878
3879
3880 /* Add a binding to the front of the env and slide all the rest
3881 backwards. It should be the case that the last slot is free. */
3882 static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
3883 {
3884 Int i;
3885 vassert(env[A_NENV-1].bindee == NULL);
3886 for (i = A_NENV-1; i >= 1; i--)
3887 env[i] = env[i-1];
3888 env[0].binder = binder;
3889 env[0].bindee = bindee;
3890 env[0].doesLoad = False; /* filled in later */
3891 env[0].doesGet = False; /* filled in later */
3892 }
3893
3894 /* Given uses :: array of UShort, indexed by IRTemp.
3895 Add the use-occurrences of temps in this expression
3896 to uses[].
3897 */
3898 static void aoccCount_Expr ( UShort* uses, IRExpr* e )
3899 {
3900 Int i;
3901
3902 switch (e->tag) {
3903
3904 case Iex_RdTmp: /* the only interesting case */
3905 uses[e->Iex.RdTmp.tmp]++;
3906 return;
3907
3908 case Iex_Mux0X:
3909 aoccCount_Expr(uses, e->Iex.Mux0X.cond);
3910 aoccCount_Expr(uses, e->Iex.Mux0X.expr0);
3911 aoccCount_Expr(uses, e->Iex.Mux0X.exprX);
3912 return;
3913
3914 case Iex_Qop:
3915 aoccCount_Expr(uses, e->Iex.Qop.arg1);
3916 aoccCount_Expr(uses, e->Iex.Qop.arg2);
3917 aoccCount_Expr(uses, e->Iex.Qop.arg3);
3918 aoccCount_Expr(uses, e->Iex.Qop.arg4);
3919 return;
3920
3921 case Iex_Triop:
3922 aoccCount_Expr(uses, e->Iex.Triop.arg1);
3923 aoccCount_Expr(uses, e->Iex.Triop.arg2);
3924 aoccCount_Expr(uses, e->Iex.Triop.arg3);
3925 return;
3926
3927 case Iex_Binop:
3928 aoccCount_Expr(uses, e->Iex.Binop.arg1);
3929 aoccCount_Expr(uses, e->Iex.Binop.arg2);
3930 return;
3931
3932 case Iex_Unop:
3933 aoccCount_Expr(uses, e->Iex.Unop.arg);
3934 return;
3935
3936 case Iex_Load:
3937 aoccCount_Expr(uses, e->Iex.Load.addr);
3938 return;
3939
3940 case Iex_CCall:
3941 for (i = 0; e->Iex.CCall.args[i]; i++)
3942 aoccCount_Expr(uses, e->Iex.CCall.args[i]);
3943 return;
3944
3945 case Iex_GetI:
3946 aoccCount_Expr(uses, e->Iex.GetI.ix);
3947 return;
3948
3949 case Iex_Const:
3950 case Iex_Get:
3951 return;
3952
3953 default:
3954 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3955 vpanic("aoccCount_Expr");
3956 }
3957 }
3958
3959
3960 /* Given uses :: array of UShort, indexed by IRTemp.
3961 Add the use-occurrences of temps in this statement
3962 to uses[].
3963 */
3964 static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
3965 {
3966 Int i;
3967 IRDirty* d;
3968 IRCAS* cas;
3969 switch (st->tag) {
3970 case Ist_AbiHint:
3971 aoccCount_Expr(uses, st->Ist.AbiHint.base);
3972 aoccCount_Expr(uses, st->Ist.AbiHint.nia);
3973 return;
3974 case Ist_WrTmp:
3975 aoccCount_Expr(uses, st->Ist.WrTmp.data);
3976 return;
3977 case Ist_Put:
3978 aoccCount_Expr(uses, st->Ist.Put.data);
3979 return;
3980 case Ist_PutI:
3981 aoccCount_Expr(uses, st->Ist.PutI.ix);
3982 aoccCount_Expr(uses, st->Ist.PutI.data);
3983 return;
3984 case Ist_Store:
3985 aoccCount_Expr(uses, st->Ist.Store.addr);
3986 aoccCount_Expr(uses, st->Ist.Store.data);
3987 return;
3988 case Ist_CAS:
3989 cas = st->Ist.CAS.details;
3990 aoccCount_Expr(uses, cas->addr);
3991 if (cas->expdHi)
3992 aoccCount_Expr(uses, cas->expdHi);
3993 aoccCount_Expr(uses, cas->expdLo);
3994 if (cas->dataHi)
3995 aoccCount_Expr(uses, cas->dataHi);
3996 aoccCount_Expr(uses, cas->dataLo);
3997 return;
3998 case Ist_LLSC:
3999 aoccCount_Expr(uses, st->Ist.LLSC.addr);
4000 if (st->Ist.LLSC.storedata)
4001 aoccCount_Expr(uses, st->Ist.LLSC.storedata);
4002 return;
4003 case Ist_Dirty:
4004 d = st->Ist.Dirty.details;
4005 if (d->mFx != Ifx_None)
4006 aoccCount_Expr(uses, d->mAddr);
4007 aoccCount_Expr(uses, d->guard);
4008 for (i = 0; d->args[i]; i++)
4009 aoccCount_Expr(uses, d->args[i]);
4010 return;
4011 case Ist_NoOp:
4012 case Ist_IMark:
4013 case Ist_MBE:
4014 return;
4015 case Ist_Exit:
4016 aoccCount_Expr(uses, st->Ist.Exit.guard);
4017 return;
4018 default:
4019 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
4020 vpanic("aoccCount_Stmt");
4021 }
4022 }
4023
4024 /* Look up a binding for tmp in the env. If found, return the bound
4025 expression, and set the env's binding to NULL so it is marked as
4026 used. If not found, return NULL. */
4027
4028 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
4029 {
4030 Int i;
4031 for (i = 0; i < A_NENV; i++) {
4032 if (env[i].binder == tmp && env[i].bindee != NULL) {
4033 IRExpr* bindee = env[i].bindee;
4034 env[i].bindee = NULL;
4035 return bindee;
4036 }
4037 }
4038 return NULL;
4039 }
4040
4041 /* Traverse e, looking for temps. For each observed temp, see if env
4042 contains a binding for the temp, and if so return the bound value.
4043 The env has the property that any binding it holds is
4044 'single-shot', so once a binding is used, it is marked as no longer
4045 available, by setting its .bindee field to NULL. */
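/* Example (hypothetical binding): if env holds t5 -> Add32(t1,t2),
   the first RdTmp(t5) met during traversal is replaced by
   Add32(t1,t2) and the slot is cleared. This single-shot rule is
   sound because only temps with exactly one use ever enter the env
   (see ado_treebuild_BB below). */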
4046
4047 static inline Bool is_Unop ( IRExpr* e, IROp op ) {
4048 return e->tag == Iex_Unop && e->Iex.Unop.op == op;
4049 }
4050 static inline Bool is_Binop ( IRExpr* e, IROp op ) {
4051 return e->tag == Iex_Binop && e->Iex.Binop.op == op;
4052 }
4053
4054 static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
4055 {
4056 switch (op) {
4057 case Iop_Or32:
4058 /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) ) */
4059 if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
4060 return IRExpr_Unop( Iop_CmpwNEZ32,
4061 IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
4062 a2->Iex.Unop.arg ) );
4063 break;
4064
4065 case Iop_CmpNE32:
4066 /* Since X has type Ity_I1 we can simplify:
4067 CmpNE32(1Uto32(X),0) ==> X */
4068 if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
4069 return a1->Iex.Unop.arg;
4070 break;
4071
4072 default:
4073 break;
4074 }
4075 /* no reduction rule applies */
4076 return IRExpr_Binop( op, a1, a2 );
4077 }
4078
4079 static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
4080 {
4081 switch (op) {
4082 case Iop_CmpwNEZ64:
4083 /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
4084 if (is_Binop(aa, Iop_Or64)
4085 && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
4086 return fold_IRExpr_Unop(
4087 Iop_CmpwNEZ64,
4088 IRExpr_Binop(Iop_Or64,
4089 aa->Iex.Binop.arg1->Iex.Unop.arg,
4090 aa->Iex.Binop.arg2));
4091 /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
4092 if (is_Binop(aa, Iop_Or64)
4093 && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
4094 return fold_IRExpr_Unop(
4095 Iop_CmpwNEZ64,
4096 IRExpr_Binop(Iop_Or64,
4097 aa->Iex.Binop.arg1,
4098 aa->Iex.Binop.arg2->Iex.Unop.arg));
4099 break;
4100 case Iop_CmpNEZ64:
4101 /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
4102 if (is_Unop(aa, Iop_Left64))
4103 return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
4104 break;
4105 case Iop_CmpwNEZ32:
4106 /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
4107 if (is_Unop(aa, Iop_CmpwNEZ32))
4108 return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
4109 break;
4110 case Iop_CmpNEZ32:
4111 /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
4112 if (is_Unop(aa, Iop_Left32))
4113 return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
4114 /* CmpNEZ32( 1Uto32(X) ) --> X */
4115 if (is_Unop(aa, Iop_1Uto32))
4116 return aa->Iex.Unop.arg;
4117 break;
4118 case Iop_CmpNEZ8:
4119 /* CmpNEZ8( 1Uto8(X) ) --> X */
4120 if (is_Unop(aa, Iop_1Uto8))
4121 return aa->Iex.Unop.arg;
4122 break;
4123 case Iop_Left32:
4124 /* Left32( Left32(x) ) --> Left32(x) */
4125 if (is_Unop(aa, Iop_Left32))
4126 return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
4127 break;
4128 case Iop_32to1:
4129 /* 32to1( 1Uto32 ( x ) ) --> x */
4130 if (is_Unop(aa, Iop_1Uto32))
4131 return aa->Iex.Unop.arg;
4132 /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
4133 if (is_Unop(aa, Iop_CmpwNEZ32))
4134 return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
4135 break;
4136 case Iop_64to1:
4137 /* 64to1( 1Uto64 ( x ) ) --> x */
4138 if (is_Unop(aa, Iop_1Uto64))
4139 return aa->Iex.Unop.arg;
4140 /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
4141 if (is_Unop(aa, Iop_CmpwNEZ64))
4142 return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
4143 break;
4144 case Iop_64to32:
4145 /* 64to32( 32Uto64 ( x )) --> x */
4146 if (is_Unop(aa, Iop_32Uto64))
4147 return aa->Iex.Unop.arg;
4148 /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
4149 if (is_Unop(aa, Iop_8Uto64))
4150 return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
4151 break;
4152
4153 case Iop_32Uto64:
4154 /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
4155 if (is_Unop(aa, Iop_8Uto32))
4156 return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
4157 /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
4158 if (is_Unop(aa, Iop_16Uto32))
4159 return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
4160 break;
4161
4162 case Iop_1Sto32:
4163 /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
4164 if (is_Unop(aa, Iop_CmpNEZ8)
4165 && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
4166 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
4167 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
4168 Iop_CmpNEZ32)) {
4169 return IRExpr_Unop( Iop_CmpwNEZ32,
4170 aa->Iex.Unop.arg->Iex.Unop.arg
4171 ->Iex.Unop.arg->Iex.Unop.arg);
4172 }
4173 break;
4174
4175
4176 default:
4177 break;
4178 }
4179 /* no reduction rule applies */
4180 return IRExpr_Unop( op, aa );
4181 }
4182
4183 static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
4184 {
4185 IRExpr* e2;
4186 IRExpr** args2;
4187 Int i;
4188
4189 switch (e->tag) {
4190
4191 case Iex_CCall:
4192 args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
4193 for (i = 0; args2[i]; i++)
4194 args2[i] = atbSubst_Expr(env,args2[i]);
4195 return IRExpr_CCall(
4196 e->Iex.CCall.cee,
4197 e->Iex.CCall.retty,
4198 args2
4199 );
4200 case Iex_RdTmp:
4201 e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
4202 return e2 ? e2 : e;
4203 case Iex_Mux0X:
4204 return IRExpr_Mux0X(
4205 atbSubst_Expr(env, e->Iex.Mux0X.cond),
4206 atbSubst_Expr(env, e->Iex.Mux0X.expr0),
4207 atbSubst_Expr(env, e->Iex.Mux0X.exprX)
4208 );
4209 case Iex_Qop:
4210 return IRExpr_Qop(
4211 e->Iex.Qop.op,
4212 atbSubst_Expr(env, e->Iex.Qop.arg1),
4213 atbSubst_Expr(env, e->Iex.Qop.arg2),
4214 atbSubst_Expr(env, e->Iex.Qop.arg3),
4215 atbSubst_Expr(env, e->Iex.Qop.arg4)
4216 );
4217 case Iex_Triop:
4218 return IRExpr_Triop(
4219 e->Iex.Triop.op,
4220 atbSubst_Expr(env, e->Iex.Triop.arg1),
4221 atbSubst_Expr(env, e->Iex.Triop.arg2),
4222 atbSubst_Expr(env, e->Iex.Triop.arg3)
4223 );
4224 case Iex_Binop:
4225 return fold_IRExpr_Binop(
4226 e->Iex.Binop.op,
4227 atbSubst_Expr(env, e->Iex.Binop.arg1),
4228 atbSubst_Expr(env, e->Iex.Binop.arg2)
4229 );
4230 case Iex_Unop:
4231 return fold_IRExpr_Unop(
4232 e->Iex.Unop.op,
4233 atbSubst_Expr(env, e->Iex.Unop.arg)
4234 );
4235 case Iex_Load:
4236 return IRExpr_Load(
4237 e->Iex.Load.end,
4238 e->Iex.Load.ty,
4239 atbSubst_Expr(env, e->Iex.Load.addr)
4240 );
4241 case Iex_GetI:
4242 return IRExpr_GetI(
4243 e->Iex.GetI.descr,
4244 atbSubst_Expr(env, e->Iex.GetI.ix),
4245 e->Iex.GetI.bias
4246 );
4247 case Iex_Const:
4248 case Iex_Get:
4249 return e;
4250 default:
4251 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4252 vpanic("atbSubst_Expr");
4253 }
4254 }
4255
4256 /* Same deal as atbSubst_Expr, except for stmts. */
4257
4258 static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
4259 {
4260 Int i;
4261 IRDirty *d, *d2;
4262 IRCAS *cas, *cas2;
4263 switch (st->tag) {
4264 case Ist_AbiHint:
4265 return IRStmt_AbiHint(
4266 atbSubst_Expr(env, st->Ist.AbiHint.base),
4267 st->Ist.AbiHint.len,
4268 atbSubst_Expr(env, st->Ist.AbiHint.nia)
4269 );
4270 case Ist_Store:
4271 return IRStmt_Store(
4272 st->Ist.Store.end,
4273 atbSubst_Expr(env, st->Ist.Store.addr),
4274 atbSubst_Expr(env, st->Ist.Store.data)
4275 );
4276 case Ist_WrTmp:
4277 return IRStmt_WrTmp(
4278 st->Ist.WrTmp.tmp,
4279 atbSubst_Expr(env, st->Ist.WrTmp.data)
4280 );
4281 case Ist_Put:
4282 return IRStmt_Put(
4283 st->Ist.Put.offset,
4284 atbSubst_Expr(env, st->Ist.Put.data)
4285 );
4286 case Ist_PutI:
4287 return IRStmt_PutI(
4288 st->Ist.PutI.descr,
4289 atbSubst_Expr(env, st->Ist.PutI.ix),
4290 st->Ist.PutI.bias,
4291 atbSubst_Expr(env, st->Ist.PutI.data)
4292 );
4293
4294 case Ist_Exit:
4295 return IRStmt_Exit(
4296 atbSubst_Expr(env, st->Ist.Exit.guard),
4297 st->Ist.Exit.jk,
4298 st->Ist.Exit.dst
4299 );
4300 case Ist_IMark:
4301 return IRStmt_IMark(st->Ist.IMark.addr,
4302 st->Ist.IMark.len,
4303 st->Ist.IMark.delta);
4304 case Ist_NoOp:
4305 return IRStmt_NoOp();
4306 case Ist_MBE:
4307 return IRStmt_MBE(st->Ist.MBE.event);
4308 case Ist_CAS:
4309 cas = st->Ist.CAS.details;
4310 cas2 = mkIRCAS(
4311 cas->oldHi, cas->oldLo, cas->end,
4312 atbSubst_Expr(env, cas->addr),
4313 cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
4314 atbSubst_Expr(env, cas->expdLo),
4315 cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
4316 atbSubst_Expr(env, cas->dataLo)
4317 );
4318 return IRStmt_CAS(cas2);
4319 case Ist_LLSC:
4320 return IRStmt_LLSC(
4321 st->Ist.LLSC.end,
4322 st->Ist.LLSC.result,
4323 atbSubst_Expr(env, st->Ist.LLSC.addr),
4324 st->Ist.LLSC.storedata
4325 ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
4326 );
4327 case Ist_Dirty:
4328 d = st->Ist.Dirty.details;
4329 d2 = emptyIRDirty();
4330 *d2 = *d;
4331 if (d2->mFx != Ifx_None)
4332 d2->mAddr = atbSubst_Expr(env, d2->mAddr);
4333 d2->guard = atbSubst_Expr(env, d2->guard);
4334 for (i = 0; d2->args[i]; i++)
4335 d2->args[i] = atbSubst_Expr(env, d2->args[i]);
4336 return IRStmt_Dirty(d2);
4337 default:
4338 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
4339 vpanic("atbSubst_Stmt");
4340 }
4341 }
4342
4343 /* notstatic */ void ado_treebuild_BB ( IRSB* bb )
4344 {
4345 Int i, j, k, m;
4346 Bool stmtPuts, stmtStores, invalidateMe;
4347 IRStmt* st;
4348 IRStmt* st2;
4349 ATmpInfo env[A_NENV];
4350
4351 Int n_tmps = bb->tyenv->types_used;
4352 UShort* uses = LibVEX_Alloc(n_tmps * sizeof(UShort));
4353
4354 /* Phase 1. Scan forwards in bb, counting use occurrences of each
4355 temp. Also count occurrences in the bb->next field. */
4356
4357 for (i = 0; i < n_tmps; i++)
4358 uses[i] = 0;
4359
4360 for (i = 0; i < bb->stmts_used; i++) {
4361 st = bb->stmts[i];
4362 if (st->tag == Ist_NoOp)
4363 continue;
4364 aoccCount_Stmt( uses, st );
4365 }
4366 aoccCount_Expr(uses, bb->next );
4367
4368 # if 0
4369 for (i = 0; i < n_tmps; i++) {
4370 if (uses[i] == 0)
4371 continue;
4372 ppIRTemp( (IRTemp)i );
4373 vex_printf(" used %d\n", (Int)uses[i] );
4374 }
4375 # endif
4376
4377 /* Phase 2. Scan forwards in bb. For each statement in turn:
4378
4379 If the env is full, emit the oldest (end) element. This
4380 guarantees there is at least one free slot in what follows.
4381
4382 On seeing 't = E', occ(t)==1,
4383 let E'=env(E)
4384 delete this stmt
4385 add t -> E' to the front of the env
4386 Examine E' and set the hints for E' appropriately
4387 (doesLoad? doesGet?)
4388
4389 On seeing any other stmt,
4390 let stmt' = env(stmt)
4391 remove from env any 't=E' binds invalidated by stmt
4392 emit the invalidated stmts
4393 emit stmt'
4394 compact any holes in env
4395 by sliding entries towards the front
4396
4397 Finally, apply env to bb->next.
4398 */
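/* A worked example (hypothetical IR) of the invalidation rule:
   given

      t1 = LDle:I32(t9)
      STle(t8) = t7
      PUT(4) = t1

   the binding t1 -> LDle:I32(t9) enters the env at the first stmt,
   but the store invalidates it (doesLoad && stmtStores), so the
   WrTmp is re-emitted ahead of the store instead of the load being
   carried forward into the PUT. The load/store order is thereby
   preserved. */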
4399
4400 for (i = 0; i < A_NENV; i++) {
4401 env[i].bindee = NULL;
4402 env[i].binder = IRTemp_INVALID;
4403 }
4404
4405 /* The stmts in bb are being reordered, and we are guaranteed to
4406 end up with no more than the number we started with. Let i
4407 be the cursor for the stmt currently being examined, and
4408 j <= i the cursor for the next stmt to be written.
4409 */
4410 j = 0;
4411 for (i = 0; i < bb->stmts_used; i++) {
4412
4413 st = bb->stmts[i];
4414 if (st->tag == Ist_NoOp)
4415 continue;
4416
4417 /* Ensure there's at least one space in the env, by emitting
4418 the oldest binding if necessary. */
4419 if (env[A_NENV-1].bindee != NULL) {
4420 bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
4421 env[A_NENV-1].bindee );
4422 j++;
4423 vassert(j <= i);
4424 env[A_NENV-1].bindee = NULL;
4425 }
4426
4427 /* Consider current stmt. */
4428 if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
4429 IRExpr *e, *e2;
4430
4431 /* optional extra: dump dead bindings as we find them.
4432 Removes the need for a prior dead-code removal pass. */
4433 if (uses[st->Ist.WrTmp.tmp] == 0) {
4434 if (0) vex_printf("DEAD binding\n");
4435 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
4436 }
4437 vassert(uses[st->Ist.WrTmp.tmp] == 1);
4438
4439 /* ok, we have 't = E', occ(t)==1. Do the abovementioned
4440 actions. */
4441 e = st->Ist.WrTmp.data;
4442 e2 = atbSubst_Expr(env, e);
4443 addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
4444 setHints_Expr(&env[0].doesLoad, &env[0].doesGet, e2);
4445 /* don't advance j, as we are deleting this stmt and instead
4446 holding it temporarily in the env. */
4447 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
4448 }
4449
4450 /* we get here for any other kind of statement. */
4451 /* 'use up' any bindings required by the current statement. */
4452 st2 = atbSubst_Stmt(env, st);
4453
4454 /* Now, before this stmt, dump any bindings in env that it
4455 invalidates. These need to be dumped in the order in which
4456 they originally entered env -- that means from oldest to
4457 youngest. */
4458
4459 /* stmtPuts/stmtStores characterise what the stmt under
4460 consideration does, or might do (erring towards True is safe). */
4461 stmtPuts
4462 = toBool( st->tag == Ist_Put
4463 || st->tag == Ist_PutI
4464 || st->tag == Ist_Dirty );
4465
4466 /* stmtStores is True if this stmt writes memory, or might do
4467 so (==> we don't want to reorder other loads or stores
4468 relative to it). Also, both LL and SC fall under this
4469 classification, since we really ought to be conservative and
4470 not reorder any other memory transactions relative to them. */
4471 stmtStores
4472 = toBool( st->tag == Ist_Store
4473 || st->tag == Ist_Dirty
4474 || st->tag == Ist_LLSC );
4475
4476 for (k = A_NENV-1; k >= 0; k--) {
4477 if (env[k].bindee == NULL)
4478 continue;
4479 /* Compare the actions of this stmt with the actions of
4480 binding 'k', to see if they invalidate the binding. */
4481 invalidateMe
4482 = toBool(
4483 /* a store invalidates loaded data */
4484 (env[k].doesLoad && stmtStores)
4485 /* a put invalidates get'd data */
4486 || (env[k].doesGet && stmtPuts)
4487 /* a put invalidates loaded data. Note, we could do
4488 much better here in the sense that we only need to
4489 invalidate trees containing loads if the Put in
4490 question is marked as requiring precise
4491 exceptions. */
4492 || (env[k].doesLoad && stmtPuts)
4493 /* probably overly conservative: a memory bus event
4494 invalidates absolutely everything, so that all
4495 computation prior to it is forced to complete before
4496 proceeding with the event (fence,lock,unlock). */
4497 || st->tag == Ist_MBE
4498 /* also be (probably overly) paranoid re AbiHints */
4499 || st->tag == Ist_AbiHint
4500 );
4501 if (invalidateMe) {
4502 bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
4503 j++;
4504 vassert(j <= i);
4505 env[k].bindee = NULL;
4506 }
4507 }
4508
4509 /* Slide in-use entries in env up to the front */
4510 m = 0;
4511 for (k = 0; k < A_NENV; k++) {
4512 if (env[k].bindee != NULL) {
4513 env[m] = env[k];
4514 m++;
4515 }
4516 }
4517 for (; m < A_NENV; m++) {
4518 env[m].bindee = NULL;
4519 }
4520
4521 /* finally, emit the substituted statement */
4522 bb->stmts[j] = st2;
4523 /* vex_printf("**2 "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
4524 j++;
4525
4526 vassert(j <= i+1);
4527 } /* for each stmt in the original bb ... */
4528
4529 /* Finally ... substitute the ->next field as much as possible,
4530 and dump any left-over bindings. Hmm. Perhaps there should be
4531 no left-over bindings? Or are any left-over bindings by
4532 definition dead? */
4533 bb->next = atbSubst_Expr(env, bb->next);
4534 bb->stmts_used = j;
4535 }
4536
4537
4538 /*---------------------------------------------------------------*/
4539 /*--- iropt main ---*/
4540 /*---------------------------------------------------------------*/
4541
4542 static Bool iropt_verbose = False; /* True; */
4543
4544
4545 /* Do a simple cleanup pass on bb. This is: redundant Get removal,
4546 redundant Put removal, constant propagation, dead code removal,
4547 clean helper specialisation, and dead code removal (again).
4548 */
4549
4550
4551 static
4552 IRSB* cheap_transformations (
4553 IRSB* bb,
4554 IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
4555 Bool (*preciseMemExnsFn)(Int,Int)
4556 )
4557 {
4558 redundant_get_removal_BB ( bb );
4559 if (iropt_verbose) {
4560 vex_printf("\n========= REDUNDANT GET\n\n" );
4561 ppIRSB(bb);
4562 }
4563
4564 redundant_put_removal_BB ( bb, preciseMemExnsFn );
4565 if (iropt_verbose) {
4566 vex_printf("\n========= REDUNDANT PUT\n\n" );
4567 ppIRSB(bb);
4568 }
4569
4570 bb = cprop_BB ( bb );
4571 if (iropt_verbose) {
4572 vex_printf("\n========= CPROPD\n\n" );
4573 ppIRSB(bb);
4574 }
4575
4576 do_deadcode_BB ( bb );
4577 if (iropt_verbose) {
4578 vex_printf("\n========= DEAD\n\n" );
4579 ppIRSB(bb);
4580 }
4581
4582 bb = spec_helpers_BB ( bb, specHelper );
4583 do_deadcode_BB ( bb );
4584 if (iropt_verbose) {
4585 vex_printf("\n========= SPECd \n\n" );
4586 ppIRSB(bb);
4587 }
4588
4589 return bb;
4590 }
4591
4592
4593 /* Do some more expensive transformations on bb, which are aimed at
4594 optimising as much as possible in the presence of GetI and PutI. */
4595
4596 static
4597 IRSB* expensive_transformations( IRSB* bb )
4598 {
4599 (void)do_cse_BB( bb );
4600 collapse_AddSub_chains_BB( bb );
4601 do_redundant_GetI_elimination( bb );
4602 do_redundant_PutI_elimination( bb );
4603 do_deadcode_BB( bb );
4604 return bb;
4605 }
4606
4607
4608 /* Scan a flattened BB to look for signs that more expensive
4609 optimisations might be useful:
4610 - find out if there are any GetIs and PutIs
4611 - find out if there are any floating or vector-typed temporaries
4612 */
4613
4614 static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
4615 /*OUT*/Bool* hasVorFtemps,
4616 IRSB* bb )
4617 {
4618 Int i, j;
4619 IRStmt* st;
4620 IRDirty* d;
4621 IRCAS* cas;
4622
4623 *hasGetIorPutI = False;
4624 *hasVorFtemps = False;
4625
4626 for (i = 0; i < bb->stmts_used; i++) {
4627 st = bb->stmts[i];
4628 switch (st->tag) {
4629 case Ist_AbiHint:
4630 vassert(isIRAtom(st->Ist.AbiHint.base));
4631 vassert(isIRAtom(st->Ist.AbiHint.nia));
4632 break;
4633 case Ist_PutI:
4634 *hasGetIorPutI = True;
4635 break;
4636 case Ist_WrTmp:
4637 if (st->Ist.WrTmp.data->tag == Iex_GetI)
4638 *hasGetIorPutI = True;
4639 switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
4640 case Ity_I1: case Ity_I8: case Ity_I16:
4641 case Ity_I32: case Ity_I64: case Ity_I128:
4642 break;
4643 case Ity_F32: case Ity_F64: case Ity_F128: case Ity_V128:
4644 *hasVorFtemps = True;
4645 break;
4646 default:
4647 goto bad;
4648 }
4649 break;
4650 case Ist_Put:
4651 vassert(isIRAtom(st->Ist.Put.data));
4652 break;
4653 case Ist_Store:
4654 vassert(isIRAtom(st->Ist.Store.addr));
4655 vassert(isIRAtom(st->Ist.Store.data));
4656 break;
4657 case Ist_CAS:
4658 cas = st->Ist.CAS.details;
4659 vassert(isIRAtom(cas->addr));
4660 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
4661 vassert(isIRAtom(cas->expdLo));
4662 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
4663 vassert(isIRAtom(cas->dataLo));
4664 break;
4665 case Ist_LLSC:
4666 vassert(isIRAtom(st->Ist.LLSC.addr));
4667 if (st->Ist.LLSC.storedata)
4668 vassert(isIRAtom(st->Ist.LLSC.storedata));
4669 break;
4670 case Ist_Dirty:
4671 d = st->Ist.Dirty.details;
4672 vassert(isIRAtom(d->guard));
4673 for (j = 0; d->args[j]; j++)
4674 vassert(isIRAtom(d->args[j]));
4675 if (d->mFx != Ifx_None)
4676 vassert(isIRAtom(d->mAddr));
4677 break;
4678 case Ist_NoOp:
4679 case Ist_IMark:
4680 case Ist_MBE:
4681 break;
4682 case Ist_Exit:
4683 vassert(isIRAtom(st->Ist.Exit.guard));
4684 break;
4685 default:
4686 bad:
4687 ppIRStmt(st);
4688 vpanic("considerExpensives");
4689 }
4690 }
4691 }
4692
4693
4694 /* ---------------- The main iropt entry point. ---------------- */
4695
4696 /* exported from this file */
4697 /* Rules of the game:
4698
4699 - IRExpr/IRStmt trees should be treated as immutable, as they
4700 may get shared. So never change a field of such a tree node;
4701 instead construct and return a new one if needed.
4702 */
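/* For example, to negate the guard of an Exit, build a replacement
   statement (a sketch; 'st' is assumed to be some existing Exit):

      IRStmt* st2 = IRStmt_Exit(
                       IRExpr_Unop(Iop_Not1, st->Ist.Exit.guard),
                       st->Ist.Exit.jk, st->Ist.Exit.dst );

   rather than assigning to st->Ist.Exit.guard in place. (The
   unroller above writes into such fields only because it operates
   on a fresh deep copy of the whole SB.) */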
4703
4704
4705 IRSB* do_iropt_BB(
4706 IRSB* bb0,
4707 IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
4708 Bool (*preciseMemExnsFn)(Int,Int),
4709 Addr64 guest_addr,
4710 VexArch guest_arch
4711 )
4712 {
4713 static Int n_total = 0;
4714 static Int n_expensive = 0;
4715
4716 Bool hasGetIorPutI, hasVorFtemps;
4717 IRSB *bb, *bb2;
4718
4719 n_total++;
4720
4721 /* First flatten the block out, since all other
4722 phases assume flat code. */
4723
4724 bb = flatten_BB ( bb0 );
4725
4726 if (iropt_verbose) {
4727 vex_printf("\n========= FLAT\n\n" );
4728 ppIRSB(bb);
4729 }
4730
4731 /* If at level 0, stop now. */
4732 if (vex_control.iropt_level <= 0) return bb;
4733
4734 /* Now do a preliminary cleanup pass, and figure out if we also
4735 need to do 'expensive' optimisations. Expensive optimisations
4736 are deemed necessary if the block contains any GetIs or PutIs.
4737 If needed, do expensive transformations and then another cheap
4738 cleanup pass. */
4739
4740 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
4741
4742 if (guest_arch == VexArchARM) {
4743 /* Translating Thumb2 code produces a lot of chaff. We have to
4744 work extra hard to get rid of it. */
4745 bb = cprop_BB(bb);
4746 bb = spec_helpers_BB ( bb, specHelper );
4747 redundant_put_removal_BB ( bb, preciseMemExnsFn );
4748 do_cse_BB( bb );
4749 do_deadcode_BB( bb );
4750 }
4751
4752 if (vex_control.iropt_level > 1) {
4753
4754 /* Peer at what we have, to decide how much more effort to throw
4755 at it. */
4756 considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );
4757
4758 if (hasVorFtemps && !hasGetIorPutI) {
4759 /* If there is any evidence of FP or Vector activity, do CSE,
4760 as that tends to mop up all manner of lardy code to do with
4761 rounding modes. Don't bother if hasGetIorPutI, since that
4762 case leads into the expensive transformations, which do
4763 CSE anyway. */
4764 (void)do_cse_BB( bb );
4765 do_deadcode_BB( bb );
4766 }
4767
4768 if (hasGetIorPutI) {
4769 Bool cses;
4770 n_expensive++;
4771 if (DEBUG_IROPT)
4772 vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
4773 bb = expensive_transformations( bb );
4774 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
4775 /* Potentially common up GetIs */
4776 cses = do_cse_BB( bb );
4777 if (cses)
4778 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
4779 }
4780
4781 /* Now have a go at unrolling simple (single-BB) loops. If
4782 successful, clean up the results as much as possible. */
4783
4784 bb2 = maybe_loop_unroll_BB( bb, guest_addr );
4785 if (bb2) {
4786 bb = cheap_transformations( bb2, specHelper, preciseMemExnsFn );
4787 if (hasGetIorPutI) {
4788 bb = expensive_transformations( bb );
4789 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
4790 } else {
4791 /* at least do CSE and dead code removal */
4792 do_cse_BB( bb );
4793 do_deadcode_BB( bb );
4794 }
4795 if (0) vex_printf("vex iropt: unrolled a loop\n");
4796 }
4797
4798 }
4799
4800 return bb;
4801 }
4802
4803
4804 /*---------------------------------------------------------------*/
4805 /*--- end ir_opt.c ---*/
4806 /*---------------------------------------------------------------*/
4807