• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mode: C; c-basic-offset: 3; -*- */
2 
3 /*---------------------------------------------------------------*/
4 /*--- begin                                          ir_opt.c ---*/
5 /*---------------------------------------------------------------*/
6 
7 /*
8    This file is part of Valgrind, a dynamic binary instrumentation
9    framework.
10 
11    Copyright (C) 2004-2017 OpenWorks LLP
12       info@open-works.net
13 
14    This program is free software; you can redistribute it and/or
15    modify it under the terms of the GNU General Public License as
16    published by the Free Software Foundation; either version 2 of the
17    License, or (at your option) any later version.
18 
19    This program is distributed in the hope that it will be useful, but
20    WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22    General Public License for more details.
23 
24    You should have received a copy of the GNU General Public License
25    along with this program; if not, write to the Free Software
26    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27    02110-1301, USA.
28 
29    The GNU General Public License is contained in the file COPYING.
30 
31    Neither the names of the U.S. Department of Energy nor the
32    University of California nor the names of its contributors may be
33    used to endorse or promote products derived from this software
34    without prior written permission.
35 */
36 
37 #include "libvex_basictypes.h"
38 #include "libvex_ir.h"
39 #include "libvex.h"
40 
41 #include "main_util.h"
42 #include "main_globals.h"
43 #include "ir_opt.h"
44 
45 
46 /* Set to 1 for lots of debugging output. */
47 #define DEBUG_IROPT 0
48 
49 /* Set to 1 to gather some statistics. Currently only for sameIRExprs. */
50 #define STATS_IROPT 0
51 
52 
53 /* What iropt does, 29 Dec 04.
54 
55    It takes an IRSB and produces a new one with the same meaning,
56    defined thus:
57 
58    After execution of the new BB, all guest state and guest memory is
59    the same as after execution of the original.  This is true
60    regardless of how the block was exited (at the end vs side exit).
61 
62    In addition, parts of the guest state will be identical to that
63    created by execution of the original at the following observation
64    points:
65 
66    * In a dirty helper call, any parts of the guest state that the
67      helper states that it reads or modifies will be up to date.
68      Also, guest memory will be up to date.  Parts of the guest state
69      not marked as being read or modified by the helper cannot be
70      assumed to be up-to-date at the point where the helper is called.
71 
72    * If iropt_register_updates == VexRegUpdSpAtMemAccess :
     The guest state is up to date only as explained above
74      (i.e. at SB exits and as specified by dirty helper call).
75      Also, the stack pointer register is up to date at memory
76      exception points (as this is needed for the stack extension
77      logic in m_signals.c).
78 
79    * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess :
80      Immediately prior to any load or store, those parts of the guest
81      state marked as requiring precise exceptions will be up to date.
82      Also, guest memory will be up to date.  Parts of the guest state
83      not marked as requiring precise exceptions cannot be assumed to
84      be up-to-date at the point of the load/store.
85 
86    * If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
87      Same as minimal, but all the guest state is up to date at memory
88      exception points.
89 
90    * If iropt_register_updates == VexRegUpdAllregsAtEachInsn :
91      Guest state is up to date at each instruction.
92 
93    The relative order of loads and stores (including loads/stores of
94    guest memory done by dirty helpers annotated as such) is not
95    changed.  However, the relative order of loads with no intervening
96    stores/modifies may be changed.
97 
98    Transformation order
99    ~~~~~~~~~~~~~~~~~~~~
100 
101    There are three levels of optimisation, controlled by
102    vex_control.iropt_level.  Define first:
103 
104    "Cheap transformations" are the following sequence:
105       * Redundant-Get removal
106       * Redundant-Put removal
107       * Constant propagation/folding
108       * Dead code removal
109       * Specialisation of clean helper functions
110       * Dead code removal
111 
112    "Expensive transformations" are the following sequence:
113       * CSE
114       * Folding of add/sub chains
115       * Redundant-GetI removal
116       * Redundant-PutI removal
117       * Dead code removal
118 
119    Then the transformations are as follows, as defined by
120    vex_control.iropt_level:
121 
122    Level 0:
123       * Flatten into atomic form.
124 
125    Level 1: the following sequence:
126       * Flatten into atomic form.
127       * Cheap transformations.
128 
129    Level 2: the following sequence
130       * Flatten into atomic form.
131       * Cheap transformations.
132       * If block contains any floating or vector types, CSE.
133       * If block contains GetI or PutI, Expensive transformations.
134       * Try unrolling loops.  Three possible outcomes:
135         - No effect: do nothing more.
136         - Unrolled a loop, and block does not contain GetI or PutI:
137           Do: * CSE
138               * Dead code removal
139         - Unrolled a loop, and block contains GetI or PutI:
140           Do: * Expensive transformations
141               * Cheap transformations
142 */
143 
144 /* Implementation notes, 29 Dec 04.
145 
146    TODO (important): I think rPutI removal ignores precise exceptions
147    and is therefore in a sense, wrong.  In the sense that PutIs are
148    assumed not to write parts of the guest state that we need to have
149    up-to-date at loads/stores.  So far on x86 guest that has not
150    mattered since indeed only the x87 FP registers and tags are
151    accessed using GetI/PutI, and there is no need so far for them to
152    be up to date at mem exception points.  The rPutI pass should be
153    fixed.
154 
155    TODO: improve pessimistic handling of precise exceptions
156      in the tree builder.
157 
158    TODO: check interaction of rGetI and dirty helpers.
159 
160    F64i constants are treated differently from other constants.
161    They are not regarded as atoms, and instead lifted off and
162    bound to temps.  This allows them to participate in CSE, which
163    is important for getting good performance for x86 guest code.
164 
165    CSE up F64 literals (already doing F64is)
166 
167    CSE: consider carefully the requirement for precise exns
168         prior to making CSE any more aggressive.  */
169 
170 
171 /*---------------------------------------------------------------*/
172 /*--- Finite mappery, of a sort                               ---*/
173 /*---------------------------------------------------------------*/
174 
/* General map from HWord-sized thing to HWord-sized thing.  Could be
   by hashing, but it's not clear whether or not this would really be
   any faster. */
178 
typedef
   struct {
      Bool*  inuse;  /* inuse[i] == True iff slot i holds a live binding */
      HWord* key;    /* key[i] is valid only when inuse[i] */
      HWord* val;    /* val[i] is valid only when inuse[i] */
      Int    size;   /* number of slots allocated in each array */
      Int    used;   /* slots handed out so far; always <= size */
   }
   HashHW;
188 
newHHW(void)189 static HashHW* newHHW ( void )
190 {
191    HashHW* h = LibVEX_Alloc_inline(sizeof(HashHW));
192    h->size   = 8;
193    h->used   = 0;
194    h->inuse  = LibVEX_Alloc_inline(h->size * sizeof(Bool));
195    h->key    = LibVEX_Alloc_inline(h->size * sizeof(HWord));
196    h->val    = LibVEX_Alloc_inline(h->size * sizeof(HWord));
197    return h;
198 }
199 
200 
201 /* Look up key in the map. */
202 
lookupHHW(HashHW * h,HWord * val,HWord key)203 static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
204 {
205    Int i;
206    /* vex_printf("lookupHHW(%llx)\n", key ); */
207    for (i = 0; i < h->used; i++) {
208       if (h->inuse[i] && h->key[i] == key) {
209          if (val)
210             *val = h->val[i];
211          return True;
212       }
213    }
214    return False;
215 }
216 
217 
218 /* Add key->val to the map.  Replaces any existing binding for key. */
219 
/* Bind key -> val in the map, replacing any existing binding for key.
   When the arrays are full, copy into arrays of twice the size,
   dropping dead (inuse == False) slots along the way. */
static void addToHHW ( HashHW* h, HWord key, HWord val )
{
   Int i, j;

   /* If the key is already bound, just overwrite its value. */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         h->val[i] = val;
         return;
      }
   }

   /* No free slot at the end?  Grow (and compact) the arrays. */
   if (h->used == h->size) {
      Int    newsz  = 2 * h->size;
      Bool*  inuse2 = LibVEX_Alloc_inline(newsz * sizeof(Bool));
      HWord* key2   = LibVEX_Alloc_inline(newsz * sizeof(HWord));
      HWord* val2   = LibVEX_Alloc_inline(newsz * sizeof(HWord));
      j = 0;
      for (i = 0; i < h->size; i++) {
         if (!h->inuse[i])
            continue;               /* dead slot: compact it away */
         inuse2[j] = True;
         key2[j]   = h->key[i];
         val2[j]   = h->val[i];
         j++;
      }
      h->used  = j;
      h->size  = newsz;
      h->inuse = inuse2;
      h->key   = key2;
      h->val   = val2;
   }

   /* Append the new binding in the first free slot. */
   vassert(h->used < h->size);
   h->inuse[h->used] = True;
   h->key[h->used]   = key;
   h->val[h->used]   = val;
   h->used++;
}
260 
261 
262 /*---------------------------------------------------------------*/
263 /*--- Flattening out a BB into atomic SSA form                ---*/
264 /*---------------------------------------------------------------*/
265 
266 /* Non-critical helper, heuristic for reducing the number of tmp-tmp
267    copies made by flattening.  If in doubt return False. */
268 
isFlat(IRExpr * e)269 static Bool isFlat ( IRExpr* e )
270 {
271    if (e->tag == Iex_Get)
272       return True;
273    if (e->tag == Iex_Binop)
274       return toBool( isIRAtom(e->Iex.Binop.arg1)
275                      && isIRAtom(e->Iex.Binop.arg2) );
276    if (e->tag == Iex_Load)
277       return isIRAtom(e->Iex.Load.addr);
278    return False;
279 }
280 
281 /* Flatten out 'ex' so it is atomic, returning a new expression with
282    the same value, after having appended extra IRTemp assignments to
283    the end of 'bb'. */
284 
/* Flatten 'ex' into atomic form: recursively flatten each operand,
   bind any non-atomic (sub)expression to a fresh IRTemp via a WrTmp
   statement appended to 'bb', and return an atom (RdTmp or Const)
   with the same value as 'ex'.  Note the order of the appended
   statements matters: operands are flattened (and hence emitted)
   before the expression that uses them. */
static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   switch (ex->tag) {

      case Iex_GetI:
         /* Only the index expression needs flattening; descr and bias
            are not expressions. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         /* A Get has no subexpressions; just lift it onto a temp. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop: {
         IRQop* qop = ex->Iex.Qop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(qop->op,
                         flatten_Expr(bb, qop->arg1),
                         flatten_Expr(bb, qop->arg2),
                         flatten_Expr(bb, qop->arg3),
                         flatten_Expr(bb, qop->arg4))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Triop: {
         IRTriop* triop = ex->Iex.Triop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(triop->op,
                         flatten_Expr(bb, triop->arg1),
                         flatten_Expr(bb, triop->arg2),
                         flatten_Expr(bb, triop->arg3))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         /* Shallow-copy the arg vector so the original expression is
            not mutated, then flatten each argument in place. */
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_ITE:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond),
                       flatten_Expr(bb, ex->Iex.ITE.iftrue),
                       flatten_Expr(bb, ex->Iex.ITE.iffalse))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         /* Already an atom; nothing to do. */
         return ex;

      default:
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}
396 
397 
398 /* Append a completely flattened form of 'st' to the end of 'bb'. */
399 
/* Append a completely flattened form of 'st' to the end of 'bb':
   every expression operand of the emitted statement is an atom, with
   any needed intermediate WrTmp statements emitted first by
   flatten_Expr. */
static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr   *e1, *e2, *e3, *e4, *e5;
   IRDirty  *d,  *d2;
   IRCAS    *cas, *cas2;
   IRPutI   *puti, *puti2;
   IRLoadG  *lg;
   IRStoreG *sg;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         puti = st->Ist.PutI.details;
         e1 = flatten_Expr(bb, puti->ix);
         e2 = flatten_Expr(bb, puti->data);
         puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
         addStmtToIRSB(bb, IRStmt_PutI(puti2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
         break;
      case Ist_StoreG:
         sg = st->Ist.StoreG.details;
         e1 = flatten_Expr(bb, sg->addr);
         e2 = flatten_Expr(bb, sg->data);
         e3 = flatten_Expr(bb, sg->guard);
         addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3));
         break;
      case Ist_LoadG:
         lg = st->Ist.LoadG.details;
         e1 = flatten_Expr(bb, lg->addr);
         e2 = flatten_Expr(bb, lg->alt);
         e3 = flatten_Expr(bb, lg->guard);
         addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                                        e1, e2, e3));
         break;
      case Ist_CAS:
         /* expdHi/dataHi are NULL for single-width CAS; flatten them
            only when present. */
         cas  = st->Ist.CAS.details;
         e1   = flatten_Expr(bb, cas->addr);
         e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3   = flatten_Expr(bb, cas->expdLo);
         e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5   = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         /* storedata == NULL means this is a Load-Linked, not a
            Store-Conditional. */
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         /* Work on a copy of the IRDirty so the original statement is
            left intact. */
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            /* VECRET/GSPTR markers are not real expressions; leave
               them as-is. */
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               d2->args[i] = flatten_Expr(bb, arg);
         }
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         /* No expression operands; copy through unchanged. */
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst,
                                       st->Ist.Exit.offsIP));
         break;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}
519 
520 
/* Build and return a flattened (atomic-form) copy of 'in'.  The
   input block is not modified; its type environment is deep-copied
   since flattening adds new temps. */
static IRSB* flatten_BB ( IRSB* in )
{
   Int   i;
   IRSB* out  = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   for (i = 0; i < in->stmts_used; i++) {
      IRStmt* st = in->stmts[i];
      if (st)
         flatten_Stmt( out, st );
   }
   out->next     = flatten_Expr( out, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;
   return out;
}
535 
536 
537 /*---------------------------------------------------------------*/
538 /*--- In-place removal of redundant GETs                      ---*/
539 /*---------------------------------------------------------------*/
540 
541 /* Scan forwards, building up an environment binding (min offset, max
542    offset) pairs to values, which will either be temps or constants.
543 
544    On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
545    env and if it matches, replace the Get with the stored value.  If
546    there is no match, add a (minoff,maxoff) :-> t binding.
547 
548    On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
549    any binding which fully or partially overlaps with (minoff,maxoff).
550    Then add a new (minoff,maxoff) :-> t or c binding.  */
551 
552 /* Extract the min/max offsets from a guest state array descriptor. */
553 
554 inline
getArrayBounds(IRRegArray * descr,UInt * minoff,UInt * maxoff)555 static void getArrayBounds ( IRRegArray* descr,
556                              UInt* minoff, UInt* maxoff )
557 {
558    *minoff = descr->base;
559    *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
560    vassert((*minoff & ~0xFFFF) == 0);
561    vassert((*maxoff & ~0xFFFF) == 0);
562    vassert(*minoff <= *maxoff);
563 }
564 
565 /* Create keys, of the form ((minoffset << 16) | maxoffset). */
566 
mk_key_GetPut(Int offset,IRType ty)567 static UInt mk_key_GetPut ( Int offset, IRType ty )
568 {
569    /* offset should fit in 16 bits. */
570    UInt minoff = offset;
571    UInt maxoff = minoff + sizeofIRType(ty) - 1;
572    vassert((minoff & ~0xFFFF) == 0);
573    vassert((maxoff & ~0xFFFF) == 0);
574    return (minoff << 16) | maxoff;
575 }
576 
mk_key_GetIPutI(IRRegArray * descr)577 static UInt mk_key_GetIPutI ( IRRegArray* descr )
578 {
579    UInt minoff, maxoff;
580    getArrayBounds( descr, &minoff, &maxoff );
581    vassert((minoff & ~0xFFFF) == 0);
582    vassert((maxoff & ~0xFFFF) == 0);
583    return (minoff << 16) | maxoff;
584 }
585 
586 /* Supposing h has keys of the form generated by mk_key_GetPut and
587    mk_key_GetIPutI, invalidate any key which overlaps (k_lo
588    .. k_hi).
589 */
/* Supposing h has keys of the form generated by mk_key_GetPut and
   mk_key_GetIPutI, invalidate (mark not-inuse) every entry whose
   range overlaps (k_lo .. k_hi) in any way. */
static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
{
   Int j;
   vassert(k_lo <= k_hi);

   for (j = 0; j < h->used; j++) {
      UInt e_lo, e_hi;
      if (!h->inuse[j])
         continue;
      /* Unpack the entry's (min,max) range from its key. */
      e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
      e_hi = ((UInt)h->key[j]) & 0xFFFF;
      vassert(e_lo <= e_hi);
      /* Two ranges overlap unless one ends before the other begins. */
      if (!(e_hi < k_lo || k_hi < e_lo))
         h->inuse[j] = False;
   }
}
612 
613 
/* In-place removal of redundant GETs from 'bb'.  Scans forwards,
   maintaining 'env', a map from guest-state ranges (keys as made by
   mk_key_GetPut / mk_key_GetIPutI) to the atom (temp or constant)
   last known to hold that range's value.  A 't = Get(...)' whose
   exact range is bound in env is rewritten to copy the known value;
   Puts/PutIs invalidate overlapping bindings, and state-writing
   dirty helpers conservatively invalidate everything. */
static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */
   Int     i, j;
   HWord   val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf("  ->  "); ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                           (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
               writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate: for a Put, 'key' was
         computed by the invalidation branch just above, so the new
         binding goes in after the overlapped ones were removed */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}
706 
707 
708 /*---------------------------------------------------------------*/
709 /*--- In-place removal of redundant PUTs                      ---*/
710 /*---------------------------------------------------------------*/
711 
712 /* Find any Get uses in st and invalidate any partially or fully
713    overlapping ranges listed in env.  Due to the flattening phase, the
714    only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
715 
/* Helper for redundant-Put removal: find any Get uses in 'st' and
   invalidate any partially or fully overlapping ranges listed in
   'env' (the set of guest-state ranges whose next event is a write).
   Due to the flattening phase, the only stmt kind we expect to find a
   Get on is IRStmt_WrTmp.  Statements that read or write memory may
   additionally force flushing of precise-exception state, as decided
   by 'pxControl' and 'preciseMemExnsFn'. */
static void handle_gets_Stmt (
               HashHW* env,
               IRStmt* st,
               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
               VexRegisterUpdates pxControl
            )
{
   Int     j;
   UInt    key = 0; /* keep gcc -O happy */
   Bool    isGet;
   Bool    memRW = False;  /* does 'st' access memory? */
   IRExpr* e;

   switch (st->tag) {

      /* This is the only interesting case.  Deal with Gets in the RHS
         expression. */
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
               isGet = True;
               key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
               break;
            case Iex_GetI:
               isGet = True;
               key = mk_key_GetIPutI ( e->Iex.GetI.descr );
               break;
            case Iex_Load:
               isGet = False;
               memRW = True;
               break;
            default:
               isGet = False;
         }
         if (isGet) {
            UInt k_lo, k_hi;
            k_lo = (key >> 16) & 0xFFFF;
            k_hi = key & 0xFFFF;
            invalidateOverlaps(env, k_lo, k_hi);
         }
         break;

      /* Be very conservative for dirty helper calls; dump the entire
         environment.  The helper might read guest state, in which
         case it needs to be flushed first.  Also, the helper might
         access guest memory, in which case all parts of the guest
         state requiring precise exceptions needs to be flushed.  The
         crude solution is just to flush everything; we could easily
         enough do a lot better if needed. */
      /* Probably also overly-conservative, but also dump everything
         if we hit a memory bus event (fence, lock, unlock).  Ditto
         AbiHints, CASs, LLs and SCs. */
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         /* fall through */
      case Ist_MBE:
      case Ist_Dirty:
      case Ist_CAS:
      case Ist_LLSC:
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         break;

      /* all other cases are boring. */
      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         memRW = True;
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         memRW = True;
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         memRW = True;
         break;
      }
      case Ist_Exit:
         vassert(isIRAtom(st->Ist.Exit.guard));
         break;

      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         break;

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.details->ix));
         vassert(isIRAtom(st->Ist.PutI.details->data));
         break;

      case Ist_NoOp:
      case Ist_IMark:
         break;

      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("handle_gets_Stmt");
   }

   if (memRW) {
      /* This statement accesses memory.  So we might need to dump all parts
         of the environment corresponding to guest state that may not
         be reordered with respect to memory references.  That means
         at least the stack pointer. */
      switch (pxControl) {
         case VexRegUpdAllregsAtMemAccess:
            /* Precise exceptions required at mem access.
               Flush all guest state. */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            break;
         case VexRegUpdSpAtMemAccess:
            /* We need to dump the stack pointer
               (needed for stack extension in m_signals.c).
               preciseMemExnsFn will use vex_control.iropt_register_updates
               to verify only the sp is to be checked. */
            /* fallthrough */
         case VexRegUpdUnwindregsAtMemAccess:
            for (j = 0; j < env->used; j++) {
               if (!env->inuse[j])
                  continue;
               /* Just flush the minimal amount required, as computed by
                  preciseMemExnsFn. */
               HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
               HWord k_hi = env->key[j] & 0xFFFF;
               if (preciseMemExnsFn( k_lo, k_hi, pxControl ))
                  env->inuse[j] = False;
            }
            break;
         case VexRegUpdAllregsAtEachInsn:
            // VexRegUpdAllregsAtEachInsn cannot happen here.
            // fall through
         case VexRegUpd_INVALID:
         default:
            vassert(0);
      }
   } /* if (memRW) */

}
867 
868 
869 /* Scan backwards, building up a set of (min offset, max
870    offset) pairs, indicating those parts of the guest state
871    for which the next event is a write.
872 
873    On seeing a conditional exit, empty the set.
874 
875    On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
876    completely within the set, remove the Put.  Otherwise, add
877    (minoff,maxoff) to the set.
878 
879    On seeing 'Get (minoff,maxoff)', remove any part of the set
880    overlapping (minoff,maxoff).  The same has to happen for any events
881    which implicitly read parts of the guest state: dirty helper calls
882    and loads/stores.
883 */
884 
/* Remove redundant Put statements from |bb| in place.  Scans the block
   backwards, maintaining in |env| the set of guest-state slices whose
   next event is known to be a write.  A Put whose exact slice is
   already in the set is dead (a later Put overwrites it) and is
   replaced by a NoOp.  |preciseMemExnsFn| and |pxControl| describe
   which guest-state sections may not be reordered past memory
   accesses; they are passed through to handle_gets_Stmt, which trims
   |env| at Gets and memory-touching statements.  Requires
   pxControl < VexRegUpdAllregsAtEachInsn (asserted below). */
static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
               VexRegisterUpdates pxControl
            )
{
   Int     i, j;
   Bool    isPut;
   IRStmt* st;
   UInt    key = 0; /* keep gcc -O happy */

   vassert(pxControl < VexRegUpdAllregsAtEachInsn);

   /* env maps guest-state slice keys to "next event is a write". */
   HashHW* env = newHHW();

   /* Initialise the running env with the fact that the final exit
      writes the IP (or, whatever it claims to write.  We don't
      care.) */
   key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
   addToHHW(env, (HWord)key, 0);

   /* And now scan backwards through the statements. */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         //Bool re_add;
         /* Need to throw out from the env, any part of it which
            doesn't overlap with the guest state written by this exit.
            Since the exit only writes one section, it's simplest to
            do this: (1) check whether env contains a write that
            completely overlaps the write done by this exit; (2) empty
            out env; and (3) if (1) was true, add the write done by
            this exit.

            To make (1) a bit simpler, merely search for a write that
            exactly matches the one done by this exit.  That's safe
            because it will fail as often or more often than a full
            overlap check, and failure to find an overlapping write in
            env is the safe case (we just nuke env if that
            happens). */
         //vassert(isIRAtom(st->Ist.Exit.guard));
         /* (1) */
         //key = mk_key_GetPut(st->Ist.Exit.offsIP,
         //                    typeOfIRConst(st->Ist.Exit.dst));
         //re_add = lookupHHW(env, NULL, key);
         /* (2) */
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         /* (3) */
         //if (0 && re_add)
         //   addToHHW(env, (HWord)key, 0);
         continue;
      }

      /* Deal with Puts */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
            vassert(isIRAtom(st->Ist.PutI.details->ix));
            vassert(isIRAtom(st->Ist.PutI.details->data));
            break;
         default:
            isPut = False;
      }
      /* NOTE(review): PutIs are deliberately excluded from removal by
         the condition below (presumably because their index is
         dynamic, so a key-based redundancy check would be unsound);
         they fall through to handle_gets_Stmt instead. */
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add it
               to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn, pxControl );
   }
}
991 
992 
993 /*---------------------------------------------------------------*/
994 /*--- Constant propagation and folding                        ---*/
995 /*---------------------------------------------------------------*/
996 
997 #if STATS_IROPT
998 /* How often sameIRExprs was invoked */
999 static UInt invocation_count;
1000 /* How often sameIRExprs recursed through IRTemp assignments */
1001 static UInt recursion_count;
1002 /* How often sameIRExprs found identical IRExprs */
1003 static UInt success_count;
1004 /* How often recursing through assignments to IRTemps helped
1005    establishing equality. */
1006 static UInt recursion_success_count;
1007 /* Whether or not recursing through an IRTemp assignment helped
1008    establishing IRExpr equality for a given sameIRExprs invocation. */
1009 static Bool recursion_helped;
1010 /* Whether or not a given sameIRExprs invocation recursed through an
1011    IRTemp assignment */
1012 static Bool recursed;
1013 /* Maximum number of nodes ever visited when comparing two IRExprs. */
1014 static UInt max_nodes_visited;
1015 #endif /* STATS_IROPT */
1016 
1017 /* Count the number of nodes visited for a given sameIRExprs invocation. */
1018 static UInt num_nodes_visited;
1019 
1020 /* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
1021    This is to guard against performance degradation by visiting large
1022    trees without success. */
1023 #define NODE_LIMIT 30
1024 
1025 
1026 /* The env in this section is a map from IRTemp to IRExpr*,
1027    that is, an array indexed by IRTemp. */
1028 
1029 /* Do both expressions compute the same value? The answer is generally
1030    conservative, i.e. it will report that the expressions do not compute
1031    the same value when in fact they do. The reason is that we do not
1032    keep track of changes in the guest state and memory. Thusly, two
1033    Get's, GetI's or Load's, even when accessing the same location, will be
1034    assumed to compute different values. After all the accesses may happen
1035    at different times and the guest state / memory can have changed in
1036    the meantime.
1037 
1038    XXX IMPORTANT XXX the two expressions must have the same IR type.
1039    DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */
1040 
1041 /* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
1042    wrapper that deals with the common tags-don't-match case, and a
1043    slower out of line general case.  Saves a few insns. */
1044 
1045 __attribute__((noinline))
1046 static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );
1047 
1048 inline
sameIRExprs_aux(IRExpr ** env,IRExpr * e1,IRExpr * e2)1049 static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
1050 {
1051    if (e1->tag != e2->tag) return False;
1052    return sameIRExprs_aux2(env, e1, e2);
1053 }
1054 
/* Out-of-line general case of sameIRExprs_aux: structural equality
   check, called only when e1 and e2 already have the same tag.  Gives
   up (returns False) once more than NODE_LIMIT nodes have been
   visited, to bound the cost of fruitless comparisons.  Conservative:
   a False answer does not imply the expressions differ. */
__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   if (num_nodes_visited++ > NODE_LIMIT) return False;

   switch (e1->tag) {
      case Iex_RdTmp:
         if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;

         /* Different tmps: try to establish equality by comparing
            their defining expressions, if both are known in env. */
         if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
            Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
                                        env[e2->Iex.RdTmp.tmp]);
#if STATS_IROPT
            recursed = True;
            if (same) recursion_helped = True;
#endif
            return same;
         }
         return False;

      case Iex_Get:
      case Iex_GetI:
      case Iex_Load:
         /* Guest state / memory could have changed in the meantime. */
         return False;

      case Iex_Binop:
         return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
                                                 e2->Iex.Binop.arg1 )
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
                                                 e2->Iex.Binop.arg2 ));

      case Iex_Unop:
         return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
                        && sameIRExprs_aux( env, e1->Iex.Unop.arg,
                                                 e2->Iex.Unop.arg ));

      case Iex_Const: {
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         /* Caller contract: both expressions have the same IR type,
            hence the constant tags must match. */
         vassert(c1->tag == c2->tag);
         switch (c1->tag) {
            case Ico_U1:   return toBool( c1->Ico.U1  == c2->Ico.U1 );
            case Ico_U8:   return toBool( c1->Ico.U8  == c2->Ico.U8 );
            case Ico_U16:  return toBool( c1->Ico.U16 == c2->Ico.U16 );
            case Ico_U32:  return toBool( c1->Ico.U32 == c2->Ico.U32 );
            case Ico_U64:  return toBool( c1->Ico.U64 == c2->Ico.U64 );
            default: break;
         }
         return False;
      }

      case Iex_Triop: {
         IRTriop *tri1 = e1->Iex.Triop.details;
         IRTriop *tri2 = e2->Iex.Triop.details;
         return toBool( tri1->op == tri2->op
                        && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
                        && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
                        && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
      }

      case Iex_ITE:
         return toBool(    sameIRExprs_aux( env, e1->Iex.ITE.cond,
                                                 e2->Iex.ITE.cond )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iftrue,
                                                 e2->Iex.ITE.iftrue )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iffalse,
                                                 e2->Iex.ITE.iffalse ));

      default:
         /* Not very likely to be "same". */
         break;
   }

   return False;
}
1132 
/* Entry point for the conservative expression-equality check: resets
   the per-invocation visited-node counter, then delegates to
   sameIRExprs_aux.  See the big comment above for semantics; both
   expressions must have the same IR type.  The STATS_IROPT section
   only gathers statistics and has no effect on the result. */
inline
static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   Bool same;

   num_nodes_visited = 0;
   same = sameIRExprs_aux(env, e1, e2);

#if STATS_IROPT
   ++invocation_count;
   if (recursed) ++recursion_count;
   success_count += same;
   if (same && recursion_helped)
      ++recursion_success_count;
   if (num_nodes_visited > max_nodes_visited)
      max_nodes_visited = num_nodes_visited;
   recursed = False; /* reset */
   recursion_helped = False;  /* reset */
#endif /* STATS_IROPT */

   return same;
}
1155 
1156 
1157 /* Debugging-only hack (not used in production runs): make a guess
1158    whether sameIRExprs might assert due to the two args being of
1159    different types.  If in doubt return False.  Is only used when
1160    --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0.
1161    Bad because it duplicates functionality from typeOfIRExpr.  See
1162    comment on the single use point below for rationale. */
1163 static
debug_only_hack_sameIRExprs_might_assert(IRExpr * e1,IRExpr * e2)1164 Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
1165 {
1166    if (e1->tag != e2->tag) return False;
1167    switch (e1->tag) {
1168       case Iex_Const: {
1169          /* The only interesting case */
1170          IRConst *c1 = e1->Iex.Const.con;
1171          IRConst *c2 = e2->Iex.Const.con;
1172          return c1->tag != c2->tag;
1173       }
1174       default:
1175          break;
1176    }
1177    return False;
1178 }
1179 
1180 
1181 /* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
isZeroU32(IRExpr * e)1182 static Bool isZeroU32 ( IRExpr* e )
1183 {
1184    return toBool( e->tag == Iex_Const
1185                   && e->Iex.Const.con->tag == Ico_U32
1186                   && e->Iex.Const.con->Ico.U32 == 0);
1187 }
1188 
1189 /* Is this literally IRExpr_Const(IRConst_U64(0)) ?
1190    Currently unused; commented out to avoid compiler warning */
1191 #if 0
1192 static Bool isZeroU64 ( IRExpr* e )
1193 {
1194    return toBool( e->tag == Iex_Const
1195                   && e->Iex.Const.con->tag == Ico_U64
1196                   && e->Iex.Const.con->Ico.U64 == 0);
1197 }
1198 #endif
1199 
1200 /* Is this literally IRExpr_Const(IRConst_V128(0)) ? */
isZeroV128(IRExpr * e)1201 static Bool isZeroV128 ( IRExpr* e )
1202 {
1203    return toBool( e->tag == Iex_Const
1204                   && e->Iex.Const.con->tag == Ico_V128
1205                   && e->Iex.Const.con->Ico.V128 == 0x0000);
1206 }
1207 
1208 /* Is this literally IRExpr_Const(IRConst_V256(0)) ? */
isZeroV256(IRExpr * e)1209 static Bool isZeroV256 ( IRExpr* e )
1210 {
1211    return toBool( e->tag == Iex_Const
1212                   && e->Iex.Const.con->tag == Ico_V256
1213                   && e->Iex.Const.con->Ico.V256 == 0x00000000);
1214 }
1215 
1216 /* Is this an integer constant with value 0 ? */
isZeroU(IRExpr * e)1217 static Bool isZeroU ( IRExpr* e )
1218 {
1219    if (e->tag != Iex_Const) return False;
1220    switch (e->Iex.Const.con->tag) {
1221       case Ico_U1:    return toBool( e->Iex.Const.con->Ico.U1  == 0);
1222       case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0);
1223       case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0);
1224       case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32 == 0);
1225       case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64 == 0);
1226       case Ico_V256:  return toBool( e->Iex.Const.con->Ico.V256 == 0x00000000);
1227       default: vpanic("isZeroU");
1228    }
1229 }
1230 
1231 /* Is this an integer constant with value 1---1b ? */
isOnesU(IRExpr * e)1232 static Bool isOnesU ( IRExpr* e )
1233 {
1234    if (e->tag != Iex_Const) return False;
1235    switch (e->Iex.Const.con->tag) {
1236       case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0xFF);
1237       case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
1238       case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32
1239                                      == 0xFFFFFFFF);
1240       case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64
1241                                      == 0xFFFFFFFFFFFFFFFFULL);
1242       default: ppIRExpr(e); vpanic("isOnesU");
1243    }
1244 }
1245 
notBool(Bool b)1246 static Bool notBool ( Bool b )
1247 {
1248    if (b == True) return False;
1249    if (b == False) return True;
1250    vpanic("notBool");
1251 }
1252 
/* Make a zero which has the same type as the result of the given
   primop.  Only the ops the constant folder below actually needs are
   handled; any other op panics. */
static IRExpr* mkZeroOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
      case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
      case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
      case Iop_Sub32:
      case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
      case Iop_And64:
      case Iop_Sub64:
      case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
      case Iop_XorV128:
      case Iop_AndV128: return IRExpr_Const(IRConst_V128(0));
      case Iop_XorV256:
      case Iop_AndV256: return IRExpr_Const(IRConst_V256(0));
      default: vpanic("mkZeroOfPrimopResultType: bad primop");
   }
}
1273 
/* Make a value containing all 1-bits, which has the same type as the
   result of the given primop.  Only the ops the constant folder below
   actually needs are handled; any other op panics.  For the V128
   cases, 0xFFFF is the summary form of an all-ones vector constant
   (one summary bit per byte -- see de_summarise_V64 below). */
static IRExpr* mkOnesOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_CmpEQ32:
      case Iop_CmpEQ64:
         return IRExpr_Const(IRConst_U1(toBool(1)));
      case Iop_Or8:
         return IRExpr_Const(IRConst_U8(0xFF));
      case Iop_Or16:
         return IRExpr_Const(IRConst_U16(0xFFFF));
      case Iop_Or32:
         return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
      case Iop_CmpEQ8x8:
      case Iop_Or64:
         return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
      case Iop_CmpEQ8x16:
      case Iop_CmpEQ16x8:
      case Iop_CmpEQ32x4:
         return IRExpr_Const(IRConst_V128(0xFFFF));
      default:
         ppIROp(op);
         vpanic("mkOnesOfPrimopResultType: bad primop");
   }
}
1300 
1301 /* Helpers for folding Clz32/64. */
fold_Clz64(ULong value)1302 static UInt fold_Clz64 ( ULong value )
1303 {
1304    UInt i;
1305    vassert(value != 0ULL); /* no defined semantics for arg==0 */
1306    for (i = 0; i < 64; ++i) {
1307       if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
1308    }
1309    vassert(0);
1310    /*NOTREACHED*/
1311    return 0;
1312 }
1313 
static UInt fold_Clz32 ( UInt value )
{
   vassert(value != 0); /* no defined semantics for arg==0 */
   /* Shift left until the top bit is set; the number of shifts needed
      equals the number of leading zeroes. */
   UInt n = 0;
   while (0 == (value & 0x80000000U)) {
      value <<= 1;
      n++;
   }
   return n;
}
1325 
1326 /* V64 holds 8 summary-constant bits in V128/V256 style.  Convert to
1327    the corresponding real constant. */
1328 //XXX re-check this before use
1329 //static ULong de_summarise_V64 ( UChar v64 )
1330 //{
1331 //   ULong r = 0;
1332 //   if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
1333 //   if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
1334 //   if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
1335 //   if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
1336 //   if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
1337 //   if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
1338 //   if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
1339 //   if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
1340 //   return r;
1341 //}
1342 
1343 /* Helper for arbitrary expression pattern matching in flat IR.  If
1344    'e' is a reference to a tmp, look it up in env -- repeatedly, if
1345    necessary -- until it resolves to a non-tmp.  Note that this can
1346    return NULL if it can't resolve 'e' to a new expression, which will
1347    be the case if 'e' is instead defined by an IRStmt (IRDirty or
1348    LLSC). */
chase(IRExpr ** env,IRExpr * e)1349 static IRExpr* chase ( IRExpr** env, IRExpr* e )
1350 {
1351    /* Why is this loop guaranteed to terminate?  Because all tmps must
1352       have definitions before use, hence a tmp cannot be bound
1353       (directly or indirectly) to itself. */
1354    while (e->tag == Iex_RdTmp) {
1355       if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); }
1356       e = env[(Int)e->Iex.RdTmp.tmp];
1357       if (e == NULL) break;
1358    }
1359    return e;
1360 }
1361 
1362 /* Similar to |chase|, but follows at most one level of tmp reference. */
chase1(IRExpr ** env,IRExpr * e)1363 static IRExpr* chase1 ( IRExpr** env, IRExpr* e )
1364 {
1365    if (e == NULL || e->tag != Iex_RdTmp)
1366       return e;
1367    else
1368       return env[(Int)e->Iex.RdTmp.tmp];
1369 }
1370 
fold_Expr(IRExpr ** env,IRExpr * e)1371 static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
1372 {
1373    Int     shift;
1374    IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
1375 
1376    switch (e->tag) {
1377    case Iex_Unop:
1378       /* UNARY ops */
1379       if (e->Iex.Unop.arg->tag == Iex_Const) {
1380          switch (e->Iex.Unop.op) {
1381          case Iop_1Uto8:
1382             e2 = IRExpr_Const(IRConst_U8(toUChar(
1383                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1384                     ? 1 : 0)));
1385             break;
1386          case Iop_1Uto32:
1387             e2 = IRExpr_Const(IRConst_U32(
1388                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1389                     ? 1 : 0));
1390             break;
1391          case Iop_1Uto64:
1392             e2 = IRExpr_Const(IRConst_U64(
1393                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1394                     ? 1 : 0));
1395             break;
1396 
1397          case Iop_1Sto8:
1398             e2 = IRExpr_Const(IRConst_U8(toUChar(
1399                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1400                     ? 0xFF : 0)));
1401             break;
1402          case Iop_1Sto16:
1403             e2 = IRExpr_Const(IRConst_U16(toUShort(
1404                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1405                     ? 0xFFFF : 0)));
1406             break;
1407          case Iop_1Sto32:
1408             e2 = IRExpr_Const(IRConst_U32(
1409                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1410                     ? 0xFFFFFFFF : 0));
1411             break;
1412          case Iop_1Sto64:
1413             e2 = IRExpr_Const(IRConst_U64(
1414                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1415                     ? 0xFFFFFFFFFFFFFFFFULL : 0));
1416             break;
1417 
1418          case Iop_8Sto32: {
1419             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
1420             u32 <<= 24;
1421             u32 = (Int)u32 >> 24;   /* signed shift */
1422             e2 = IRExpr_Const(IRConst_U32(u32));
1423             break;
1424          }
1425          case Iop_16Sto32: {
1426             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1427             u32 <<= 16;
1428             u32 = (Int)u32 >> 16;   /* signed shift */
1429             e2 = IRExpr_Const(IRConst_U32(u32));
1430             break;
1431          }
1432          case Iop_8Uto64:
1433             e2 = IRExpr_Const(IRConst_U64(
1434                     0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1435             break;
1436          case Iop_16Uto64:
1437             e2 = IRExpr_Const(IRConst_U64(
1438                     0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1439             break;
1440          case Iop_8Uto32:
1441             e2 = IRExpr_Const(IRConst_U32(
1442                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1443             break;
1444          case Iop_8Sto16: {
1445             UShort u16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
1446             u16 <<= 8;
1447             u16 = (Short)u16 >> 8;  /* signed shift */
1448             e2 = IRExpr_Const(IRConst_U16(u16));
1449             break;
1450          }
1451          case Iop_8Uto16:
1452             e2 = IRExpr_Const(IRConst_U16(
1453                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1454             break;
1455          case Iop_16Uto32:
1456             e2 = IRExpr_Const(IRConst_U32(
1457                     0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1458             break;
1459          case Iop_32to16:
1460             e2 = IRExpr_Const(IRConst_U16(toUShort(
1461                     0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1462             break;
1463          case Iop_32to8:
1464             e2 = IRExpr_Const(IRConst_U8(toUChar(
1465                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1466             break;
1467          case Iop_32to1:
1468             e2 = IRExpr_Const(IRConst_U1(toBool(
1469                     1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1470                  )));
1471             break;
1472          case Iop_64to1:
1473             e2 = IRExpr_Const(IRConst_U1(toBool(
1474                     1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
1475                  )));
1476             break;
1477 
1478          case Iop_NotV128:
1479             e2 = IRExpr_Const(IRConst_V128(
1480                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
1481             break;
1482          case Iop_Not64:
1483             e2 = IRExpr_Const(IRConst_U64(
1484                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
1485             break;
1486          case Iop_Not32:
1487             e2 = IRExpr_Const(IRConst_U32(
1488                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1489             break;
1490          case Iop_Not16:
1491             e2 = IRExpr_Const(IRConst_U16(toUShort(
1492                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
1493             break;
1494          case Iop_Not8:
1495             e2 = IRExpr_Const(IRConst_U8(toUChar(
1496                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
1497             break;
1498 
1499          case Iop_Not1:
1500             e2 = IRExpr_Const(IRConst_U1(
1501                     notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
1502             break;
1503 
1504          case Iop_64to8: {
1505             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1506             w64 &= 0xFFULL;
1507             e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
1508             break;
1509          }
1510          case Iop_64to16: {
1511             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1512             w64 &= 0xFFFFULL;
1513             e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
1514             break;
1515          }
1516          case Iop_64to32: {
1517             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1518             w64 &= 0x00000000FFFFFFFFULL;
1519             e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1520             break;
1521          }
1522          case Iop_64HIto32: {
1523             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1524             w64 >>= 32;
1525             e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1526             break;
1527          }
1528          case Iop_32Uto64:
1529             e2 = IRExpr_Const(IRConst_U64(
1530                     0xFFFFFFFFULL
1531                     & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
1532             break;
1533          case Iop_16Sto64: {
1534             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1535             u64 <<= 48;
1536             u64 = (Long)u64 >> 48;   /* signed shift */
1537             e2 = IRExpr_Const(IRConst_U64(u64));
1538             break;
1539          }
1540          case Iop_32Sto64: {
1541             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1542             u64 <<= 32;
1543             u64 = (Long)u64 >> 32;   /* signed shift */
1544             e2 = IRExpr_Const(IRConst_U64(u64));
1545             break;
1546          }
1547 
1548          case Iop_16to8: {
1549             UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1550             w16 &= 0xFF;
1551             e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
1552             break;
1553          }
1554          case Iop_16HIto8: {
1555             UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1556             w16 >>= 8;
1557             w16 &= 0xFF;
1558             e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
1559             break;
1560          }
1561 
1562          case Iop_CmpNEZ8:
1563             e2 = IRExpr_Const(IRConst_U1(toBool(
1564                     0 !=
1565                     (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
1566                  )));
1567             break;
1568          case Iop_CmpNEZ32:
1569             e2 = IRExpr_Const(IRConst_U1(toBool(
1570                     0 !=
1571                     (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1572                  )));
1573             break;
1574          case Iop_CmpNEZ64:
1575             e2 = IRExpr_Const(IRConst_U1(toBool(
1576                     0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
1577                  )));
1578             break;
1579 
1580          case Iop_CmpwNEZ32: {
1581             UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1582             if (w32 == 0)
1583                e2 = IRExpr_Const(IRConst_U32( 0 ));
1584             else
1585                e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
1586             break;
1587          }
1588          case Iop_CmpwNEZ64: {
1589             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1590             if (w64 == 0)
1591                e2 = IRExpr_Const(IRConst_U64( 0 ));
1592             else
1593                e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
1594             break;
1595          }
1596 
1597          case Iop_Left32: {
1598             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1599             Int  s32 = (Int)(u32 & 0xFFFFFFFF);
1600             s32 = (s32 | (-s32));
1601             e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
1602             break;
1603          }
1604 
1605          case Iop_Left64: {
1606             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1607             Long  s64 = (Long)u64;
1608             s64 = (s64 | (-s64));
1609             e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
1610             break;
1611          }
1612 
1613          case Iop_Clz32: {
1614             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1615             if (u32 != 0)
1616                e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
1617             break;
1618          }
1619          case Iop_Clz64: {
1620             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1621             if (u64 != 0ULL)
1622                e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
1623             break;
1624          }
1625 
1626          /* For these vector ones, can't fold all cases, but at least
1627             do the most obvious one.  Could do better here using
1628             summarise/desummarise of vector constants, but too
1629             difficult to verify; hence just handle the zero cases. */
1630          case Iop_32UtoV128: {
1631             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1632             if (0 == u32) {
1633                e2 = IRExpr_Const(IRConst_V128(0x0000));
1634             } else {
1635                goto unhandled;
1636             }
1637             break;
1638          }
1639          case Iop_V128to64: {
1640             UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
1641             if (0 == ((v128 >> 0) & 0xFF)) {
1642                e2 = IRExpr_Const(IRConst_U64(0));
1643             } else {
1644                goto unhandled;
1645             }
1646             break;
1647          }
1648          case Iop_V128HIto64: {
1649             UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
1650             if (0 == ((v128 >> 8) & 0xFF)) {
1651                e2 = IRExpr_Const(IRConst_U64(0));
1652             } else {
1653                goto unhandled;
1654             }
1655             break;
1656          }
1657          case Iop_64UtoV128: {
1658             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1659             if (0 == u64) {
1660                e2 = IRExpr_Const(IRConst_V128(0x0000));
1661             } else {
1662                goto unhandled;
1663             }
1664             break;
1665          }
1666 
1667          /* Even stupider (although still correct ..) */
1668          case Iop_V256to64_0: case Iop_V256to64_1:
1669          case Iop_V256to64_2: case Iop_V256to64_3: {
1670             UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
1671             if (v256 == 0x00000000) {
1672                e2 = IRExpr_Const(IRConst_U64(0));
1673             } else {
1674                goto unhandled;
1675             }
1676             break;
1677          }
1678 
1679          case Iop_ZeroHI64ofV128: {
1680             /* Could do better here -- only need to look at the bottom 64 bits
1681                of the argument, really. */
1682             UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
1683             if (v128 == 0x0000) {
1684                e2 = IRExpr_Const(IRConst_V128(0x0000));
1685             } else {
1686                goto unhandled;
1687             }
1688             break;
1689          }
1690 
1691          default:
1692             goto unhandled;
1693       }
1694       }
1695       break;
1696 
1697    case Iex_Binop:
1698       /* BINARY ops */
1699       if (e->Iex.Binop.arg1->tag == Iex_Const
1700           && e->Iex.Binop.arg2->tag == Iex_Const) {
1701          /* cases where both args are consts */
1702          switch (e->Iex.Binop.op) {
1703 
1704             /* -- Or -- */
1705             case Iop_Or8:
1706                e2 = IRExpr_Const(IRConst_U8(toUChar(
1707                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1708                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1709                break;
1710             case Iop_Or16:
1711                e2 = IRExpr_Const(IRConst_U16(toUShort(
1712                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1713                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1714                break;
1715             case Iop_Or32:
1716                e2 = IRExpr_Const(IRConst_U32(
1717                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1718                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1719                break;
1720             case Iop_Or64:
1721                e2 = IRExpr_Const(IRConst_U64(
1722                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1723                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1724                break;
1725             case Iop_OrV128:
1726                e2 = IRExpr_Const(IRConst_V128(
1727                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1728                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1729                break;
1730 
1731             /* -- Xor -- */
1732             case Iop_Xor8:
1733                e2 = IRExpr_Const(IRConst_U8(toUChar(
1734                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1735                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1736                break;
1737             case Iop_Xor16:
1738                e2 = IRExpr_Const(IRConst_U16(toUShort(
1739                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1740                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1741                break;
1742             case Iop_Xor32:
1743                e2 = IRExpr_Const(IRConst_U32(
1744                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1745                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1746                break;
1747             case Iop_Xor64:
1748                e2 = IRExpr_Const(IRConst_U64(
1749                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1750                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1751                break;
1752             case Iop_XorV128:
1753                e2 = IRExpr_Const(IRConst_V128(
1754                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1755                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1756                break;
1757 
1758             /* -- And -- */
1759             case Iop_And8:
1760                e2 = IRExpr_Const(IRConst_U8(toUChar(
1761                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1762                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1763                break;
1764             case Iop_And16:
1765                e2 = IRExpr_Const(IRConst_U16(toUShort(
1766                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1767                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1768                break;
1769             case Iop_And32:
1770                e2 = IRExpr_Const(IRConst_U32(
1771                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1772                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1773                break;
1774             case Iop_And64:
1775                e2 = IRExpr_Const(IRConst_U64(
1776                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1777                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1778                break;
1779             case Iop_AndV128:
1780                e2 = IRExpr_Const(IRConst_V128(
1781                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1782                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1783                break;
1784 
1785             /* -- Add -- */
1786             case Iop_Add8:
1787                e2 = IRExpr_Const(IRConst_U8(toUChar(
1788                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1789                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1790                break;
1791             case Iop_Add32:
1792                e2 = IRExpr_Const(IRConst_U32(
1793                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1794                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1795                break;
1796             case Iop_Add64:
1797                e2 = IRExpr_Const(IRConst_U64(
1798                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1799                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1800                break;
1801 
1802             /* -- Sub -- */
1803             case Iop_Sub8:
1804                e2 = IRExpr_Const(IRConst_U8(toUChar(
1805                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1806                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1807                break;
1808             case Iop_Sub32:
1809                e2 = IRExpr_Const(IRConst_U32(
1810                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1811                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1812                break;
1813             case Iop_Sub64:
1814                e2 = IRExpr_Const(IRConst_U64(
1815                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1816                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1817                break;
1818 
1819             /* -- Max32U -- */
1820             case Iop_Max32U: {
1821                UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1822                UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1823                UInt res  = u32a > u32b ? u32a : u32b;
1824                e2 = IRExpr_Const(IRConst_U32(res));
1825                break;
1826             }
1827 
1828             /* -- Mul -- */
1829             case Iop_Mul32:
1830                e2 = IRExpr_Const(IRConst_U32(
1831                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1832                         * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1833                break;
1834             case Iop_Mul64:
1835                e2 = IRExpr_Const(IRConst_U64(
1836                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1837                         * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1838                break;
1839 
1840             case Iop_MullS32: {
1841                /* very paranoid */
1842                UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1843                UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1844                Int   s32a = (Int)u32a;
1845                Int   s32b = (Int)u32b;
1846                Long  s64a = (Long)s32a;
1847                Long  s64b = (Long)s32b;
1848                Long  sres = s64a * s64b;
1849                ULong ures = (ULong)sres;
1850                e2 = IRExpr_Const(IRConst_U64(ures));
1851                break;
1852             }
1853 
1854             /* -- Shl -- */
1855             case Iop_Shl32:
1856                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1857                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1858                if (shift >= 0 && shift <= 31)
1859                   e2 = IRExpr_Const(IRConst_U32(
1860                           (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1861                            << shift)));
1862                break;
1863             case Iop_Shl64:
1864                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1865                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1866                if (shift >= 0 && shift <= 63)
1867                   e2 = IRExpr_Const(IRConst_U64(
1868                           (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1869                            << shift)));
1870                break;
1871 
1872             /* -- Sar -- */
1873             case Iop_Sar32: {
1874                /* paranoid ... */
1875                /*signed*/ Int s32;
1876                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1877                s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1878                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1879                if (shift >= 0 && shift <= 31) {
1880                   s32 >>=/*signed*/ shift;
1881                   e2 = IRExpr_Const(IRConst_U32((UInt)s32));
1882                }
1883                break;
1884             }
1885             case Iop_Sar64: {
1886                /* paranoid ... */
1887                /*signed*/ Long s64;
1888                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1889                s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1890                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1891                if (shift >= 0 && shift <= 63) {
1892                   s64 >>=/*signed*/ shift;
1893                   e2 = IRExpr_Const(IRConst_U64((ULong)s64));
1894                }
1895                break;
1896             }
1897 
1898             /* -- Shr -- */
1899             case Iop_Shr32: {
1900                /* paranoid ... */
1901                /*unsigned*/ UInt u32;
1902                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1903                u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1904                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1905                if (shift >= 0 && shift <= 31) {
1906                   u32 >>=/*unsigned*/ shift;
1907                   e2 = IRExpr_Const(IRConst_U32(u32));
1908                }
1909                break;
1910             }
1911             case Iop_Shr64: {
1912                /* paranoid ... */
1913                /*unsigned*/ ULong u64;
1914                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1915                u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1916                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1917                if (shift >= 0 && shift <= 63) {
1918                   u64 >>=/*unsigned*/ shift;
1919                   e2 = IRExpr_Const(IRConst_U64(u64));
1920                }
1921                break;
1922             }
1923 
1924             /* -- CmpEQ -- */
1925             case Iop_CmpEQ32:
1926                e2 = IRExpr_Const(IRConst_U1(toBool(
1927                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1928                         == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1929                break;
1930             case Iop_CmpEQ64:
1931                e2 = IRExpr_Const(IRConst_U1(toBool(
1932                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1933                         == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1934                break;
1935 
1936             /* -- CmpNE -- */
1937             case Iop_CmpNE8:
1938             case Iop_CasCmpNE8:
1939             case Iop_ExpCmpNE8:
1940                e2 = IRExpr_Const(IRConst_U1(toBool(
1941                        ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
1942                         != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
1943                break;
1944             case Iop_CmpNE32:
1945             case Iop_CasCmpNE32:
1946             case Iop_ExpCmpNE32:
1947                e2 = IRExpr_Const(IRConst_U1(toBool(
1948                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1949                         != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1950                break;
1951             case Iop_CmpNE64:
1952             case Iop_CasCmpNE64:
1953             case Iop_ExpCmpNE64:
1954                e2 = IRExpr_Const(IRConst_U1(toBool(
1955                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1956                         != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1957                break;
1958 
1959             /* -- CmpLEU -- */
1960             case Iop_CmpLE32U:
1961                e2 = IRExpr_Const(IRConst_U1(toBool(
1962                        ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1963                         <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1964                break;
1965             case Iop_CmpLE64U:
1966                e2 = IRExpr_Const(IRConst_U1(toBool(
1967                        ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1968                         <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1969                break;
1970 
1971             /* -- CmpLES -- */
1972             case Iop_CmpLE32S:
1973                e2 = IRExpr_Const(IRConst_U1(toBool(
1974                        ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1975                         <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1976                break;
1977             case Iop_CmpLE64S:
1978                e2 = IRExpr_Const(IRConst_U1(toBool(
1979                        ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1980                         <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1981                break;
1982 
1983             /* -- CmpLTS -- */
1984             case Iop_CmpLT32S:
1985                e2 = IRExpr_Const(IRConst_U1(toBool(
1986                        ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1987                         < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1988                break;
1989             case Iop_CmpLT64S:
1990                e2 = IRExpr_Const(IRConst_U1(toBool(
1991                        ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1992                         < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1993                break;
1994 
1995             /* -- CmpLTU -- */
1996             case Iop_CmpLT32U:
1997                e2 = IRExpr_Const(IRConst_U1(toBool(
1998                        ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1999                         < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
2000                break;
2001             case Iop_CmpLT64U:
2002                e2 = IRExpr_Const(IRConst_U1(toBool(
2003                        ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
2004                         < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
2005                break;
2006 
2007             /* -- CmpORD -- */
2008             case Iop_CmpORD32S: {
2009                /* very paranoid */
2010                UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
2011                UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
2012                Int   s32a = (Int)u32a;
2013                Int   s32b = (Int)u32b;
2014                Int   r = 0x2; /* EQ */
2015                if (s32a < s32b) {
2016                   r = 0x8; /* LT */
2017                }
2018                else if (s32a > s32b) {
2019                   r = 0x4; /* GT */
2020                }
2021                e2 = IRExpr_Const(IRConst_U32(r));
2022                break;
2023             }
2024 
2025             /* -- nHLto2n -- */
2026             case Iop_32HLto64:
2027                e2 = IRExpr_Const(IRConst_U64(
2028                        (((ULong)(e->Iex.Binop.arg1
2029                                   ->Iex.Const.con->Ico.U32)) << 32)
2030                        | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
2031                     ));
2032                break;
2033             case Iop_64HLto128:
2034                /* We can't fold this, because there is no way to
2035                   express the result in IR, but at least pretend to
2036                   handle it, so as to stop getting blasted with
2037                   no-rule-for-this-primop messages. */
2038                break;
2039             /* For this vector one, can't fold all cases, but at
2040                least do the most obvious one.  Could do better here
2041                using summarise/desummarise of vector constants, but
2042                too difficult to verify; hence just handle the zero
2043                cases. */
2044             case Iop_64HLtoV128: {
2045                ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64;
2046                ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2047                if (0 == argHi && 0 == argLo) {
2048                   e2 = IRExpr_Const(IRConst_V128(0));
2049                } else {
2050                   goto unhandled;
2051                }
2052                break;
2053             }
2054             /* Same reasoning for the 256-bit version. */
2055             case Iop_V128HLtoV256: {
2056                IRExpr* argHi = e->Iex.Binop.arg1;
2057                IRExpr* argLo = e->Iex.Binop.arg2;
2058                if (isZeroV128(argHi) && isZeroV128(argLo)) {
2059                   e2 = IRExpr_Const(IRConst_V256(0));
2060                } else {
2061                   goto unhandled;
2062                }
2063                break;
2064             }
2065 
2066             /* -- V128 stuff -- */
2067             case Iop_InterleaveLO8x16: {
2068                /* This turns up a lot in Memcheck instrumentation of
2069                   Icc generated code.  I don't know why. */
2070                UShort arg1 =  e->Iex.Binop.arg1->Iex.Const.con->Ico.V128;
2071                UShort arg2 =  e->Iex.Binop.arg2->Iex.Const.con->Ico.V128;
2072                if (0 == arg1 && 0 == arg2) {
2073                   e2 = IRExpr_Const(IRConst_V128(0));
2074                } else {
2075                   goto unhandled;
2076                }
2077                break;
2078             }
2079 
2080             default:
2081                goto unhandled;
2082          }
2083 
2084       } else {
2085 
2086          /* other cases (identities, etc) */
2087          switch (e->Iex.Binop.op) {
2088 
2089             case Iop_Shl32:
2090             case Iop_Shl64:
2091             case Iop_Shr64:
2092             case Iop_Sar64:
2093                /* Shl32/Shl64/Shr64/Sar64(x,0) ==> x */
2094                if (isZeroU(e->Iex.Binop.arg2)) {
2095                   e2 = e->Iex.Binop.arg1;
2096                   break;
2097                }
2098                /* Shl32/Shl64/Shr64/Sar64(0,x) ==> 0 */
2099                if (isZeroU(e->Iex.Binop.arg1)) {
2100                   e2 = e->Iex.Binop.arg1;
2101                   break;
2102                }
2103                break;
2104 
2105             case Iop_Sar32:
2106             case Iop_Shr32:
2107                /* Shr32/Sar32(x,0) ==> x */
2108                if (isZeroU(e->Iex.Binop.arg2)) {
2109                   e2 = e->Iex.Binop.arg1;
2110                   break;
2111                }
2112                break;
2113 
2114             case Iop_Or8:
2115             case Iop_Or16:
2116             case Iop_Or32:
2117             case Iop_Or64:
2118             case Iop_Max32U:
2119                /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
2120                if (isZeroU(e->Iex.Binop.arg2)) {
2121                   e2 = e->Iex.Binop.arg1;
2122                   break;
2123                }
2124                /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
2125                if (isZeroU(e->Iex.Binop.arg1)) {
2126                   e2 = e->Iex.Binop.arg2;
2127                   break;
2128                }
2129                /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
2130                /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
2131                if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) {
2132                   e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2133                   break;
2134                }
2135                /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
2136                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2137                   e2 = e->Iex.Binop.arg1;
2138                   break;
2139                }
2140                break;
2141 
2142             case Iop_Add8:
2143                /* Add8(t,t) ==> t << 1.
2144                   Memcheck doesn't understand that
2145                   x+x produces a defined least significant bit, and it seems
2146                   simplest just to get rid of the problem by rewriting it
2147                   out, since the opportunity to do so exists. */
2148                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2149                   e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1,
2150                                     IRExpr_Const(IRConst_U8(1)));
2151                   break;
2152                }
2153                break;
2154 
2155                /* NB no Add16(t,t) case yet as no known test case exists */
2156 
2157             case Iop_Add32:
2158             case Iop_Add64:
2159                /* Add32/Add64(x,0) ==> x */
2160                if (isZeroU(e->Iex.Binop.arg2)) {
2161                   e2 = e->Iex.Binop.arg1;
2162                   break;
2163                }
2164                /* Add32/Add64(0,x) ==> x */
2165                if (isZeroU(e->Iex.Binop.arg1)) {
2166                   e2 = e->Iex.Binop.arg2;
2167                   break;
2168                }
2169                /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
2170                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2171                   e2 = IRExpr_Binop(
2172                           e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64,
2173                           e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1)));
2174                   break;
2175                }
2176                break;
2177 
2178             case Iop_Sub32:
2179             case Iop_Sub64:
2180                /* Sub32/Sub64(x,0) ==> x */
2181                if (isZeroU(e->Iex.Binop.arg2)) {
2182                   e2 = e->Iex.Binop.arg1;
2183                   break;
2184                }
2185                /* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */
2186                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2187                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2188                   break;
2189                }
2190                break;
2191             case Iop_Sub8x16:
2192                /* Sub8x16(x,0) ==> x */
2193                if (isZeroV128(e->Iex.Binop.arg2)) {
2194                   e2 = e->Iex.Binop.arg1;
2195                   break;
2196                }
2197                break;
2198 
2199             case Iop_And8:
2200             case Iop_And16:
2201             case Iop_And32:
2202             case Iop_And64:
2203                /* And8/And16/And32/And64(x,1---1b) ==> x */
2204                if (isOnesU(e->Iex.Binop.arg2)) {
2205                   e2 = e->Iex.Binop.arg1;
2206                   break;
2207                }
2208                /* And8/And16/And32/And64(1---1b,x) ==> x */
2209                if (isOnesU(e->Iex.Binop.arg1)) {
2210                   e2 = e->Iex.Binop.arg2;
2211                   break;
2212                }
2213                /* And8/And16/And32/And64(x,0) ==> 0 */
2214                if (isZeroU(e->Iex.Binop.arg2)) {
2215                   e2 = e->Iex.Binop.arg2;
2216                   break;
2217                }
2218                /* And8/And16/And32/And64(0,x) ==> 0 */
2219                if (isZeroU(e->Iex.Binop.arg1)) {
2220                   e2 = e->Iex.Binop.arg1;
2221                   break;
2222                }
2223                /* And8/And16/And32/And64(t,t) ==> t, for some IRTemp t */
2224                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2225                   e2 = e->Iex.Binop.arg1;
2226                   break;
2227                }
2228                break;
2229 
2230             case Iop_AndV128:
2231             case Iop_AndV256:
2232                /* AndV128/AndV256(t,t)
2233                   ==> t, for some IRTemp t */
2234                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2235                   e2 = e->Iex.Binop.arg1;
2236                   break;
2237                }
2238                /* Deal with either arg zero.  Could handle other And
2239                   cases here too. */
2240                if (e->Iex.Binop.op == Iop_AndV256
2241                    && (isZeroV256(e->Iex.Binop.arg1)
2242                        || isZeroV256(e->Iex.Binop.arg2))) {
2243                   e2 =  mkZeroOfPrimopResultType(e->Iex.Binop.op);
2244                   break;
2245                } else if (e->Iex.Binop.op == Iop_AndV128
2246                           && (isZeroV128(e->Iex.Binop.arg1)
2247                               || isZeroV128(e->Iex.Binop.arg2))) {
2248                   e2 =  mkZeroOfPrimopResultType(e->Iex.Binop.op);
2249                   break;
2250                }
2251                break;
2252 
2253             case Iop_OrV128:
2254             case Iop_OrV256:
2255                /* OrV128/OrV256(t,t) ==> t, for some IRTemp t */
2256                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2257                   e2 = e->Iex.Binop.arg1;
2258                   break;
2259                }
2260                /* OrV128(t,0) ==> t */
2261                if (e->Iex.Binop.op == Iop_OrV128) {
2262                   if (isZeroV128(e->Iex.Binop.arg2)) {
2263                      e2 = e->Iex.Binop.arg1;
2264                      break;
2265                   }
2266                   if (isZeroV128(e->Iex.Binop.arg1)) {
2267                      e2 = e->Iex.Binop.arg2;
2268                      break;
2269                   }
2270                }
2271                /* OrV256(t,0) ==> t */
2272                if (e->Iex.Binop.op == Iop_OrV256) {
2273                   if (isZeroV256(e->Iex.Binop.arg2)) {
2274                      e2 = e->Iex.Binop.arg1;
2275                      break;
2276                   }
2277                   //Disabled because there's no known test case right now.
2278                   //if (isZeroV256(e->Iex.Binop.arg1)) {
2279                   //   e2 = e->Iex.Binop.arg2;
2280                   //   break;
2281                   //}
2282                }
2283                break;
2284 
2285             case Iop_Xor8:
2286             case Iop_Xor16:
2287             case Iop_Xor32:
2288             case Iop_Xor64:
2289             case Iop_XorV128:
2290             case Iop_XorV256:
2291                /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
2292                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2293                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2294                   break;
2295                }
2296                /* XorV128(t,0) ==> t */
2297                if (e->Iex.Binop.op == Iop_XorV128) {
2298                   if (isZeroV128(e->Iex.Binop.arg2)) {
2299                      e2 = e->Iex.Binop.arg1;
2300                      break;
2301                   }
2302                   //Disabled because there's no known test case right now.
2303                   //if (isZeroV128(e->Iex.Binop.arg1)) {
2304                   //   e2 = e->Iex.Binop.arg2;
2305                   //   break;
2306                   //}
2307                } else {
2308                   /* Xor8/16/32/64(0,t) ==> t */
2309                   if (isZeroU(e->Iex.Binop.arg1)) {
2310                      e2 = e->Iex.Binop.arg2;
2311                      break;
2312                   }
2313                   /* Xor8/16/32/64(t,0) ==> t */
2314                   if (isZeroU(e->Iex.Binop.arg2)) {
2315                      e2 = e->Iex.Binop.arg1;
2316                      break;
2317                   }
2318                }
2319                break;
2320 
2321             case Iop_CmpNE32:
2322                /* CmpNE32(t,t) ==> 0, for some IRTemp t */
2323                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2324                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2325                   break;
2326                }
2327                /* CmpNE32(1Uto32(b), 0) ==> b */
2328                if (isZeroU32(e->Iex.Binop.arg2)) {
2329                   IRExpr* a1 = chase(env, e->Iex.Binop.arg1);
2330                   if (a1 && a1->tag == Iex_Unop
2331                          && a1->Iex.Unop.op == Iop_1Uto32) {
2332                      e2 = a1->Iex.Unop.arg;
2333                      break;
2334                   }
2335                }
2336                break;
2337 
2338             case Iop_CmpEQ32:
2339             case Iop_CmpEQ64:
2340             case Iop_CmpEQ8x8:
2341             case Iop_CmpEQ8x16:
2342             case Iop_CmpEQ16x8:
2343             case Iop_CmpEQ32x4:
2344                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2345                   e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2346                   break;
2347                }
2348                break;
2349 
2350             default:
2351                break;
2352          }
2353       }
2354       break;
2355 
2356    case Iex_ITE:
2357       /* ITE */
2358       /* is the discriminant a constant? */
2359       if (e->Iex.ITE.cond->tag == Iex_Const) {
2360          /* assured us by the IR type rules */
2361          vassert(e->Iex.ITE.cond->Iex.Const.con->tag == Ico_U1);
2362          e2 = e->Iex.ITE.cond->Iex.Const.con->Ico.U1
2363                  ? e->Iex.ITE.iftrue : e->Iex.ITE.iffalse;
2364       }
2365       else
2366       /* are the arms identical? (pretty weedy test) */
2367       if (sameIRExprs(env, e->Iex.ITE.iftrue,
2368                            e->Iex.ITE.iffalse)) {
2369          e2 = e->Iex.ITE.iffalse;
2370       }
2371       break;
2372 
2373    default:
2374       /* not considered */
2375       break;
2376    }
2377 
2378    /* Show cases where we've found but not folded 'op(t,t)'.  Be
2379       careful not to call sameIRExprs with values of different types,
2380       though, else it will assert (and so it should!).  We can't
2381       conveniently call typeOfIRExpr on the two args without a whole
2382       bunch of extra plumbing to pass in a type env, so just use a
2383       hacky test to check the arguments are not anything that might
2384       sameIRExprs to assert.  This is only OK because this kludge is
2385       only used for debug printing, not for "real" operation.  For
2386       "real" operation (ie, all other calls to sameIRExprs), it is
2387       essential that the two args have the same type.
2388 
2389       The "right" solution is to plumb the containing block's
2390       IRTypeEnv through to here and use typeOfIRExpr to be sure.  But
2391       that's a bunch of extra parameter passing which will just slow
2392       down the normal case, for no purpose. */
2393    if (vex_control.iropt_verbosity > 0
2394        && e == e2
2395        && e->tag == Iex_Binop
2396        && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1,
2397                                                     e->Iex.Binop.arg2)
2398        && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2399       vex_printf("vex iropt: fold_Expr: no ident rule for: ");
2400       ppIRExpr(e);
2401       vex_printf("\n");
2402    }
2403 
2404    /* Show the overall results of folding. */
2405    if (DEBUG_IROPT && e2 != e) {
2406       vex_printf("FOLD: ");
2407       ppIRExpr(e); vex_printf("  ->  ");
2408       ppIRExpr(e2); vex_printf("\n");
2409    }
2410 
2411    return e2;
2412 
2413  unhandled:
2414 #  if 0
2415    vex_printf("\n\n");
2416    ppIRExpr(e);
2417    vpanic("fold_Expr: no rule for the above");
2418 #  else
2419    if (vex_control.iropt_verbosity > 0) {
2420       vex_printf("vex iropt: fold_Expr: no const rule for: ");
2421       ppIRExpr(e);
2422       vex_printf("\n");
2423    }
2424    return e2;
2425 #  endif
2426 }
2427 
2428 
2429 /* Apply the subst to a simple 1-level expression -- guaranteed to be
2430    1-level due to previous flattening pass. */
2431 
/* Push the substitution 'env' through 'ex'.  Since 'ex' comes from a
   previously flattened block, it is at most one level deep: only its
   immediate atom operands need to be rewritten, after which the node
   is rebuilt. */
static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
{
   switch (ex->tag) {

      case Iex_RdTmp: {
         IRExpr* bound = env[(Int)ex->Iex.RdTmp.tmp];
         if (bound) {
            /* Propagate tmp-to-tmp copies ... */
            if (bound->tag == Iex_RdTmp)
               return bound;
            /* ... and constants, except F64i literals, which are
               deliberately left in place so they can be CSE'd
               later. */
            if (bound->tag == Iex_Const
                && bound->Iex.Const.con->tag != Ico_F64i)
               return bound;
         }
         /* not bound in env */
         return ex;
      }

      case Iex_Const:
      case Iex_Get:
         /* Nothing to substitute inside these. */
         return ex;

      case Iex_Unop:
         vassert(isIRAtom(ex->Iex.Unop.arg));
         return IRExpr_Unop(ex->Iex.Unop.op,
                            subst_Expr(env, ex->Iex.Unop.arg));

      case Iex_Binop:
         vassert(isIRAtom(ex->Iex.Binop.arg1));
         vassert(isIRAtom(ex->Iex.Binop.arg2));
         return IRExpr_Binop(ex->Iex.Binop.op,
                             subst_Expr(env, ex->Iex.Binop.arg1),
                             subst_Expr(env, ex->Iex.Binop.arg2));

      case Iex_Triop: {
         IRTriop* t = ex->Iex.Triop.details;
         vassert(isIRAtom(t->arg1));
         vassert(isIRAtom(t->arg2));
         vassert(isIRAtom(t->arg3));
         return IRExpr_Triop(t->op,
                             subst_Expr(env, t->arg1),
                             subst_Expr(env, t->arg2),
                             subst_Expr(env, t->arg3));
      }

      case Iex_Qop: {
         IRQop* q = ex->Iex.Qop.details;
         vassert(isIRAtom(q->arg1));
         vassert(isIRAtom(q->arg2));
         vassert(isIRAtom(q->arg3));
         vassert(isIRAtom(q->arg4));
         return IRExpr_Qop(q->op,
                           subst_Expr(env, q->arg1),
                           subst_Expr(env, q->arg2),
                           subst_Expr(env, q->arg3),
                           subst_Expr(env, q->arg4));
      }

      case Iex_Load:
         vassert(isIRAtom(ex->Iex.Load.addr));
         return IRExpr_Load(ex->Iex.Load.end,
                            ex->Iex.Load.ty,
                            subst_Expr(env, ex->Iex.Load.addr));

      case Iex_GetI:
         vassert(isIRAtom(ex->Iex.GetI.ix));
         return IRExpr_GetI(ex->Iex.GetI.descr,
                            subst_Expr(env, ex->Iex.GetI.ix),
                            ex->Iex.GetI.bias);

      case Iex_CCall: {
         Int      j;
         IRExpr** argv = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (j = 0; argv[j]; j++) {
            vassert(isIRAtom(argv[j]));
            argv[j] = subst_Expr(env, argv[j]);
         }
         return IRExpr_CCall(ex->Iex.CCall.cee,
                             ex->Iex.CCall.retty,
                             argv);
      }

      case Iex_ITE:
         vassert(isIRAtom(ex->Iex.ITE.cond));
         vassert(isIRAtom(ex->Iex.ITE.iftrue));
         vassert(isIRAtom(ex->Iex.ITE.iffalse));
         return IRExpr_ITE(subst_Expr(env, ex->Iex.ITE.cond),
                           subst_Expr(env, ex->Iex.ITE.iftrue),
                           subst_Expr(env, ex->Iex.ITE.iffalse));

      default:
         vex_printf("\n\n"); ppIRExpr(ex);
         vpanic("subst_Expr");
   }
}
2541 
2542 
2543 /* Apply the subst to stmt, then fold the result as much as possible.
2544    Much simplified due to stmt being previously flattened.  As a
2545    result of this, the stmt may wind up being turned into a no-op.
2546 */
/* Substitute 'env' into the (flat) expression 'e' and then
   constant-fold the result. */
static IRExpr* substFold ( IRExpr** env, IRExpr* e )
{
   return fold_Expr(env, subst_Expr(env, e));
}

/* Apply the substitution to 'st', folding each rewritten expression
   as we go.  The statement may thereby collapse to a no-op. */
static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
{
   switch (st->tag) {

      /* Statements with no expressions at all: rebuild as-is. */
      case Ist_NoOp:
         return IRStmt_NoOp();

      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);

      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);

      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         return IRStmt_AbiHint(substFold(env, st->Ist.AbiHint.base),
                               st->Ist.AbiHint.len,
                               substFold(env, st->Ist.AbiHint.nia));

      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         return IRStmt_Put(st->Ist.Put.offset,
                           substFold(env, st->Ist.Put.data));

      case Ist_PutI: {
         IRPutI* p = st->Ist.PutI.details;
         vassert(isIRAtom(p->ix));
         vassert(isIRAtom(p->data));
         return IRStmt_PutI(mkIRPutI(p->descr,
                                     substFold(env, p->ix),
                                     p->bias,
                                     substFold(env, p->data)));
      }

      case Ist_WrTmp:
         /* This is the one place where an expr (st->Ist.WrTmp.data)
            is allowed to be more than just a constant or a tmp. */
         return IRStmt_WrTmp(st->Ist.WrTmp.tmp,
                             substFold(env, st->Ist.WrTmp.data));

      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         return IRStmt_Store(st->Ist.Store.end,
                             substFold(env, st->Ist.Store.addr),
                             substFold(env, st->Ist.Store.data));

      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         IRExpr* faddr  = substFold(env, sg->addr);
         IRExpr* fdata  = substFold(env, sg->data);
         IRExpr* fguard = substFold(env, sg->guard);
         if (fguard->tag == Iex_Const) {
            /* The condition on this store has folded down to a
               constant. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               /* Guard is always false: drop the store entirely. */
               return IRStmt_NoOp();
            }
            /* Guard is always true: demote to an unconditional
               store. */
            vassert(fguard->Iex.Const.con->Ico.U1 == True);
            return IRStmt_Store(sg->end, faddr, fdata);
         }
         return IRStmt_StoreG(sg->end, faddr, fdata, fguard);
      }

      case Ist_LoadG: {
         /* If the guard folds to 'false' we can replace the load
            with 'dst := alt'.  If it folds to 'true' we cannot
            conveniently convert to an unconditional load here,
            because that requires a fresh temporary; instead we
            rebuild the conditional load (with folded args) and let
            the caller of this routine deal with it. */
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         IRExpr* faddr  = substFold(env, lg->addr);
         IRExpr* falt   = substFold(env, lg->alt);
         IRExpr* fguard = substFold(env, lg->guard);
         if (fguard->tag == Iex_Const) {
            /* The condition on this load has folded down to a
               constant. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               /* The load is not going to happen -- instead 'alt' is
                  assigned to 'dst'.  */
               return IRStmt_WrTmp(lg->dst, falt);
            }
            vassert(fguard->Iex.Const.con->Ico.U1 == True);
            /* fall through and rebuild; see comment above */
         }
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                             faddr, falt, fguard);
      }

      case Ist_CAS: {
         IRCAS* cas = st->Ist.CAS.details;
         vassert(isIRAtom(cas->addr));
         vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
         vassert(isIRAtom(cas->expdLo));
         vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
         vassert(isIRAtom(cas->dataLo));
         return IRStmt_CAS(
                   mkIRCAS(cas->oldHi, cas->oldLo, cas->end,
                           substFold(env, cas->addr),
                           cas->expdHi ? substFold(env, cas->expdHi)
                                       : NULL,
                           substFold(env, cas->expdLo),
                           cas->dataHi ? substFold(env, cas->dataHi)
                                       : NULL,
                           substFold(env, cas->dataLo)));
      }

      case Ist_LLSC:
         vassert(isIRAtom(st->Ist.LLSC.addr));
         if (st->Ist.LLSC.storedata)
            vassert(isIRAtom(st->Ist.LLSC.storedata));
         return IRStmt_LLSC(st->Ist.LLSC.end,
                            st->Ist.LLSC.result,
                            substFold(env, st->Ist.LLSC.addr),
                            st->Ist.LLSC.storedata
                               ? substFold(env, st->Ist.LLSC.storedata)
                               : NULL);

      case Ist_Dirty: {
         Int      j;
         IRDirty* d2 = emptyIRDirty();
         *d2 = *(st->Ist.Dirty.details);
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            vassert(isIRAtom(d2->mAddr));
            d2->mAddr = substFold(env, d2->mAddr);
         }
         vassert(isIRAtom(d2->guard));
         d2->guard = substFold(env, d2->guard);
         for (j = 0; d2->args[j]; j++) {
            IRExpr* arg = d2->args[j];
            /* VECRET/GSPTR markers are not real expressions; leave
               them untouched. */
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) {
               vassert(isIRAtom(arg));
               d2->args[j] = substFold(env, arg);
            }
         }
         return IRStmt_Dirty(d2);
      }

      case Ist_Exit: {
         vassert(isIRAtom(st->Ist.Exit.guard));
         IRExpr* fcond = substFold(env, st->Ist.Exit.guard);
         if (fcond->tag == Iex_Const) {
            /* Interesting.  The condition on this exit has folded
               down to a constant. */
            vassert(fcond->Iex.Const.con->tag == Ico_U1);
            if (fcond->Iex.Const.con->Ico.U1 == False) {
               /* exit is never going to happen, so dump the
                  statement. */
               return IRStmt_NoOp();
            }
            vassert(fcond->Iex.Const.con->Ico.U1 == True);
            /* The exit has become unconditional.  Leave it as it is
               for now, since truncating the BB here is tricky; that
               is done later by the dead-code elimination pass. */
            if (vex_control.iropt_verbosity > 0)
               /* really a misuse of vex_control.iropt_verbosity */
               vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
         }
         return IRStmt_Exit(fcond, st->Ist.Exit.jk,
                            st->Ist.Exit.dst, st->Ist.Exit.offsIP);
      }

      default:
         vex_printf("\n"); ppIRStmt(st);
         vpanic("subst_and_fold_Stmt");
   }
}
2756 
2757 
/* Constant propagation, tmp-to-tmp copy propagation and constant
   folding over the flat, SSA-form block 'in', producing a new IRSB.
   Unconditional IRStmt_LoadGs discovered along the way are recorded
   in fixups[] and rewritten into load/convert pairs in a final
   patch-up pass. */
IRSB* cprop_BB ( IRSB* in )
{
   Int      i;
   IRSB*    out;
   IRStmt*  st2;
   Int      n_tmps = in->tyenv->types_used;
   /* env[t] is the expression IRTemp t is bound to, or NULL. */
   IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   /* Keep track of IRStmt_LoadGs that we need to revisit after
      processing all the other statements. */
   const Int N_FIXUPS = 16;
   Int fixups[N_FIXUPS]; /* indices in the stmt array of 'out' */
   Int n_fixups = 0;

   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );

   /* Set up the env which travels forward.  This holds a
      substitution, mapping IRTemps to IRExprs. The environment
      is to be applied as we move along.  Keys are IRTemps.
      Values are IRExpr*s.
   */
   for (i = 0; i < n_tmps; i++)
      env[i] = NULL;

   /* For each original SSA-form stmt ... */
   for (i = 0; i < in->stmts_used; i++) {

      /* First apply the substitution to the current stmt.  This
         propagates in any constants and tmp-tmp assignments
         accumulated prior to this point.  As part of the subst_Stmt
         call, also then fold any constant expressions resulting. */

      st2 = in->stmts[i];

      /* perhaps st2 is already a no-op? */
      if (st2->tag == Ist_NoOp) continue;

      st2 = subst_and_fold_Stmt( env, st2 );

      /* Deal with some post-folding special cases. */
      switch (st2->tag) {

         /* If the statement has been folded into a no-op, forget
            it. */
         case Ist_NoOp:
            continue;

         /* If the statement assigns to an IRTemp add it to the
            running environment. This is for the benefit of copy
            propagation and to allow sameIRExpr look through
            IRTemps. */
         case Ist_WrTmp: {
            /* SSA form guarantees each tmp is assigned only once. */
            vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL);
            env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;

            /* 't1 = t2' -- don't add to BB; will be optimized out */
            if (st2->Ist.WrTmp.data->tag == Iex_RdTmp)
               continue;

            /* 't = const' && 'const != F64i' -- don't add to BB
               Note, we choose not to propagate const when const is an
               F64i, so that F64i literals can be CSE'd later.  This
               helps x86 floating point code generation. */
            if (st2->Ist.WrTmp.data->tag == Iex_Const
                && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
               continue;
            }
            /* else add it to the output, as normal */
            break;
         }

         case Ist_LoadG: {
            IRLoadG* lg    = st2->Ist.LoadG.details;
            IRExpr*  guard = lg->guard;
            if (guard->tag == Iex_Const) {
               /* The guard has folded to a constant, and that
                  constant must be 1:I1, since subst_and_fold_Stmt
                  folds out the case 0:I1 by itself. */
               vassert(guard->Iex.Const.con->tag == Ico_U1);
               vassert(guard->Iex.Const.con->Ico.U1 == True);
               /* Add a NoOp here as a placeholder, and make a note of
                  where it is in the output block.  Afterwards we'll
                  come back here and transform the NoOp and the LoadG
                  into a load-convert pair.  The fixups[] entry
                  refers to the inserted NoOp, and we expect to find
                  the relevant LoadG immediately after it. */
               vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
               /* If the table is full, the LoadG simply stays
                  conditional -- that is correct, just suboptimal. */
               if (n_fixups < N_FIXUPS) {
                  fixups[n_fixups++] = out->stmts_used;
                  addStmtToIRSB( out, IRStmt_NoOp() );
               }
            }
            /* And always add the LoadG to the output, regardless. */
            break;
         }

      default:
         break;
      }

      /* Not interesting, copy st2 into the output block. */
      addStmtToIRSB( out, st2 );
   }

#  if STATS_IROPT
   vex_printf("sameIRExpr: invoked = %u/%u  equal = %u/%u max_nodes = %u\n",
              invocation_count, recursion_count, success_count,
              recursion_success_count, max_nodes_visited);
#  endif

   /* The block-end expression gets the substitution too (but no
      folding -- it is an atom). */
   out->next     = subst_Expr( env, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;

   /* Process any leftover unconditional LoadGs that we noticed
      in the main pass. */
   vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
   for (i = 0; i < n_fixups; i++) {
      Int ix = fixups[i];
      /* Carefully verify that the LoadG has the expected form. */
      vassert(ix >= 0 && ix+1 < out->stmts_used);
      IRStmt* nop = out->stmts[ix];
      IRStmt* lgu = out->stmts[ix+1];
      vassert(nop->tag == Ist_NoOp);
      vassert(lgu->tag == Ist_LoadG);
      IRLoadG* lg    = lgu->Ist.LoadG.details;
      IRExpr*  guard = lg->guard;
      vassert(guard->Iex.Const.con->tag == Ico_U1);
      vassert(guard->Iex.Const.con->Ico.U1 == True);
      /* Figure out the load and result types, and the implied
         conversion operation. */
      IRType cvtRes = Ity_INVALID, cvtArg = Ity_INVALID;
      typeOfIRLoadGOp(lg->cvt, &cvtRes, &cvtArg);
      IROp cvtOp = Iop_INVALID;
      switch (lg->cvt) {
         case ILGop_IdentV128:
         case ILGop_Ident64:
         case ILGop_Ident32: break;
         case ILGop_8Uto32:  cvtOp = Iop_8Uto32;  break;
         case ILGop_8Sto32:  cvtOp = Iop_8Sto32;  break;
         case ILGop_16Uto32: cvtOp = Iop_16Uto32; break;
         case ILGop_16Sto32: cvtOp = Iop_16Sto32; break;
         default: vpanic("cprop_BB: unhandled ILGOp");
      }
      /* Replace the placeholder NoOp by the required unconditional
         load. */
      IRTemp tLoaded = newIRTemp(out->tyenv, cvtArg);
      out->stmts[ix]
         = IRStmt_WrTmp(tLoaded,
                        IRExpr_Load(lg->end, cvtArg, lg->addr));
      /* Replace the LoadG by a conversion from the loaded value's
         type to the required result type. */
      out->stmts[ix+1]
         = IRStmt_WrTmp(
              lg->dst, cvtOp == Iop_INVALID
                          ? IRExpr_RdTmp(tLoaded)
                          : IRExpr_Unop(cvtOp, IRExpr_RdTmp(tLoaded)));
   }

   return out;
}
2919 
2920 
2921 /*---------------------------------------------------------------*/
2922 /*--- Dead code (t = E) removal                               ---*/
2923 /*---------------------------------------------------------------*/
2924 
2925 /* As a side effect, also removes all code following an unconditional
2926    side exit. */
2927 
2928 /* The type of the HashHW map is: a map from IRTemp to nothing
2929    -- really just operating a set or IRTemps.
2930 */
2931 
/* Record in 'set' (a Bool array indexed by IRTemp number) that 'tmp'
   is used. */
inline
static void addUses_Temp ( Bool* set, IRTemp tmp )
{
   set[(Int)tmp] = True;
}
2937 
/* Walk 'e' and mark, in 'set', every IRTemp it mentions as used. */
static void addUses_Expr ( Bool* set, IRExpr* e )
{
   Int i;
   switch (e->tag) {
      case Iex_RdTmp:
         addUses_Temp(set, e->Iex.RdTmp.tmp);
         break;
      case Iex_Const:
      case Iex_Get:
         /* No temps underneath these. */
         break;
      case Iex_Unop:
         addUses_Expr(set, e->Iex.Unop.arg);
         break;
      case Iex_Binop:
         addUses_Expr(set, e->Iex.Binop.arg1);
         addUses_Expr(set, e->Iex.Binop.arg2);
         break;
      case Iex_Triop: {
         IRTriop* t = e->Iex.Triop.details;
         addUses_Expr(set, t->arg1);
         addUses_Expr(set, t->arg2);
         addUses_Expr(set, t->arg3);
         break;
      }
      case Iex_Qop: {
         IRQop* q = e->Iex.Qop.details;
         addUses_Expr(set, q->arg1);
         addUses_Expr(set, q->arg2);
         addUses_Expr(set, q->arg3);
         addUses_Expr(set, q->arg4);
         break;
      }
      case Iex_Load:
         addUses_Expr(set, e->Iex.Load.addr);
         break;
      case Iex_GetI:
         addUses_Expr(set, e->Iex.GetI.ix);
         break;
      case Iex_CCall:
         for (i = 0; e->Iex.CCall.args[i]; i++)
            addUses_Expr(set, e->Iex.CCall.args[i]);
         break;
      case Iex_ITE:
         addUses_Expr(set, e->Iex.ITE.cond);
         addUses_Expr(set, e->Iex.ITE.iftrue);
         addUses_Expr(set, e->Iex.ITE.iffalse);
         break;
      default:
         vex_printf("\n");
         ppIRExpr(e);
         vpanic("addUses_Expr");
   }
}
2987 
/* Walk 'st' and mark, in 'set', every IRTemp it reads as used. */
static void addUses_Stmt ( Bool* set, IRStmt* st )
{
   Int i;
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* These read no temps. */
         break;
      case Ist_AbiHint:
         addUses_Expr(set, st->Ist.AbiHint.base);
         addUses_Expr(set, st->Ist.AbiHint.nia);
         break;
      case Ist_Put:
         addUses_Expr(set, st->Ist.Put.data);
         break;
      case Ist_PutI:
         addUses_Expr(set, st->Ist.PutI.details->ix);
         addUses_Expr(set, st->Ist.PutI.details->data);
         break;
      case Ist_WrTmp:
         addUses_Expr(set, st->Ist.WrTmp.data);
         break;
      case Ist_Store:
         addUses_Expr(set, st->Ist.Store.addr);
         addUses_Expr(set, st->Ist.Store.data);
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         addUses_Expr(set, sg->addr);
         addUses_Expr(set, sg->data);
         addUses_Expr(set, sg->guard);
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         addUses_Expr(set, lg->addr);
         addUses_Expr(set, lg->alt);
         addUses_Expr(set, lg->guard);
         break;
      }
      case Ist_CAS: {
         IRCAS* cas = st->Ist.CAS.details;
         addUses_Expr(set, cas->addr);
         /* The Hi halves are optional (single-word CAS). */
         if (cas->expdHi)
            addUses_Expr(set, cas->expdHi);
         addUses_Expr(set, cas->expdLo);
         if (cas->dataHi)
            addUses_Expr(set, cas->dataHi);
         addUses_Expr(set, cas->dataLo);
         break;
      }
      case Ist_LLSC:
         addUses_Expr(set, st->Ist.LLSC.addr);
         /* storedata is NULL for a load-linked. */
         if (st->Ist.LLSC.storedata)
            addUses_Expr(set, st->Ist.LLSC.storedata);
         break;
      case Ist_Dirty: {
         IRDirty* d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None)
            addUses_Expr(set, d->mAddr);
         addUses_Expr(set, d->guard);
         for (i = 0; d->args[i] != NULL; i++) {
            IRExpr* arg = d->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               addUses_Expr(set, arg);
         }
         break;
      }
      case Ist_Exit:
         addUses_Expr(set, st->Ist.Exit.guard);
         break;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vpanic("addUses_Stmt");
   }
}
3065 
3066 
3067 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
isZeroU1(IRExpr * e)3068 static Bool isZeroU1 ( IRExpr* e )
3069 {
3070    return toBool( e->tag == Iex_Const
3071                   && e->Iex.Const.con->tag == Ico_U1
3072                   && e->Iex.Const.con->Ico.U1 == False );
3073 }
3074 
3075 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
isOneU1(IRExpr * e)3076 static Bool isOneU1 ( IRExpr* e )
3077 {
3078    return toBool( e->tag == Iex_Const
3079                   && e->Iex.Const.con->tag == Ico_U1
3080                   && e->Iex.Const.con->Ico.U1 == True );
3081 }
3082 
3083 
3084 /* Note, this destructively modifies the given IRSB. */
3085 
3086 /* Scan backwards through statements, carrying a set of IRTemps which
3087    are known to be used after the current point.  On encountering 't =
3088    E', delete the binding if it is not used.  Otherwise, add any temp
3089    uses to the set and keep on moving backwards.
3090 
3091    As an enhancement, the first (backwards) pass searches for IR exits
3092    with always-taken conditions and notes the location of the earliest
3093    one in the block.  If any such are found, a second pass copies the
3094    exit destination and jump kind to the bb-end.  Then, the exit and
3095    all statements following it are turned into no-ops.
3096 */
3097 
/* Remove dead 't = E' bindings from 'bb' (destructively), scanning
   backwards and tracking which IRTemps are used after the current
   point.  Also, if an always-taken exit is found, copy its
   destination to the block end and no-op it plus everything after
   it. */
/* notstatic */ void do_deadcode_BB ( IRSB* bb )
{
   Int     i, i_unconditional_exit;
   Int     n_tmps = bb->tyenv->types_used;
   /* set[t] == True iff IRTemp t is used somewhere after the point
      currently being scanned (the scan runs backwards). */
   Bool*   set = LibVEX_Alloc_inline(n_tmps * sizeof(Bool));
   IRStmt* st;

   for (i = 0; i < n_tmps; i++)
      set[i] = False;

   /* start off by recording IRTemp uses in the next field. */
   addUses_Expr(set, bb->next);

   /* First pass */

   /* Work backwards through the stmts */
   i_unconditional_exit = -1;
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;
      /* take note of any unconditional exits; scanning backwards
         means this ends up holding the EARLIEST such exit. */
      if (st->tag == Ist_Exit
          && isOneU1(st->Ist.Exit.guard))
         i_unconditional_exit = i;
      if (st->tag == Ist_WrTmp
          && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
          /* it's an IRTemp which never got used.  Delete it. */
         if (DEBUG_IROPT) {
            vex_printf("DEAD: ");
            ppIRStmt(st);
            vex_printf("\n");
         }
         bb->stmts[i] = IRStmt_NoOp();
      }
      else
      if (st->tag == Ist_Dirty
          && st->Ist.Dirty.details->guard
          && isZeroU1(st->Ist.Dirty.details->guard)) {
         /* This is a dirty helper which will never get called.
            Delete it. */
         bb->stmts[i] = IRStmt_NoOp();
       }
       else {
         /* Note any IRTemp uses made by the current statement. */
         addUses_Stmt(set, st);
      }
   }

   /* Optional second pass: if any unconditional exits were found,
      delete them and all following statements. */

   if (i_unconditional_exit != -1) {
      if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
                        i_unconditional_exit);
      vassert(i_unconditional_exit >= 0
              && i_unconditional_exit < bb->stmts_used);
      /* Redirect the block end to the exit's destination ... */
      bb->next
         = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
      bb->jumpkind
         = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
      bb->offsIP
         = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP;
      /* ... then no-op the exit itself and everything after it. */
      for (i = i_unconditional_exit; i < bb->stmts_used; i++)
         bb->stmts[i] = IRStmt_NoOp();
   }
}
3165 
3166 
3167 /*---------------------------------------------------------------*/
3168 /*--- Specialisation of helper function calls, in             ---*/
3169 /*--- collaboration with the front end                        ---*/
3170 /*---------------------------------------------------------------*/
3171 
3172 static
spec_helpers_BB(IRSB * bb,IRExpr * (* specHelper)(const HChar *,IRExpr **,IRStmt **,Int))3173 IRSB* spec_helpers_BB(
3174          IRSB* bb,
3175          IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int)
3176       )
3177 {
3178    Int     i;
3179    IRStmt* st;
3180    IRExpr* ex;
3181    Bool    any = False;
3182 
3183    for (i = bb->stmts_used-1; i >= 0; i--) {
3184       st = bb->stmts[i];
3185 
3186       if (st->tag != Ist_WrTmp
3187           || st->Ist.WrTmp.data->tag != Iex_CCall)
3188          continue;
3189 
3190       ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
3191                           st->Ist.WrTmp.data->Iex.CCall.args,
3192                           &bb->stmts[0], i );
3193       if (!ex)
3194         /* the front end can't think of a suitable replacement */
3195         continue;
3196 
3197       /* We got something better.  Install it in the bb. */
3198       any = True;
3199       bb->stmts[i]
3200          = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);
3201 
3202       if (0) {
3203          vex_printf("SPEC: ");
3204          ppIRExpr(st->Ist.WrTmp.data);
3205          vex_printf("  -->  ");
3206          ppIRExpr(ex);
3207          vex_printf("\n");
3208       }
3209    }
3210 
3211    if (any)
3212       bb = flatten_BB(bb);
3213    return bb;
3214 }
3215 
3216 
3217 /*---------------------------------------------------------------*/
3218 /*--- Determination of guest state aliasing relationships     ---*/
3219 /*---------------------------------------------------------------*/
3220 
3221 /* These are helper functions for CSE and GetI/PutI transformations.
3222 
3223    Determine, to the extent possible, the relationship between two
3224    guest state accesses.  The possible outcomes are:
3225 
3226    * Exact alias.  These two accesses denote precisely the same
3227      piece of the guest state.
3228 
3229    * Definitely no alias.  These two accesses are guaranteed not to
3230      overlap any part of the guest state.
3231 
3232    * Unknown -- if neither of the above can be established.
3233 
3234    If in doubt, return Unknown.  */
3235 
3236 typedef
3237    enum { ExactAlias, NoAlias, UnknownAlias }
3238    GSAliasing;
3239 
3240 
3241 /* Produces the alias relation between an indexed guest
3242    state access and a non-indexed access. */
3243 
3244 static
getAliasingRelation_IC(IRRegArray * descr1,IRExpr * ix1,Int offset2,IRType ty2)3245 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
3246                                     Int offset2, IRType ty2 )
3247 {
3248    UInt minoff1, maxoff1, minoff2, maxoff2;
3249 
3250    getArrayBounds( descr1, &minoff1, &maxoff1 );
3251    minoff2 = offset2;
3252    maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
3253 
3254    if (maxoff1 < minoff2 || maxoff2 < minoff1)
3255       return NoAlias;
3256 
3257    /* Could probably do better here if required.  For the moment
3258       however just claim not to know anything more. */
3259    return UnknownAlias;
3260 }
3261 
3262 
/* Produces the alias relation between two indexed guest state
   accesses. */

static
GSAliasing getAliasingRelation_II (
              IRRegArray* descr1, IRExpr* ix1, Int bias1,
              IRRegArray* descr2, IRExpr* ix2, Int bias2
           )
{
   UInt minoff1, maxoff1, minoff2, maxoff2;
   Int  iters;

   /* First try hard to show they don't alias. */
   getArrayBounds( descr1, &minoff1, &maxoff1 );
   getArrayBounds( descr2, &minoff2, &maxoff2 );
   if (maxoff1 < minoff2 || maxoff2 < minoff1)
      return NoAlias;

   /* So the two arrays at least partially overlap.  To get any
      further we'll have to be sure that the descriptors are
      identical. */
   if (!eqIRRegArray(descr1, descr2))
      return UnknownAlias;

   /* The descriptors are identical.  Now the only difference can be
      in the index expressions.  If they cannot be shown to be
      identical, we have to say we don't know what the aliasing
      relation will be.  Now, since the IR is flattened, the index
      expressions should be atoms -- either consts or tmps.  So that
      makes the comparison simple. */
   vassert(isIRAtom(ix1));
   vassert(isIRAtom(ix2));
   if (!eqIRAtom(ix1,ix2))
      return UnknownAlias;

   /* Ok, the index expressions are identical.  So now the only way
      they can be different is in the bias.  Normalise this
      paranoidly, to reliably establish equality/non-equality. */

   /* So now we know that the GetI and PutI index the same array
      with the same base.  Are the offsets the same, modulo the
      array size?  Do this paranoidly. */
   vassert(descr1->nElems == descr2->nElems);
   vassert(descr1->elemTy == descr2->elemTy);
   vassert(descr1->base   == descr2->base);
   /* Shift both biases up by whole multiples of nElems until both
      are non-negative, so the % below is well behaved on negative
      inputs.  The iteration cap guards against absurd biases. */
   iters = 0;
   while (bias1 < 0 || bias2 < 0) {
      bias1 += descr1->nElems;
      bias2 += descr1->nElems;
      iters++;
      if (iters > 10)
         vpanic("getAliasingRelation: iters");
   }
   vassert(bias1 >= 0 && bias2 >= 0);
   bias1 %= descr1->nElems;
   bias2 %= descr1->nElems;
   vassert(bias1 >= 0 && bias1 < descr1->nElems);
   vassert(bias2 >= 0 && bias2 < descr1->nElems);

   /* Finally, biasP and biasG are normalised into the range
      0 .. descrP/G->nElems - 1.  And so we can establish
      equality/non-equality. */

   return bias1==bias2 ? ExactAlias : NoAlias;
}
3328 
3329 
3330 /*---------------------------------------------------------------*/
3331 /*--- Common Subexpression Elimination                        ---*/
3332 /*---------------------------------------------------------------*/
3333 
3334 /* Expensive in time and space. */
3335 
3336 /* Uses two environments:
3337    a IRTemp -> IRTemp mapping
3338    a mapping from AvailExpr* to IRTemp
3339 */
3340 
/* A tagged union holding either an IRTemp (tag TCt) or an IRConst*
   (tag TCc) -- a compact representation of an IR atom, used inside
   AvailExprs so operands can be compared without chasing IRExpr
   structure. */
typedef
   struct {
      enum { TCc, TCt } tag;
      union { IRTemp tmp; IRConst* con; } u;
   }
   TmpOrConst;
3347 
eqTmpOrConst(TmpOrConst * tc1,TmpOrConst * tc2)3348 static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 )
3349 {
3350    if (tc1->tag != tc2->tag)
3351       return False;
3352    switch (tc1->tag) {
3353       case TCc:
3354          return eqIRConst(tc1->u.con, tc2->u.con);
3355       case TCt:
3356          return tc1->u.tmp == tc2->u.tmp;
3357       default:
3358          vpanic("eqTmpOrConst");
3359    }
3360 }
3361 
eqIRCallee(IRCallee * cee1,IRCallee * cee2)3362 static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 )
3363 {
3364    Bool eq = cee1->addr == cee2->addr;
3365    if (eq) {
3366       vassert(cee1->regparms == cee2->regparms);
3367       vassert(cee1->mcx_mask == cee2->mcx_mask);
3368       /* Names should be the same too, but we don't bother to
3369          check. */
3370    }
3371    return eq;
3372 }
3373 
3374 /* Convert an atomic IRExpr* to a TmpOrConst. */
irExpr_to_TmpOrConst(TmpOrConst * tc,IRExpr * e)3375 static void irExpr_to_TmpOrConst ( /*OUT*/TmpOrConst* tc, IRExpr* e )
3376 {
3377    switch (e->tag) {
3378       case Iex_RdTmp:
3379          tc->tag   = TCt;
3380          tc->u.tmp = e->Iex.RdTmp.tmp;
3381          break;
3382       case Iex_Const:
3383          tc->tag   = TCc;
3384          tc->u.con = e->Iex.Const.con;
3385          break;
3386       default:
3387          /* Getting here is a serious error.  It means that the
3388             presented arg isn't an IR atom, as it should be. */
3389          vpanic("irExpr_to_TmpOrConst");
3390    }
3391 }
3392 
3393 /* Convert a TmpOrConst to an atomic IRExpr*. */
tmpOrConst_to_IRExpr(TmpOrConst * tc)3394 static IRExpr* tmpOrConst_to_IRExpr ( TmpOrConst* tc )
3395 {
3396    switch (tc->tag) {
3397       case TCc: return IRExpr_Const(tc->u.con);
3398       case TCt: return IRExpr_RdTmp(tc->u.tmp);
3399       default:  vpanic("tmpOrConst_to_IRExpr");
3400    }
3401 }
3402 
3403 /* Convert a NULL terminated IRExpr* vector to an array of
3404    TmpOrConsts, and a length. */
irExprVec_to_TmpOrConsts(TmpOrConst ** outs,Int * nOuts,IRExpr ** ins)3405 static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs,
3406                                        /*OUT*/Int* nOuts,
3407                                        IRExpr** ins )
3408 {
3409    Int i, n;
3410    /* We have to make two passes, one to count, one to copy. */
3411    for (n = 0; ins[n]; n++)
3412       ;
3413    *outs  = LibVEX_Alloc_inline(n * sizeof(TmpOrConst));
3414    *nOuts = n;
3415    /* and now copy .. */
3416    for (i = 0; i < n; i++) {
3417       IRExpr*     arg = ins[i];
3418       TmpOrConst* dst = &(*outs)[i];
3419       irExpr_to_TmpOrConst(dst, arg);
3420    }
3421 }
3422 
/* An "available expression": the subset of flat IR expression
   forms tracked by the CSE pass.  Operands are stored directly as
   IRTemps / IRConsts (or TmpOrConsts) rather than as IRExpr*, so
   AvailExprs can be compared structurally (see eq_AvailExpr) and
   substituted into cheaply (see subst_AvailExpr).  The tag
   encodes the shape: U=unop, B=binop, I=ITE, with t/c suffixes
   marking tmp vs const operand positions. */
typedef
   struct {
      enum { Ut, Btt, Btc, Bct, Cf64i, Ittt, Itct, Ittc, Itcc, GetIt,
             CCall, Load
      } tag;
      union {
         /* unop(tmp) */
         struct {
            IROp   op;
            IRTemp arg;
         } Ut;
         /* binop(tmp,tmp) */
         struct {
            IROp   op;
            IRTemp arg1;
            IRTemp arg2;
         } Btt;
         /* binop(tmp,const) */
         struct {
            IROp    op;
            IRTemp  arg1;
            IRConst con2;
         } Btc;
         /* binop(const,tmp) */
         struct {
            IROp    op;
            IRConst con1;
            IRTemp  arg2;
         } Bct;
         /* F64i-style const */
         struct {
            ULong f64i;
         } Cf64i;
         /* ITE(tmp,tmp,tmp) */
         struct {
            IRTemp co;
            IRTemp e1;
            IRTemp e0;
         } Ittt;
         /* ITE(tmp,tmp,const) */
         struct {
            IRTemp  co;
            IRTemp  e1;
            IRConst con0;
         } Ittc;
         /* ITE(tmp,const,tmp) */
         struct {
            IRTemp  co;
            IRConst con1;
            IRTemp  e0;
         } Itct;
         /* ITE(tmp,const,const) */
         struct {
            IRTemp  co;
            IRConst con1;
            IRConst con0;
         } Itcc;
         /* GetI(descr,tmp,bias)*/
         struct {
            IRRegArray* descr;
            IRTemp      ix;
            Int         bias;
         } GetIt;
         /* Clean helper call */
         struct {
            IRCallee*   cee;
            TmpOrConst* args;
            Int         nArgs;
            IRType      retty;
         } CCall;
         /* Load(end,ty,addr) */
         struct {
            IREndness  end;
            IRType     ty;
            TmpOrConst addr;
         } Load;
      } u;
   }
   AvailExpr;
3502 
/* Structural equality on AvailExprs.  The tag check comes first
   and is marked LIKELY-to-differ: most comparisons made by the CSE
   hash search are between unrelated expressions, so this filter
   rejects almost everything immediately. */
static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
{
   if (LIKELY(a1->tag != a2->tag))
      return False;
   switch (a1->tag) {
      case Ut:
         return toBool(
                a1->u.Ut.op == a2->u.Ut.op
                && a1->u.Ut.arg == a2->u.Ut.arg);
      case Btt:
         return toBool(
                a1->u.Btt.op == a2->u.Btt.op
                && a1->u.Btt.arg1 == a2->u.Btt.arg1
                && a1->u.Btt.arg2 == a2->u.Btt.arg2);
      case Btc:
         return toBool(
                a1->u.Btc.op == a2->u.Btc.op
                && a1->u.Btc.arg1 == a2->u.Btc.arg1
                && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
      case Bct:
         return toBool(
                a1->u.Bct.op == a2->u.Bct.op
                && a1->u.Bct.arg2 == a2->u.Bct.arg2
                && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
      case Cf64i:
         return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
      case Ittt:
         return toBool(a1->u.Ittt.co == a2->u.Ittt.co
                       && a1->u.Ittt.e1 == a2->u.Ittt.e1
                       && a1->u.Ittt.e0 == a2->u.Ittt.e0);
      case Ittc:
         return toBool(a1->u.Ittc.co == a2->u.Ittc.co
                       && a1->u.Ittc.e1 == a2->u.Ittc.e1
                       && eqIRConst(&a1->u.Ittc.con0, &a2->u.Ittc.con0));
      case Itct:
         return toBool(a1->u.Itct.co == a2->u.Itct.co
                       && eqIRConst(&a1->u.Itct.con1, &a2->u.Itct.con1)
                       && a1->u.Itct.e0 == a2->u.Itct.e0);
      case Itcc:
         return toBool(a1->u.Itcc.co == a2->u.Itcc.co
                       && eqIRConst(&a1->u.Itcc.con1, &a2->u.Itcc.con1)
                       && eqIRConst(&a1->u.Itcc.con0, &a2->u.Itcc.con0));
      case GetIt:
         return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
                       && a1->u.GetIt.ix == a2->u.GetIt.ix
                       && a1->u.GetIt.bias == a2->u.GetIt.bias);
      case CCall: {
         /* Same callee and same arg count, then pairwise-equal
            args.  Matching calls must have matching return types;
            asserted rather than compared. */
         Int  i, n;
         Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs
                   && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee);
         if (eq) {
            n = a1->u.CCall.nArgs;
            for (i = 0; i < n; i++) {
               if (!eqTmpOrConst( &a1->u.CCall.args[i],
                                  &a2->u.CCall.args[i] )) {
                  eq = False;
                  break;
               }
            }
         }
         if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty);
         return eq;
      }
      case Load: {
         Bool eq = toBool(a1->u.Load.end == a2->u.Load.end
                          && a1->u.Load.ty == a2->u.Load.ty
                          && eqTmpOrConst(&a1->u.Load.addr, &a2->u.Load.addr));
         return eq;
      }
      default:
         vpanic("eq_AvailExpr");
   }
}
3576 
/* Reconstruct a flat IRExpr from an AvailExpr.  Constants that the
   AvailExpr holds by value are copied into freshly allocated
   IRConsts, since the IRExpr constructors take IRConst pointers. */
static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
{
   IRConst *con, *con0, *con1;
   switch (ae->tag) {
      case Ut:
         return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
      case Btt:
         return IRExpr_Binop( ae->u.Btt.op,
                              IRExpr_RdTmp(ae->u.Btt.arg1),
                              IRExpr_RdTmp(ae->u.Btt.arg2) );
      case Btc:
         con = LibVEX_Alloc_inline(sizeof(IRConst));
         *con = ae->u.Btc.con2;
         return IRExpr_Binop( ae->u.Btc.op,
                              IRExpr_RdTmp(ae->u.Btc.arg1),
                              IRExpr_Const(con) );
      case Bct:
         con = LibVEX_Alloc_inline(sizeof(IRConst));
         *con = ae->u.Bct.con1;
         return IRExpr_Binop( ae->u.Bct.op,
                              IRExpr_Const(con),
                              IRExpr_RdTmp(ae->u.Bct.arg2) );
      case Cf64i:
         return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
      case Ittt:
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittt.co),
                           IRExpr_RdTmp(ae->u.Ittt.e1),
                           IRExpr_RdTmp(ae->u.Ittt.e0));
      case Ittc:
         con0 = LibVEX_Alloc_inline(sizeof(IRConst));
         *con0 = ae->u.Ittc.con0;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittc.co),
                           IRExpr_RdTmp(ae->u.Ittc.e1),
                           IRExpr_Const(con0));
      case Itct:
         con1 = LibVEX_Alloc_inline(sizeof(IRConst));
         *con1 = ae->u.Itct.con1;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itct.co),
                           IRExpr_Const(con1),
                           IRExpr_RdTmp(ae->u.Itct.e0));

      case Itcc:
         con0 = LibVEX_Alloc_inline(sizeof(IRConst));
         con1 = LibVEX_Alloc_inline(sizeof(IRConst));
         *con0 = ae->u.Itcc.con0;
         *con1 = ae->u.Itcc.con1;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itcc.co),
                           IRExpr_Const(con1),
                           IRExpr_Const(con0));
      case GetIt:
         return IRExpr_GetI(ae->u.GetIt.descr,
                            IRExpr_RdTmp(ae->u.GetIt.ix),
                            ae->u.GetIt.bias);
      case CCall: {
         /* Rebuild the NULL-terminated argument vector from the
            TmpOrConst array. */
         Int i, n = ae->u.CCall.nArgs;
         vassert(n >= 0);
         IRExpr** vec = LibVEX_Alloc_inline((n+1) * sizeof(IRExpr*));
         vec[n] = NULL;
         for (i = 0; i < n; i++) {
            vec[i] = tmpOrConst_to_IRExpr(&ae->u.CCall.args[i]);
         }
         return IRExpr_CCall(ae->u.CCall.cee,
                             ae->u.CCall.retty,
                             vec);
      }
      case Load:
         return IRExpr_Load(ae->u.Load.end, ae->u.Load.ty,
                            tmpOrConst_to_IRExpr(&ae->u.Load.addr));
      default:
         vpanic("availExpr_to_IRExpr");
   }
}
3649 
3650 inline
subst_AvailExpr_Temp(HashHW * env,IRTemp tmp)3651 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
3652 {
3653    HWord res;
3654    /* env :: IRTemp -> IRTemp */
3655    if (lookupHHW( env, &res, (HWord)tmp ))
3656       return (IRTemp)res;
3657    else
3658       return tmp;
3659 }
3660 
3661 inline
subst_AvailExpr_TmpOrConst(TmpOrConst * tc,HashHW * env)3662 static void subst_AvailExpr_TmpOrConst ( /*MB_MOD*/TmpOrConst* tc,
3663                                           HashHW* env )
3664 {
3665    /* env :: IRTemp -> IRTemp */
3666    if (tc->tag == TCt) {
3667       tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp );
3668    }
3669 }
3670 
/* Apply the temp-to-temp substitution |env| to every IRTemp
   operand inside |ae|, in place.  Constant operands need no
   substitution; CCall and Load operands are handled via
   subst_AvailExpr_TmpOrConst.  (Also removes a stray ';;' the
   original CCall case carried.) */
static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
{
   /* env :: IRTemp -> IRTemp */
   switch (ae->tag) {
      case Ut:
         ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
         break;
      case Btt:
         ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
         ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
         break;
      case Btc:
         ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
         break;
      case Bct:
         ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
         break;
      case Cf64i:
         /* A bare constant contains no temps. */
         break;
      case Ittt:
         ae->u.Ittt.co = subst_AvailExpr_Temp( env, ae->u.Ittt.co );
         ae->u.Ittt.e1 = subst_AvailExpr_Temp( env, ae->u.Ittt.e1 );
         ae->u.Ittt.e0 = subst_AvailExpr_Temp( env, ae->u.Ittt.e0 );
         break;
      case Ittc:
         ae->u.Ittc.co = subst_AvailExpr_Temp( env, ae->u.Ittc.co );
         ae->u.Ittc.e1 = subst_AvailExpr_Temp( env, ae->u.Ittc.e1 );
         break;
      case Itct:
         ae->u.Itct.co = subst_AvailExpr_Temp( env, ae->u.Itct.co );
         ae->u.Itct.e0 = subst_AvailExpr_Temp( env, ae->u.Itct.e0 );
         break;
      case Itcc:
         ae->u.Itcc.co = subst_AvailExpr_Temp( env, ae->u.Itcc.co );
         break;
      case GetIt:
         ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
         break;
      case CCall: {
         Int i, n = ae->u.CCall.nArgs;
         for (i = 0; i < n; i++) {
            subst_AvailExpr_TmpOrConst(&ae->u.CCall.args[i], env);
         }
         break;
      }
      case Load:
         subst_AvailExpr_TmpOrConst(&ae->u.Load.addr, env);
         break;
      default:
         vpanic("subst_AvailExpr");
   }
}
3723 
/* Convert flat IRExpr |e| into a freshly allocated AvailExpr, or
   return NULL if |e| is not one of the forms tracked by the CSE
   pass.  Loads are only converted when |allowLoadsToBeCSEd| is
   True.  Since the IR is flat, all operands examined here are
   expected to be atoms (RdTmp or Const). */
static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e, Bool allowLoadsToBeCSEd )
{
   AvailExpr* ae;

   switch (e->tag) {
      case Iex_Unop:
         if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag      = Ut;
            ae->u.Ut.op  = e->Iex.Unop.op;
            ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
            return ae;
         }
         break;

      case Iex_Binop:
         /* Three tracked shapes: (tmp,tmp), (tmp,const), and
            (const,tmp).  (const,const) is left for the folder. */
         if (e->Iex.Binop.arg1->tag == Iex_RdTmp) {
            if (e->Iex.Binop.arg2->tag == Iex_RdTmp) {
               ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
               ae->tag        = Btt;
               ae->u.Btt.op   = e->Iex.Binop.op;
               ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
               ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
               return ae;
            }
            if (e->Iex.Binop.arg2->tag == Iex_Const) {
               ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
               ae->tag        = Btc;
               ae->u.Btc.op   = e->Iex.Binop.op;
               ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
               ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
               return ae;
            }
         } else if (e->Iex.Binop.arg1->tag == Iex_Const
                    && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag        = Bct;
            ae->u.Bct.op   = e->Iex.Binop.op;
            ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
            ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
            return ae;
         }
         break;

      case Iex_Const:
         /* Only F64i constants are worth tracking here. */
         if (e->Iex.Const.con->tag == Ico_F64i) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag          = Cf64i;
            ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
            return ae;
         }
         break;

      case Iex_ITE:
         /* Four tracked shapes, by iftrue/iffalse atom kinds:
            (t,t), (c,t), (t,c), (c,c); the condition must be a
            tmp in all of them. */
         if (e->Iex.ITE.cond->tag == Iex_RdTmp) {
            if (e->Iex.ITE.iffalse->tag == Iex_RdTmp) {
               if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Ittt;
                  ae->u.Ittt.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Ittt.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
                  ae->u.Ittt.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
                  return ae;
               }
               if (e->Iex.ITE.iftrue->tag == Iex_Const) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Itct;
                  ae->u.Itct.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Itct.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
                  ae->u.Itct.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
                  return ae;
               }
            } else if (e->Iex.ITE.iffalse->tag == Iex_Const) {
               if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Ittc;
                  ae->u.Ittc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Ittc.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
                  ae->u.Ittc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
                  return ae;
               }
               if (e->Iex.ITE.iftrue->tag == Iex_Const) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Itcc;
                  ae->u.Itcc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Itcc.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
                  ae->u.Itcc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
                  return ae;
               }
            }
         }
         break;

      case Iex_GetI:
         if (e->Iex.GetI.ix->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag           = GetIt;
            ae->u.GetIt.descr = e->Iex.GetI.descr;
            ae->u.GetIt.ix    = e->Iex.GetI.ix->Iex.RdTmp.tmp;
            ae->u.GetIt.bias  = e->Iex.GetI.bias;
            return ae;
         }
         break;

      case Iex_CCall:
         ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
         ae->tag = CCall;
         /* Ok to share only the cee, since it is immutable. */
         ae->u.CCall.cee   = e->Iex.CCall.cee;
         ae->u.CCall.retty = e->Iex.CCall.retty;
         /* irExprVec_to_TmpOrConsts will assert if the args are
            neither tmps nor constants, but that's ok .. that's all they
            should be. */
         irExprVec_to_TmpOrConsts(
                                  &ae->u.CCall.args, &ae->u.CCall.nArgs,
                                  e->Iex.CCall.args
                                 );
         return ae;

      case Iex_Load:
         /* If the caller of do_cse_BB has requested that loads also
            be CSEd, convert them into AvailExprs.  If not, we'll just
            return NULL here, and the load never becomes considered
            "available", which effectively disables CSEing of them, as
            desired. */
         if (allowLoadsToBeCSEd) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag        = Load;
            ae->u.Load.end = e->Iex.Load.end;
            ae->u.Load.ty  = e->Iex.Load.ty;
            irExpr_to_TmpOrConst(&ae->u.Load.addr, e->Iex.Load.addr);
            return ae;
         }
         break;

      default:
         break;
   }

   return NULL;
}
3865 
3866 
3867 /* The BB is modified in-place.  Returns True if any changes were
3868    made.  The caller can choose whether or not loads should be CSEd.
3869    In the normal course of things we don't do that, since CSEing loads
3870    is something of a dodgy proposition if the guest program is doing
3871    some screwy stuff to do with races and spinloops. */
3872 
do_cse_BB(IRSB * bb,Bool allowLoadsToBeCSEd)3873 static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd )
3874 {
3875    Int        i, j, paranoia;
3876    IRTemp     t, q;
3877    IRStmt*    st;
3878    AvailExpr* eprime;
3879    AvailExpr* ae;
3880    Bool       invalidate;
3881    Bool       anyDone = False;
3882 
3883    HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
3884    HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */
3885 
3886    vassert(sizeof(IRTemp) <= sizeof(HWord));
3887 
3888    if (0) { ppIRSB(bb); vex_printf("\n\n"); }
3889 
3890    /* Iterate forwards over the stmts.
3891       On seeing "t = E", where E is one of the AvailExpr forms:
3892          let E' = apply tenv substitution to E
3893          search aenv for E'
3894             if a mapping E' -> q is found,
3895                replace this stmt by "t = q"
3896                and add binding t -> q to tenv
3897             else
3898                add binding E' -> t to aenv
3899                replace this stmt by "t = E'"
3900 
3901       Other statements are only interesting to the extent that they
3902       might invalidate some of the expressions in aenv.  So there is
3903       an invalidate-bindings check for each statement seen.
3904    */
3905    for (i = 0; i < bb->stmts_used; i++) {
3906       st = bb->stmts[i];
3907 
3908       /* ------ BEGIN invalidate aenv bindings ------ */
3909       /* This is critical: remove from aenv any E' -> .. bindings
3910          which might be invalidated by this statement.  The only
3911          vulnerable kind of bindings are the GetI and Load kinds.
3912             Dirty call - dump (paranoia level -> 2)
3913             Store      - dump (ditto)
3914             Put, PutI  - dump unless no-overlap is proven (.. -> 1)
3915          Uses getAliasingRelation_IC and getAliasingRelation_II
3916          to do the no-overlap assessments needed for Put/PutI.
3917       */
3918       switch (st->tag) {
3919          case Ist_Dirty: case Ist_Store: case Ist_MBE:
3920          case Ist_CAS: case Ist_LLSC:
3921          case Ist_StoreG:
3922             paranoia = 2; break;
3923          case Ist_Put: case Ist_PutI:
3924             paranoia = 1; break;
3925          case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
3926          case Ist_WrTmp: case Ist_Exit: case Ist_LoadG:
3927             paranoia = 0; break;
3928          default:
3929             vpanic("do_cse_BB(1)");
3930       }
3931 
3932       if (paranoia > 0) {
3933          for (j = 0; j < aenv->used; j++) {
3934             if (!aenv->inuse[j])
3935                continue;
3936             ae = (AvailExpr*)aenv->key[j];
3937             if (ae->tag != GetIt && ae->tag != Load)
3938                continue;
3939             invalidate = False;
3940             if (paranoia >= 2) {
3941                invalidate = True;
3942             } else {
3943                vassert(paranoia == 1);
3944                if (ae->tag == Load) {
3945                   /* Loads can be invalidated by anything that could
3946                      possibly touch memory.  But in that case we
3947                      should have |paranoia| == 2 and we won't get
3948                      here.  So there's nothing to do; we don't have to
3949                      invalidate the load. */
3950                }
3951                else
3952                if (st->tag == Ist_Put) {
3953                   if (getAliasingRelation_IC(
3954                          ae->u.GetIt.descr,
3955                          IRExpr_RdTmp(ae->u.GetIt.ix),
3956                          st->Ist.Put.offset,
3957                          typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
3958                       ) != NoAlias)
3959                      invalidate = True;
3960                }
3961                else
3962                if (st->tag == Ist_PutI) {
3963                   IRPutI *puti = st->Ist.PutI.details;
3964                   if (getAliasingRelation_II(
3965                          ae->u.GetIt.descr,
3966                          IRExpr_RdTmp(ae->u.GetIt.ix),
3967                          ae->u.GetIt.bias,
3968                          puti->descr,
3969                          puti->ix,
3970                          puti->bias
3971                       ) != NoAlias)
3972                      invalidate = True;
3973                }
3974                else
3975                   vpanic("do_cse_BB(2)");
3976             }
3977 
3978             if (invalidate) {
3979                aenv->inuse[j] = False;
3980                aenv->key[j]   = (HWord)NULL;  /* be sure */
3981             }
3982          } /* for j */
3983       } /* paranoia > 0 */
3984 
3985       /* ------ ENV invalidate aenv bindings ------ */
3986 
3987       /* ignore not-interestings */
3988       if (st->tag != Ist_WrTmp)
3989          continue;
3990 
3991       t = st->Ist.WrTmp.tmp;
3992       eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data, allowLoadsToBeCSEd);
3993       /* ignore if not of AvailExpr form */
3994       if (!eprime)
3995          continue;
3996 
3997       /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
3998 
3999       /* apply tenv */
4000       subst_AvailExpr( tenv, eprime );
4001 
4002       /* search aenv for eprime, unfortunately the hard way */
4003       for (j = 0; j < aenv->used; j++)
4004          if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
4005             break;
4006 
4007       if (j < aenv->used) {
4008          /* A binding E' -> q was found.  Replace stmt by "t = q" and
4009             note the t->q binding in tenv. */
4010          /* (this is the core of the CSE action) */
4011          q = (IRTemp)aenv->val[j];
4012          bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
4013          addToHHW( tenv, (HWord)t, (HWord)q );
4014          anyDone = True;
4015       } else {
4016          /* No binding was found, so instead we add E' -> t to our
4017             collection of available expressions, replace this stmt
4018             with "t = E'", and move on. */
4019          bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
4020          addToHHW( aenv, (HWord)eprime, (HWord)t );
4021       }
4022    }
4023 
4024    /*
4025    ppIRSB(bb);
4026    sanityCheckIRSB(bb, Ity_I32);
4027    vex_printf("\n\n");
4028    */
4029    return anyDone;
4030 }
4031 
4032 
4033 /*---------------------------------------------------------------*/
4034 /*--- Add32/Sub32 chain collapsing                            ---*/
4035 /*---------------------------------------------------------------*/
4036 
4037 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
4038 
4039 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ?  If
4040    yes, set *tmp and *i32 appropriately.  *i32 is set as if the
4041    root node is Add32, not Sub32. */
4042 
isAdd32OrSub32(IRExpr * e,IRTemp * tmp,Int * i32)4043 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
4044 {
4045    if (e->tag != Iex_Binop)
4046       return False;
4047    if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
4048       return False;
4049    if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
4050       return False;
4051    if (e->Iex.Binop.arg2->tag != Iex_Const)
4052       return False;
4053    *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
4054    *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
4055    if (e->Iex.Binop.op == Iop_Sub32)
4056       *i32 = -*i32;
4057    return True;
4058 }
4059 
4060 
4061 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
4062    other tmp2.  Scan backwards from the specified start point -- an
4063    optimisation. */
4064 
collapseChain(IRSB * bb,Int startHere,IRTemp tmp,IRTemp * tmp2,Int * i32)4065 static Bool collapseChain ( IRSB* bb, Int startHere,
4066                             IRTemp tmp,
4067                             IRTemp* tmp2, Int* i32 )
4068 {
4069    Int     j, ii;
4070    IRTemp  vv;
4071    IRStmt* st;
4072    IRExpr* e;
4073 
4074    /* the (var, con) pair contain the current 'representation' for
4075       'tmp'.  We start with 'tmp + 0'.  */
4076    IRTemp var = tmp;
4077    Int    con = 0;
4078 
4079    /* Scan backwards to see if tmp can be replaced by some other tmp
4080      +/- a constant. */
4081    for (j = startHere; j >= 0; j--) {
4082       st = bb->stmts[j];
4083       if (st->tag != Ist_WrTmp)
4084          continue;
4085       if (st->Ist.WrTmp.tmp != var)
4086          continue;
4087       e = st->Ist.WrTmp.data;
4088       if (!isAdd32OrSub32(e, &vv, &ii))
4089          break;
4090       var = vv;
4091       con += ii;
4092    }
4093    if (j == -1)
4094       /* no earlier binding for var .. ill-formed IR */
4095       vpanic("collapseChain");
4096 
4097    /* so, did we find anything interesting? */
4098    if (var == tmp)
4099       return False; /* no .. */
4100 
4101    *tmp2 = var;
4102    *i32  = con;
4103    return True;
4104 }
4105 
4106 
4107 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
4108 
/* Collapse chains of Add32/Sub32 temp definitions, and fold such
   chains into the index expressions of GetI and PutI statements.
   bb is modified in-place. */
static void collapse_AddSub_chains_BB ( IRSB* bb )
{
   IRStmt *st;
   IRTemp var, var2;
   Int    i, con, con2;

   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */

      if (st->tag == Ist_WrTmp
          && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {

         /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
            Find out if var can be expressed as var2 + con2. */
         if (collapseChain(bb, i-1, var, &var2, &con2)) {
            if (DEBUG_IROPT) {
               vex_printf("replacing1 ");
               ppIRStmt(st);
               vex_printf(" with ");
            }
            con2 += con;
            bb->stmts[i]
               = IRStmt_WrTmp(
                    st->Ist.WrTmp.tmp,
                    (con2 >= 0)
                      ? IRExpr_Binop(Iop_Add32,
                                     IRExpr_RdTmp(var2),
                                     IRExpr_Const(IRConst_U32(con2)))
                      : IRExpr_Binop(Iop_Sub32,
                                     IRExpr_RdTmp(var2),
                                     IRExpr_Const(IRConst_U32(-con2)))
                 );
            if (DEBUG_IROPT) {
               ppIRStmt(bb->stmts[i]);
               vex_printf("\n");
            }
         }

         continue;
      }

      /* Try to collapse 't1 = GetI[t2, con]'. */

      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_GetI
          && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
          && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
                                      ->Iex.RdTmp.tmp, &var2, &con2)) {
         if (DEBUG_IROPT) {
            vex_printf("replacing3 ");
            ppIRStmt(st);
            vex_printf(" with ");
         }
         con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
         bb->stmts[i]
            = IRStmt_WrTmp(
                 st->Ist.WrTmp.tmp,
                 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
                             IRExpr_RdTmp(var2),
                             con2));
         if (DEBUG_IROPT) {
            ppIRStmt(bb->stmts[i]);
            vex_printf("\n");
         }
         continue;
      }

      /* Perhaps st is PutI[t, con] ?  Only fetch the PutI details
         after the tag check; previously the union member was read
         unconditionally, i.e. even when some other member of
         st->Ist was the active one. */
      if (st->tag == Ist_PutI) {
         IRPutI *puti = st->Ist.PutI.details;
         if (puti->ix->tag == Iex_RdTmp
             && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp,
                              &var2, &con2)) {
            if (DEBUG_IROPT) {
               vex_printf("replacing2 ");
               ppIRStmt(st);
               vex_printf(" with ");
            }
            con2 += puti->bias;
            bb->stmts[i]
               = IRStmt_PutI(mkIRPutI(puti->descr,
                                      IRExpr_RdTmp(var2),
                                      con2,
                                      puti->data));
            if (DEBUG_IROPT) {
               ppIRStmt(bb->stmts[i]);
               vex_printf("\n");
            }
         }
         continue;
      }

   } /* for */
}
4206 
4207 
4208 /*---------------------------------------------------------------*/
4209 /*--- PutI/GetI transformations                               ---*/
4210 /*---------------------------------------------------------------*/
4211 
4212 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
4213    the given starting point to find, if any, a PutI which writes
4214    exactly the same piece of guest state, and so return the expression
4215    that the PutI writes.  This is the core of PutI-GetI forwarding. */
4216 
4217 static
findPutI(IRSB * bb,Int startHere,IRRegArray * descrG,IRExpr * ixG,Int biasG)4218 IRExpr* findPutI ( IRSB* bb, Int startHere,
4219                    IRRegArray* descrG, IRExpr* ixG, Int biasG )
4220 {
4221    Int        j;
4222    IRStmt*    st;
4223    GSAliasing relation;
4224 
4225    if (0) {
4226       vex_printf("\nfindPutI ");
4227       ppIRRegArray(descrG);
4228       vex_printf(" ");
4229       ppIRExpr(ixG);
4230       vex_printf(" %d\n", biasG);
4231    }
4232 
4233    /* Scan backwards in bb from startHere to find a suitable PutI
4234       binding for (descrG, ixG, biasG), if any. */
4235 
4236    for (j = startHere; j >= 0; j--) {
4237       st = bb->stmts[j];
4238       if (st->tag == Ist_NoOp)
4239          continue;
4240 
4241       if (st->tag == Ist_Put) {
4242          /* Non-indexed Put.  This can't give a binding, but we do
4243             need to check it doesn't invalidate the search by
4244             overlapping any part of the indexed guest state. */
4245 
4246          relation
4247             = getAliasingRelation_IC(
4248                  descrG, ixG,
4249                  st->Ist.Put.offset,
4250                  typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
4251 
4252          if (relation == NoAlias) {
4253             /* we're OK; keep going */
4254             continue;
4255          } else {
4256             /* relation == UnknownAlias || relation == ExactAlias */
4257             /* If this assertion fails, we've found a Put which writes
4258                an area of guest state which is read by a GetI.  Which
4259                is unlikely (although not per se wrong). */
4260             vassert(relation != ExactAlias);
4261             /* This Put potentially writes guest state that the GetI
4262                reads; we must fail. */
4263             return NULL;
4264          }
4265       }
4266 
4267       if (st->tag == Ist_PutI) {
4268          IRPutI *puti = st->Ist.PutI.details;
4269 
4270          relation = getAliasingRelation_II(
4271                        descrG, ixG, biasG,
4272                        puti->descr,
4273                        puti->ix,
4274                        puti->bias
4275                     );
4276 
4277          if (relation == NoAlias) {
4278             /* This PutI definitely doesn't overlap.  Ignore it and
4279                keep going. */
4280             continue; /* the for j loop */
4281          }
4282 
4283          if (relation == UnknownAlias) {
4284             /* We don't know if this PutI writes to the same guest
4285                state that the GetI, or not.  So we have to give up. */
4286             return NULL;
4287          }
4288 
4289          /* Otherwise, we've found what we're looking for.  */
4290          vassert(relation == ExactAlias);
4291          return puti->data;
4292 
4293       } /* if (st->tag == Ist_PutI) */
4294 
4295       if (st->tag == Ist_Dirty) {
4296          /* Be conservative.  If the dirty call has any guest effects at
4297             all, give up.  We could do better -- only give up if there
4298             are any guest writes/modifies. */
4299          if (st->Ist.Dirty.details->nFxState > 0)
4300             return NULL;
4301       }
4302 
4303    } /* for */
4304 
4305    /* No valid replacement was found. */
4306    return NULL;
4307 }
4308 
4309 
4310 
4311 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
4312    that it writes exactly the same piece of guest state) ?  Safe
4313    answer: False. */
4314 
identicalPutIs(IRStmt * pi,IRStmt * s2)4315 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
4316 {
4317    vassert(pi->tag == Ist_PutI);
4318    if (s2->tag != Ist_PutI)
4319       return False;
4320 
4321    IRPutI *p1 = pi->Ist.PutI.details;
4322    IRPutI *p2 = s2->Ist.PutI.details;
4323 
4324    return toBool(
4325           getAliasingRelation_II(
4326              p1->descr, p1->ix, p1->bias,
4327              p2->descr, p2->ix, p2->bias
4328           )
4329           == ExactAlias
4330           );
4331 }
4332 
4333 
4334 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
4335    overlap it?  Safe answer: True.  Note, we could do a lot better
4336    than this if needed. */
4337 
static
Bool guestAccessWhichMightOverlapPutI (
        IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
     )
{
   GSAliasing relation;
   UInt       minoffP, maxoffP;

   vassert(pi->tag == Ist_PutI);

   IRPutI *p1 = pi->Ist.PutI.details;

   /* NOTE(review): minoffP/maxoffP are computed here but never read
      below -- apparently a leftover.  Harmless. */
   getArrayBounds(p1->descr, &minoffP, &maxoffP);
   switch (s2->tag) {

      /* Statements that touch no guest state at all. */
      case Ist_NoOp:
      case Ist_IMark:
         return False;

      case Ist_MBE:
      case Ist_AbiHint:
         /* just be paranoid ... these should be rare. */
         return True;

      case Ist_CAS:
         /* This is unbelievably lame, but it's probably not
            significant from a performance point of view.  Really, a
            CAS is a load-store op, so it should be safe to say False.
            However .. */
         return True;

      case Ist_Dirty:
         /* If the dirty call has any guest effects at all, give up.
            Probably could do better. */
         if (s2->Ist.Dirty.details->nFxState > 0)
            return True;
         return False;

      case Ist_Put:
         /* Non-indexed write: compare against the PutI's array via
            the indexed-vs-constant aliasing test. */
         vassert(isIRAtom(s2->Ist.Put.data));
         relation
            = getAliasingRelation_IC(
                 p1->descr, p1->ix,
                 s2->Ist.Put.offset,
                 typeOfIRExpr(tyenv,s2->Ist.Put.data)
              );
         goto have_relation;

      case Ist_PutI: {
         /* Indexed write: indexed-vs-indexed aliasing test. */
         IRPutI *p2 = s2->Ist.PutI.details;

         vassert(isIRAtom(p2->ix));
         vassert(isIRAtom(p2->data));
         relation
            = getAliasingRelation_II(
                 p1->descr, p1->ix, p1->bias,
                 p2->descr, p2->ix, p2->bias
              );
         goto have_relation;
      }

      case Ist_WrTmp:
         /* Only Get/GetI on the rhs read guest state; any other
            expression form cannot overlap the PutI. */
         if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
            relation
               = getAliasingRelation_II(
                    p1->descr, p1->ix, p1->bias,
                    s2->Ist.WrTmp.data->Iex.GetI.descr,
                    s2->Ist.WrTmp.data->Iex.GetI.ix,
                    s2->Ist.WrTmp.data->Iex.GetI.bias
                 );
            goto have_relation;
         }
         if (s2->Ist.WrTmp.data->tag == Iex_Get) {
            relation
               = getAliasingRelation_IC(
                    p1->descr, p1->ix,
                    s2->Ist.WrTmp.data->Iex.Get.offset,
                    s2->Ist.WrTmp.data->Iex.Get.ty
                 );
            goto have_relation;
         }
         return False;

      case Ist_Store:
         /* Memory stores can't touch guest state. */
         vassert(isIRAtom(s2->Ist.Store.addr));
         vassert(isIRAtom(s2->Ist.Store.data));
         return False;

      default:
         vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
         vpanic("guestAccessWhichMightOverlapPutI");
   }

  have_relation:
   if (relation == NoAlias)
      return False;
   else
      return True; /* ExactAlias or UnknownAlias */
}
4437 
4438 
4439 
4440 /* ---------- PutI/GetI transformations main functions --------- */
4441 
4442 /* Remove redundant GetIs, to the extent that they can be detected.
4443    bb is modified in-place. */
4444 
4445 static
do_redundant_GetI_elimination(IRSB * bb)4446 void do_redundant_GetI_elimination ( IRSB* bb )
4447 {
4448    Int     i;
4449    IRStmt* st;
4450 
4451    for (i = bb->stmts_used-1; i >= 0; i--) {
4452       st = bb->stmts[i];
4453       if (st->tag == Ist_NoOp)
4454          continue;
4455 
4456       if (st->tag == Ist_WrTmp
4457           && st->Ist.WrTmp.data->tag == Iex_GetI
4458           && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
4459          IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
4460          IRExpr*     ix    = st->Ist.WrTmp.data->Iex.GetI.ix;
4461          Int         bias  = st->Ist.WrTmp.data->Iex.GetI.bias;
4462          IRExpr*     replacement = findPutI(bb, i-1, descr, ix, bias);
4463          if (replacement
4464              && isIRAtom(replacement)
4465              /* Make sure we're doing a type-safe transformation! */
4466              && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
4467             if (DEBUG_IROPT) {
4468                vex_printf("rGI:  ");
4469                ppIRExpr(st->Ist.WrTmp.data);
4470                vex_printf(" -> ");
4471                ppIRExpr(replacement);
4472                vex_printf("\n");
4473             }
4474             bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
4475          }
4476       }
4477    }
4478 
4479 }
4480 
4481 
4482 /* Remove redundant PutIs, to the extent which they can be detected.
4483    bb is modified in-place. */
4484 
4485 static
do_redundant_PutI_elimination(IRSB * bb,VexRegisterUpdates pxControl)4486 void do_redundant_PutI_elimination ( IRSB* bb, VexRegisterUpdates pxControl )
4487 {
4488    Int    i, j;
4489    Bool   delete;
4490    IRStmt *st, *stj;
4491 
4492    vassert(pxControl < VexRegUpdAllregsAtEachInsn);
4493 
4494    for (i = 0; i < bb->stmts_used; i++) {
4495       st = bb->stmts[i];
4496       if (st->tag != Ist_PutI)
4497          continue;
4498       /* Ok, search forwards from here to see if we can find another
4499          PutI which makes this one redundant, and dodging various
4500          hazards.  Search forwards:
4501          * If conditional exit, give up (because anything after that
4502            does not postdominate this put).
4503          * If a Get which might overlap, give up (because this PutI
4504            not necessarily dead).
4505          * If a Put which is identical, stop with success.
4506          * If a Put which might overlap, but is not identical, give up.
4507          * If a dirty helper call which might write guest state, give up.
4508          * If a Put which definitely doesn't overlap, or any other
4509            kind of stmt, continue.
4510       */
4511       delete = False;
4512       for (j = i+1; j < bb->stmts_used; j++) {
4513          stj = bb->stmts[j];
4514          if (stj->tag == Ist_NoOp)
4515             continue;
4516          if (identicalPutIs(st, stj)) {
4517             /* success! */
4518             delete = True;
4519             break;
4520          }
4521          if (stj->tag == Ist_Exit)
4522             /* give up */
4523             break;
4524          if (st->tag == Ist_Dirty)
4525             /* give up; could do better here */
4526             break;
4527          if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
4528             /* give up */
4529            break;
4530       }
4531 
4532       if (delete) {
4533          if (DEBUG_IROPT) {
4534             vex_printf("rPI:  ");
4535             ppIRStmt(st);
4536             vex_printf("\n");
4537          }
4538          bb->stmts[i] = IRStmt_NoOp();
4539       }
4540 
4541    }
4542 }
4543 
4544 
4545 /*---------------------------------------------------------------*/
4546 /*--- Loop unrolling                                          ---*/
4547 /*---------------------------------------------------------------*/
4548 
4549 /* Adjust all tmp values (names) in e by delta.  e is destructively
4550    modified. */
4551 
static void deltaIRExpr ( IRExpr* e, Int delta )
{
   /* Recursively shift every temp name inside e by delta.  e is
      destructively modified. */
   Int k;
   switch (e->tag) {
      case Iex_Get:
      case Iex_Const:
         /* no temps inside */
         break;
      case Iex_RdTmp:
         e->Iex.RdTmp.tmp += delta;
         break;
      case Iex_Unop:
         deltaIRExpr(e->Iex.Unop.arg, delta);
         break;
      case Iex_Binop:
         deltaIRExpr(e->Iex.Binop.arg1, delta);
         deltaIRExpr(e->Iex.Binop.arg2, delta);
         break;
      case Iex_Triop: {
         IRTriop* tri = e->Iex.Triop.details;
         deltaIRExpr(tri->arg1, delta);
         deltaIRExpr(tri->arg2, delta);
         deltaIRExpr(tri->arg3, delta);
         break;
      }
      case Iex_Qop: {
         IRQop* q = e->Iex.Qop.details;
         deltaIRExpr(q->arg1, delta);
         deltaIRExpr(q->arg2, delta);
         deltaIRExpr(q->arg3, delta);
         deltaIRExpr(q->arg4, delta);
         break;
      }
      case Iex_GetI:
         deltaIRExpr(e->Iex.GetI.ix, delta);
         break;
      case Iex_Load:
         deltaIRExpr(e->Iex.Load.addr, delta);
         break;
      case Iex_CCall:
         /* args is a NULL-terminated vector */
         for (k = 0; e->Iex.CCall.args[k]; k++)
            deltaIRExpr(e->Iex.CCall.args[k], delta);
         break;
      case Iex_ITE:
         deltaIRExpr(e->Iex.ITE.cond, delta);
         deltaIRExpr(e->Iex.ITE.iftrue, delta);
         deltaIRExpr(e->Iex.ITE.iffalse, delta);
         break;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("deltaIRExpr");
   }
}
4600 
4601 /* Adjust all tmp values (names) in st by delta.  st is destructively
4602    modified. */
4603 
static void deltaIRStmt ( IRStmt* st, Int delta )
{
   /* Shift every temp name (both defined and used) in st by delta.
      st is destructively modified. */
   Int k;
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* no temps anywhere */
         break;
      case Ist_AbiHint:
         deltaIRExpr(st->Ist.AbiHint.base, delta);
         deltaIRExpr(st->Ist.AbiHint.nia, delta);
         break;
      case Ist_Put:
         deltaIRExpr(st->Ist.Put.data, delta);
         break;
      case Ist_PutI: {
         IRPutI* puti = st->Ist.PutI.details;
         deltaIRExpr(puti->ix, delta);
         deltaIRExpr(puti->data, delta);
         break;
      }
      case Ist_WrTmp:
         st->Ist.WrTmp.tmp += delta;
         deltaIRExpr(st->Ist.WrTmp.data, delta);
         break;
      case Ist_Exit:
         deltaIRExpr(st->Ist.Exit.guard, delta);
         break;
      case Ist_Store:
         deltaIRExpr(st->Ist.Store.addr, delta);
         deltaIRExpr(st->Ist.Store.data, delta);
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         deltaIRExpr(sg->addr, delta);
         deltaIRExpr(sg->data, delta);
         deltaIRExpr(sg->guard, delta);
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         lg->dst += delta;
         deltaIRExpr(lg->addr, delta);
         deltaIRExpr(lg->alt, delta);
         deltaIRExpr(lg->guard, delta);
         break;
      }
      case Ist_CAS: {
         IRCAS* cas = st->Ist.CAS.details;
         /* oldHi/expdHi/dataHi are only present for double CAS */
         if (cas->oldHi != IRTemp_INVALID)
            cas->oldHi += delta;
         cas->oldLo += delta;
         deltaIRExpr(cas->addr, delta);
         if (cas->expdHi)
            deltaIRExpr(cas->expdHi, delta);
         deltaIRExpr(cas->expdLo, delta);
         if (cas->dataHi)
            deltaIRExpr(cas->dataHi, delta);
         deltaIRExpr(cas->dataLo, delta);
         break;
      }
      case Ist_LLSC:
         st->Ist.LLSC.result += delta;
         deltaIRExpr(st->Ist.LLSC.addr, delta);
         if (st->Ist.LLSC.storedata)
            deltaIRExpr(st->Ist.LLSC.storedata, delta);
         break;
      case Ist_Dirty: {
         IRDirty* d = st->Ist.Dirty.details;
         deltaIRExpr(d->guard, delta);
         for (k = 0; d->args[k]; k++) {
            IRExpr* arg = d->args[k];
            /* VECRET/GSPTR markers are not real expressions */
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               deltaIRExpr(arg, delta);
         }
         if (d->tmp != IRTemp_INVALID)
            d->tmp += delta;
         if (d->mAddr)
            deltaIRExpr(d->mAddr, delta);
         break;
      }
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("deltaIRStmt");
   }
}
4686 
4687 
4688 /* If possible, return a loop-unrolled version of bb0.  The original
4689    is changed.  If not possible, return NULL.  */
4690 
4691 /* The two schemas considered are:
4692 
4693      X: BODY; goto X
4694 
4695      which unrolls to (eg)  X: BODY;BODY; goto X
4696 
4697    and
4698 
4699        X: BODY; if (c) goto X; goto Y
4700    which trivially transforms to
4701        X: BODY; if (!c) goto Y; goto X;
4702    so it falls in the scope of the first case.
4703 
4704    X and Y must be literal (guest) addresses.
4705 */
4706 
calc_unroll_factor(IRSB * bb)4707 static Int calc_unroll_factor( IRSB* bb )
4708 {
4709    Int n_stmts, i;
4710 
4711    n_stmts = 0;
4712    for (i = 0; i < bb->stmts_used; i++) {
4713       if (bb->stmts[i]->tag != Ist_NoOp)
4714          n_stmts++;
4715    }
4716 
4717    if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
4718       if (vex_control.iropt_verbosity > 0)
4719          vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
4720                     n_stmts, 8* n_stmts);
4721       return 8;
4722    }
4723    if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
4724       if (vex_control.iropt_verbosity > 0)
4725          vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
4726                     n_stmts, 4* n_stmts);
4727       return 4;
4728    }
4729 
4730    if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
4731       if (vex_control.iropt_verbosity > 0)
4732          vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
4733                     n_stmts, 2* n_stmts);
4734       return 2;
4735    }
4736 
4737    if (vex_control.iropt_verbosity > 0)
4738       vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
4739 
4740    return 1;
4741 }
4742 
4743 
maybe_loop_unroll_BB(IRSB * bb0,Addr my_addr)4744 static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr my_addr )
4745 {
4746    Int      i, j, jmax, n_vars;
4747    Bool     xxx_known;
4748    Addr64   xxx_value, yyy_value;
4749    IRExpr*  udst;
4750    IRStmt*  st;
4751    IRConst* con;
4752    IRSB     *bb1, *bb2;
4753    Int      unroll_factor;
4754 
4755    if (vex_control.iropt_unroll_thresh <= 0)
4756       return NULL;
4757 
4758    /* First off, figure out if we can unroll this loop.  Do this
4759       without modifying bb0. */
4760 
4761    if (bb0->jumpkind != Ijk_Boring)
4762       return NULL;
4763 
4764    xxx_known = False;
4765    xxx_value = 0;
4766 
4767    /* Extract the next-guest address.  If it isn't a literal, we
4768       have to give up. */
4769 
4770    udst = bb0->next;
4771    if (udst->tag == Iex_Const
4772        && (udst->Iex.Const.con->tag == Ico_U32
4773            || udst->Iex.Const.con->tag == Ico_U64)) {
4774       /* The BB ends in a jump to a literal location. */
4775       xxx_known = True;
4776       xxx_value = udst->Iex.Const.con->tag == Ico_U64
4777                     ?  udst->Iex.Const.con->Ico.U64
4778                     : (Addr64)(udst->Iex.Const.con->Ico.U32);
4779    }
4780 
4781    if (!xxx_known)
4782       return NULL;
4783 
4784    /* Now we know the BB ends to a jump to a literal location.  If
4785       it's a jump to itself (viz, idiom #1), move directly to the
4786       unrolling stage, first cloning the bb so the original isn't
4787       modified. */
4788    if (xxx_value == my_addr) {
4789       unroll_factor = calc_unroll_factor( bb0 );
4790       if (unroll_factor < 2)
4791          return NULL;
4792       bb1 = deepCopyIRSB( bb0 );
4793       bb0 = NULL;
4794       udst = NULL; /* is now invalid */
4795       goto do_unroll;
4796    }
4797 
4798    /* Search for the second idiomatic form:
4799         X: BODY; if (c) goto X; goto Y
4800       We know Y, but need to establish that the last stmt
4801       is 'if (c) goto X'.
4802    */
4803    yyy_value = xxx_value;
4804    for (i = bb0->stmts_used-1; i >= 0; i--)
4805       if (bb0->stmts[i])
4806          break;
4807 
4808    if (i < 0)
4809       return NULL; /* block with no stmts.  Strange. */
4810 
4811    st = bb0->stmts[i];
4812    if (st->tag != Ist_Exit)
4813       return NULL;
4814    if (st->Ist.Exit.jk != Ijk_Boring)
4815       return NULL;
4816 
4817    con = st->Ist.Exit.dst;
4818    vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
4819 
4820    xxx_value = con->tag == Ico_U64
4821                   ? st->Ist.Exit.dst->Ico.U64
4822                   : (Addr64)(st->Ist.Exit.dst->Ico.U32);
4823 
4824    /* If this assertion fails, we have some kind of type error. */
4825    vassert(con->tag == udst->Iex.Const.con->tag);
4826 
4827    if (xxx_value != my_addr)
4828       /* We didn't find either idiom.  Give up. */
4829       return NULL;
4830 
4831    /* Ok, we found idiom #2.  Copy the BB, switch around the xxx and
4832       yyy values (which makes it look like idiom #1), and go into
4833       unrolling proper.  This means finding (again) the last stmt, in
4834       the copied BB. */
4835 
4836    unroll_factor = calc_unroll_factor( bb0 );
4837    if (unroll_factor < 2)
4838       return NULL;
4839 
4840    bb1 = deepCopyIRSB( bb0 );
4841    bb0 = NULL;
4842    udst = NULL; /* is now invalid */
4843    for (i = bb1->stmts_used-1; i >= 0; i--)
4844       if (bb1->stmts[i])
4845          break;
4846 
4847    /* The next bunch of assertions should be true since we already
4848       found and checked the last stmt in the original bb. */
4849 
4850    vassert(i >= 0);
4851 
4852    st = bb1->stmts[i];
4853    vassert(st->tag == Ist_Exit);
4854 
4855    con = st->Ist.Exit.dst;
4856    vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
4857 
4858    udst = bb1->next;
4859    vassert(udst->tag == Iex_Const);
4860    vassert(udst->Iex.Const.con->tag == Ico_U32
4861           || udst->Iex.Const.con->tag == Ico_U64);
4862    vassert(con->tag == udst->Iex.Const.con->tag);
4863 
4864    /* switch the xxx and yyy fields around */
4865    if (con->tag == Ico_U64) {
4866       udst->Iex.Const.con->Ico.U64 = xxx_value;
4867       con->Ico.U64 = yyy_value;
4868    } else {
4869       udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
4870       con->Ico.U32 = (UInt)yyy_value;
4871    }
4872 
4873    /* negate the test condition */
4874    st->Ist.Exit.guard
4875       = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));
4876 
4877    /* --- The unroller proper.  Both idioms are by now --- */
4878    /* --- now converted to idiom 1. --- */
4879 
4880   do_unroll:
4881 
4882    vassert(unroll_factor == 2
4883            || unroll_factor == 4
4884            || unroll_factor == 8);
4885 
4886    jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
4887    for (j = 1; j <= jmax; j++) {
4888 
4889       n_vars = bb1->tyenv->types_used;
4890 
4891       bb2 = deepCopyIRSB(bb1);
4892       for (i = 0; i < n_vars; i++)
4893          (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);
4894 
4895       for (i = 0; i < bb2->stmts_used; i++) {
4896          /* deltaIRStmt destructively modifies the stmt, but
4897             that's OK since bb2 is a complete fresh copy of bb1. */
4898          deltaIRStmt(bb2->stmts[i], n_vars);
4899          addStmtToIRSB(bb1, bb2->stmts[i]);
4900       }
4901    }
4902 
4903    if (DEBUG_IROPT) {
4904       vex_printf("\nUNROLLED (%lx)\n", my_addr);
4905       ppIRSB(bb1);
4906       vex_printf("\n");
4907    }
4908 
4909    /* Flattening; sigh.  The unroller succeeds in breaking flatness
4910       by negating the test condition.  This should be fixed properly.
4911       For the moment use this shotgun approach.  */
4912    return flatten_BB(bb1);
4913 }
4914 
4915 
4916 /*---------------------------------------------------------------*/
4917 /*--- The tree builder                                        ---*/
4918 /*---------------------------------------------------------------*/
4919 
4920 /* This isn't part of IR optimisation.  Really it's a pass done prior
4921    to instruction selection, which improves the code that the
4922    instruction selector can produce. */
4923 
4924 /* --- The 'tmp' environment is the central data structure here --- */
4925 
4926 /* The number of outstanding bindings we're prepared to track.
4927    The number of times the env becomes full and we have to dump
4928    the oldest binding (hence reducing code quality) falls very
4929    rapidly as the env size increases.  8 gives reasonable performance
4930    under most circumstances. */
4931 #define A_NENV 10
4932 
4933 /* An interval. Used to record the bytes in the guest state accessed
4934    by a Put[I] statement or by (one or more) Get[I] expression(s). In
4935    case of several Get[I] expressions, the lower/upper bounds are recorded.
4936    This is conservative but cheap.
4937    E.g. a Put of 8 bytes at address 100 would be recorded as [100,107].
4938    E.g. an expression that reads 8 bytes at offset 100 and 4 bytes at
4939    offset 200 would be recorded as [100,203] */
typedef
   struct {
      Bool present;   /* False until the first update_interval() call */
      Int  low;       /* lowest guest-state byte offset covered, inclusive */
      Int  high;      /* highest guest-state byte offset covered, inclusive */
   }
   Interval;
4947 
4948 static inline Bool
intervals_overlap(Interval i1,Interval i2)4949 intervals_overlap(Interval i1, Interval i2)
4950 {
4951    return (i1.low >= i2.low && i1.low <= i2.high) ||
4952           (i2.low >= i1.low && i2.low <= i1.high);
4953 }
4954 
4955 static inline void
update_interval(Interval * i,Int low,Int high)4956 update_interval(Interval *i, Int low, Int high)
4957 {
4958    vassert(low <= high);
4959 
4960    if (i->present) {
4961       if (low  < i->low)  i->low  = low;
4962       if (high > i->high) i->high = high;
4963    } else {
4964       i->present = True;
4965       i->low  = low;
4966       i->high = high;
4967    }
4968 }
4969 
4970 
4971 /* bindee == NULL   ===  slot is not in use
4972    bindee != NULL   ===  slot is in use
4973 */
typedef
   struct {
      IRTemp  binder;   /* the temp this slot provides a binding for */
      IRExpr* bindee;   /* the bound expression; NULL marks the slot free */
      Bool    doesLoad; /* True if BINDEE contains a memory load */
      /* Record the bytes of the guest state BINDEE reads from. */
      Interval getInterval;
   }
   ATmpInfo;
4983 
4984 __attribute__((unused))
ppAEnv(ATmpInfo * env)4985 static void ppAEnv ( ATmpInfo* env )
4986 {
4987    Int i;
4988    for (i = 0; i < A_NENV; i++) {
4989       vex_printf("%d  tmp %d  val ", i, (Int)env[i].binder);
4990       if (env[i].bindee)
4991          ppIRExpr(env[i].bindee);
4992       else
4993          vex_printf("(null)");
4994       vex_printf("\n");
4995    }
4996 }
4997 
4998 /* --- Tree-traversal fns --- */
4999 
5000 /* Traverse an expr, and detect if any part of it reads memory or does
5001    a Get.  Be careful ... this really controls how much the
5002    tree-builder can reorder the code, so getting it right is critical.
5003 */
static void setHints_Expr (Bool* doesLoad, Interval* getInterval, IRExpr* e )
{
   Int i;
   switch (e->tag) {
      case Iex_CCall:
         /* The args vector is NULL-terminated. */
         for (i = 0; e->Iex.CCall.args[i]; i++)
            setHints_Expr(doesLoad, getInterval, e->Iex.CCall.args[i]);
         return;
      case Iex_ITE:
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.cond);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iftrue);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iffalse);
         return;
      case Iex_Qop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg3);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg4);
         return;
      case Iex_Triop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg3);
         return;
      case Iex_Binop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg2);
         return;
      case Iex_Unop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Unop.arg);
         return;
      case Iex_Load:
         /* Any load anywhere in the tree sets the sticky flag. */
         *doesLoad = True;
         setHints_Expr(doesLoad, getInterval, e->Iex.Load.addr);
         return;
      case Iex_Get: {
         /* Fixed-offset guest-state read: widen the interval by
            exactly the bytes accessed. */
         Int low = e->Iex.Get.offset;
         Int high = low + sizeofIRType(e->Iex.Get.ty) - 1;
         update_interval(getInterval, low, high);
         return;
      }
      case Iex_GetI: {
         /* Indexed read: the run-time index is unknown, so
            conservatively cover the whole array. */
         IRRegArray *descr = e->Iex.GetI.descr;
         Int size = sizeofIRType(descr->elemTy);
         Int low  = descr->base;
         Int high = low + descr->nElems * size - 1;
         update_interval(getInterval, low, high);
         setHints_Expr(doesLoad, getInterval, e->Iex.GetI.ix);
         return;
      }
      case Iex_RdTmp:
      case Iex_Const:
         /* Leaves: neither load nor get. */
         return;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("setHints_Expr");
   }
}
5062 
5063 
5064 /* Add a binding to the front of the env and slide all the rest
5065    backwards.  It should be the case that the last slot is free. */
/* Push a new (binder, bindee) pair onto the front of the env,
   shifting all existing entries one slot towards the back.  The
   caller must guarantee the last slot is free. */
static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
{
   Int j;
   vassert(env[A_NENV-1].bindee == NULL);
   for (j = A_NENV-1; j > 0; j--)
      env[j] = env[j-1];
   /* Install the new binding; the load/get hints are filled in later. */
   env[0].binder   = binder;
   env[0].bindee   = bindee;
   env[0].doesLoad = False;
   env[0].getInterval.present = False;
   env[0].getInterval.low     = -1;
   env[0].getInterval.high    = -1;
}
5079 
5080 /* Given uses :: array of UShort, indexed by IRTemp
5081    Add the use-occurrences of temps in this expression
5082    to the env.
5083 */
/* Walk E and bump uses[t] once for every occurrence of temp t in it.
   uses is indexed by IRTemp. */
static void aoccCount_Expr ( UShort* uses, IRExpr* e )
{
   Int k;

   switch (e->tag) {

      case Iex_RdTmp:
         /* The only case that actually counts anything. */
         uses[e->Iex.RdTmp.tmp]++;
         return;

      case Iex_Const:
      case Iex_Get:
         /* Leaves with no temps underneath. */
         return;

      case Iex_Unop:
         aoccCount_Expr(uses, e->Iex.Unop.arg);
         return;

      case Iex_Binop:
         aoccCount_Expr(uses, e->Iex.Binop.arg1);
         aoccCount_Expr(uses, e->Iex.Binop.arg2);
         return;

      case Iex_Triop:
         aoccCount_Expr(uses, e->Iex.Triop.details->arg1);
         aoccCount_Expr(uses, e->Iex.Triop.details->arg2);
         aoccCount_Expr(uses, e->Iex.Triop.details->arg3);
         return;

      case Iex_Qop:
         aoccCount_Expr(uses, e->Iex.Qop.details->arg1);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg2);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg3);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg4);
         return;

      case Iex_ITE:
         aoccCount_Expr(uses, e->Iex.ITE.cond);
         aoccCount_Expr(uses, e->Iex.ITE.iftrue);
         aoccCount_Expr(uses, e->Iex.ITE.iffalse);
         return;

      case Iex_Load:
         aoccCount_Expr(uses, e->Iex.Load.addr);
         return;

      case Iex_GetI:
         aoccCount_Expr(uses, e->Iex.GetI.ix);
         return;

      case Iex_CCall:
         /* The args vector is NULL-terminated. */
         for (k = 0; e->Iex.CCall.args[k]; k++)
            aoccCount_Expr(uses, e->Iex.CCall.args[k]);
         return;

      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("aoccCount_Expr");
    }
}
5144 
5145 
5146 /* Given uses :: array of UShort, indexed by IRTemp
5147    Add the use-occurrences of temps in this statement
5148    to the env.
5149 */
static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
{
   Int      i;
   IRDirty* d;
   IRCAS*   cas;
   switch (st->tag) {
      case Ist_AbiHint:
         aoccCount_Expr(uses, st->Ist.AbiHint.base);
         aoccCount_Expr(uses, st->Ist.AbiHint.nia);
         return;
      case Ist_WrTmp:
         /* Only the RHS is counted; the written temp is a definition,
            not a use. */
         aoccCount_Expr(uses, st->Ist.WrTmp.data);
         return;
      case Ist_Put:
         aoccCount_Expr(uses, st->Ist.Put.data);
         return;
      case Ist_PutI:
         aoccCount_Expr(uses, st->Ist.PutI.details->ix);
         aoccCount_Expr(uses, st->Ist.PutI.details->data);
         return;
      case Ist_Store:
         aoccCount_Expr(uses, st->Ist.Store.addr);
         aoccCount_Expr(uses, st->Ist.Store.data);
         return;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         aoccCount_Expr(uses, sg->addr);
         aoccCount_Expr(uses, sg->data);
         aoccCount_Expr(uses, sg->guard);
         return;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         aoccCount_Expr(uses, lg->addr);
         aoccCount_Expr(uses, lg->alt);
         aoccCount_Expr(uses, lg->guard);
         return;
      }
      case Ist_CAS:
         /* expdHi/dataHi may be NULL (single-wide CAS) and are then
            skipped. */
         cas = st->Ist.CAS.details;
         aoccCount_Expr(uses, cas->addr);
         if (cas->expdHi)
            aoccCount_Expr(uses, cas->expdHi);
         aoccCount_Expr(uses, cas->expdLo);
         if (cas->dataHi)
            aoccCount_Expr(uses, cas->dataHi);
         aoccCount_Expr(uses, cas->dataLo);
         return;
      case Ist_LLSC:
         /* storedata may be NULL and is then skipped. */
         aoccCount_Expr(uses, st->Ist.LLSC.addr);
         if (st->Ist.LLSC.storedata)
            aoccCount_Expr(uses, st->Ist.LLSC.storedata);
         return;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None)
            aoccCount_Expr(uses, d->mAddr);
         aoccCount_Expr(uses, d->guard);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            /* VECRET/GSPTR markers are not ordinary expressions and
               carry no temps. */
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               aoccCount_Expr(uses, arg);
         }
         return;
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* No expressions inside. */
         return;
      case Ist_Exit:
         aoccCount_Expr(uses, st->Ist.Exit.guard);
         return;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("aoccCount_Stmt");
   }
}
5226 
5227 /* Look up a binding for tmp in the env.  If found, return the bound
5228    expression, and set the env's binding to NULL so it is marked as
5229    used.  If not found, return NULL. */
5230 
atbSubst_Temp(ATmpInfo * env,IRTemp tmp)5231 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
5232 {
5233    Int i;
5234    for (i = 0; i < A_NENV; i++) {
5235       if (env[i].binder == tmp && env[i].bindee != NULL) {
5236          IRExpr* bindee = env[i].bindee;
5237          env[i].bindee = NULL;
5238          return bindee;
5239       }
5240    }
5241    return NULL;
5242 }
5243 
5244 /* Traverse e, looking for temps.  For each observed temp, see if env
5245    contains a binding for the temp, and if so return the bound value.
5246    The env has the property that any binding it holds is
5247    'single-shot', so once a binding is used, it is marked as no longer
5248    available, by setting its .bindee field to NULL. */
5249 
is_Unop(IRExpr * e,IROp op)5250 static inline Bool is_Unop ( IRExpr* e, IROp op ) {
5251    return e->tag == Iex_Unop && e->Iex.Unop.op == op;
5252 }
is_Binop(IRExpr * e,IROp op)5253 static inline Bool is_Binop ( IRExpr* e, IROp op ) {
5254    return e->tag == Iex_Binop && e->Iex.Binop.op == op;
5255 }
5256 
/* Build Binop(op, a1, a2), first trying a couple of local
   simplification rules. */
static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   switch (op) {
      case Iop_Or32:
         /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) ) */
         if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32)) {
            IRExpr* ored = IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
                                                   a2->Iex.Unop.arg );
            return IRExpr_Unop( Iop_CmpwNEZ32, ored );
         }
         break;

      case Iop_CmpNE32:
         /* CmpNE32(1Uto32(X),0) ==> X, valid since X has type Ity_I1. */
         if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
            return a1->Iex.Unop.arg;
         break;

      default:
         break;
   }
   /* no reduction rule applies */
   return IRExpr_Binop( op, a1, a2 );
}
5281 
/* Build Unop(op, aa), first applying local folding rules.  The rules
   mostly collapse redundant widen/narrow/compare chains; each rule is
   stated in the comment directly above its test. */
static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
{
   switch (op) {
   case Iop_CmpwNEZ64:
      /* CmpwNEZ64( CmpwNEZ64 ( x ) ) --> CmpwNEZ64 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpwNEZ64, aa->Iex.Unop.arg );
      /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1->Iex.Unop.arg,
                                aa->Iex.Binop.arg2));
      /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1,
                                aa->Iex.Binop.arg2->Iex.Unop.arg));
      break;
   case Iop_CmpNEZ64:
      /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
      /* CmpNEZ64( 1Uto64(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;
      break;
   case Iop_CmpwNEZ32:
      /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_CmpNEZ32:
      /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
      /* CmpNEZ32( 1Uto32(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* CmpNEZ32( 64to32( CmpwNEZ64(X) ) ) --> CmpNEZ64(X) */
      if (is_Unop(aa, Iop_64to32) && is_Unop(aa->Iex.Unop.arg, Iop_CmpwNEZ64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg->Iex.Unop.arg);
      break;
   case Iop_CmpNEZ8:
      /* CmpNEZ8( 1Uto8(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto8))
         return aa->Iex.Unop.arg;
      break;
   case Iop_Left32:
      /* Left32( Left32(x) ) --> Left32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
      break;
   case Iop_Left64:
      /* Left64( Left64(x) ) --> Left64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop( Iop_Left64, aa->Iex.Unop.arg );
      break;
   case Iop_ZeroHI64ofV128:
      /* ZeroHI64ofV128( ZeroHI64ofV128(x) ) --> ZeroHI64ofV128(x) */
      if (is_Unop(aa, Iop_ZeroHI64ofV128))
         return IRExpr_Unop( Iop_ZeroHI64ofV128, aa->Iex.Unop.arg );
      break;
   case Iop_32to1:
      /* 32to1( 1Uto32 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_64to1:
      /* 64to1( 1Uto64 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;
      /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
      break;
   case Iop_64to32:
      /* 64to32( 32Uto64 ( x )) --> x */
      if (is_Unop(aa, Iop_32Uto64))
         return aa->Iex.Unop.arg;
      /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
      if (is_Unop(aa, Iop_8Uto64))
         return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
      break;

   case Iop_32Uto64:
      /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
      if (is_Unop(aa, Iop_8Uto32))
         return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
      /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
      if (is_Unop(aa, Iop_16Uto32))
         return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
      /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh ))
                     --> Shr64( 32Uto64(64to32(x)), sh )) */
      if (is_Unop(aa, Iop_64to32)
          && is_Binop(aa->Iex.Unop.arg, Iop_Shr64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
                     Iop_64to32)) {
         return aa->Iex.Unop.arg;
      }
      /*     32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh ))
         --> 32Uto64(64to32( Shl64(                x,   sh )) */
      if (is_Unop(aa, Iop_64to32)
          && is_Binop(aa->Iex.Unop.arg, Iop_Shl64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
                     Iop_64to32)) {
         return
            IRExpr_Unop(
               Iop_32Uto64,
               IRExpr_Unop(
                  Iop_64to32,
                  IRExpr_Binop(
                     Iop_Shl64,
                     aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg,
                     aa->Iex.Unop.arg->Iex.Binop.arg2
            )));
      }
      break;

   case Iop_1Sto32:
      /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
      if (is_Unop(aa, Iop_CmpNEZ8)
          && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
                     Iop_CmpNEZ32)) {
         return IRExpr_Unop( Iop_CmpwNEZ32,
                             aa->Iex.Unop.arg->Iex.Unop.arg
                               ->Iex.Unop.arg->Iex.Unop.arg);
      }
      break;

   default:
      break;
   }
   /* no reduction rule applies */
   return IRExpr_Unop( op, aa );
}
5430 
/* Rebuild E with env bindings substituted in, folding unops/binops on
   the way back up.  NB: atbSubst_Temp consumes bindings, so this
   mutates ENV as a side effect.  NOTE(review): the recursive calls
   below sit in argument positions, so their evaluation order is
   indeterminate in C; presumably harmless because each binding is
   keyed by a distinct temp -- confirm against the treebuilder's
   single-use invariant. */
static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
{
   IRExpr*  e2;
   IRExpr** args2;
   Int      i;

   switch (e->tag) {

      case Iex_CCall:
         /* Shallow-copy the NULL-terminated args vector so the
            original expression is not mutated. */
         args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
         for (i = 0; args2[i]; i++)
            args2[i] = atbSubst_Expr(env,args2[i]);
         return IRExpr_CCall(
                   e->Iex.CCall.cee,
                   e->Iex.CCall.retty,
                   args2
                );
      case Iex_RdTmp:
         /* Use (and thereby consume) a binding if one exists;
            otherwise keep the temp reference unchanged. */
         e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
         return e2 ? e2 : e;
      case Iex_ITE:
         return IRExpr_ITE(
                   atbSubst_Expr(env, e->Iex.ITE.cond),
                   atbSubst_Expr(env, e->Iex.ITE.iftrue),
                   atbSubst_Expr(env, e->Iex.ITE.iffalse)
                );
      case Iex_Qop:
         return IRExpr_Qop(
                   e->Iex.Qop.details->op,
                   atbSubst_Expr(env, e->Iex.Qop.details->arg1),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg2),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg3),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg4)
                );
      case Iex_Triop:
         return IRExpr_Triop(
                   e->Iex.Triop.details->op,
                   atbSubst_Expr(env, e->Iex.Triop.details->arg1),
                   atbSubst_Expr(env, e->Iex.Triop.details->arg2),
                   atbSubst_Expr(env, e->Iex.Triop.details->arg3)
                );
      case Iex_Binop:
         /* Fold while rebuilding, since substitution may have exposed
            a reducible pattern. */
         return fold_IRExpr_Binop(
                   e->Iex.Binop.op,
                   atbSubst_Expr(env, e->Iex.Binop.arg1),
                   atbSubst_Expr(env, e->Iex.Binop.arg2)
                );
      case Iex_Unop:
         return fold_IRExpr_Unop(
                   e->Iex.Unop.op,
                   atbSubst_Expr(env, e->Iex.Unop.arg)
                );
      case Iex_Load:
         return IRExpr_Load(
                   e->Iex.Load.end,
                   e->Iex.Load.ty,
                   atbSubst_Expr(env, e->Iex.Load.addr)
                );
      case Iex_GetI:
         return IRExpr_GetI(
                   e->Iex.GetI.descr,
                   atbSubst_Expr(env, e->Iex.GetI.ix),
                   e->Iex.GetI.bias
                );
      case Iex_Const:
      case Iex_Get:
         /* No temps underneath; return unchanged. */
         return e;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("atbSubst_Expr");
   }
}
5503 
5504 /* Same deal as atbSubst_Expr, except for stmts. */
5505 
/* Rebuild ST with env bindings substituted into every contained
   expression (via atbSubst_Expr, which consumes bindings from ENV as
   a side effect). */
static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
{
   Int     i;
   IRDirty *d, *d2;
   IRCAS   *cas, *cas2;
   IRPutI  *puti, *puti2;

   switch (st->tag) {
      case Ist_AbiHint:
         return IRStmt_AbiHint(
                   atbSubst_Expr(env, st->Ist.AbiHint.base),
                   st->Ist.AbiHint.len,
                   atbSubst_Expr(env, st->Ist.AbiHint.nia)
                );
      case Ist_Store:
         return IRStmt_Store(
                   st->Ist.Store.end,
                   atbSubst_Expr(env, st->Ist.Store.addr),
                   atbSubst_Expr(env, st->Ist.Store.data)
                );
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         return IRStmt_StoreG(sg->end,
                              atbSubst_Expr(env, sg->addr),
                              atbSubst_Expr(env, sg->data),
                              atbSubst_Expr(env, sg->guard));
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                             atbSubst_Expr(env, lg->addr),
                             atbSubst_Expr(env, lg->alt),
                             atbSubst_Expr(env, lg->guard));
      }
      case Ist_WrTmp:
         /* Only the RHS is substituted; the written temp stays. */
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   atbSubst_Expr(env, st->Ist.WrTmp.data)
                );
      case Ist_Put:
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   atbSubst_Expr(env, st->Ist.Put.data)
                );
      case Ist_PutI:
         puti  = st->Ist.PutI.details;
         puti2 = mkIRPutI(puti->descr,
                          atbSubst_Expr(env, puti->ix),
                          puti->bias,
                          atbSubst_Expr(env, puti->data));
         return IRStmt_PutI(puti2);

      case Ist_Exit:
         return IRStmt_Exit(
                   atbSubst_Expr(env, st->Ist.Exit.guard),
                   st->Ist.Exit.jk,
                   st->Ist.Exit.dst,
                   st->Ist.Exit.offsIP
                );
      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);
      case Ist_NoOp:
         return IRStmt_NoOp();
      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);
      case Ist_CAS:
         /* expdHi/dataHi may be NULL (single-wide CAS) and are
            carried through unchanged in that case. */
         cas  = st->Ist.CAS.details;
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   atbSubst_Expr(env, cas->addr),
                   cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
                   atbSubst_Expr(env, cas->expdLo),
                   cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
                   atbSubst_Expr(env, cas->dataLo)
                );
         return IRStmt_CAS(cas2);
      case Ist_LLSC:
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   atbSubst_Expr(env, st->Ist.LLSC.addr),
                   st->Ist.LLSC.storedata
                      ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
                );
      case Ist_Dirty:
         /* Copy the dirty-call descriptor, then substitute into its
            address, guard, and (non-marker) args. */
         d  = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         if (d2->mFx != Ifx_None)
            d2->mAddr = atbSubst_Expr(env, d2->mAddr);
         d2->guard = atbSubst_Expr(env, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            /* VECRET/GSPTR markers are passed through untouched. */
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               d2->args[i] = atbSubst_Expr(env, arg);
         }
         return IRStmt_Dirty(d2);
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("atbSubst_Stmt");
   }
}
5610 
5611 inline
dirty_helper_stores(const IRDirty * d)5612 static Bool dirty_helper_stores ( const IRDirty *d )
5613 {
5614    return d->mFx == Ifx_Write || d->mFx == Ifx_Modify;
5615 }
5616 
5617 inline
dirty_helper_puts(const IRDirty * d,Bool (* preciseMemExnsFn)(Int,Int,VexRegisterUpdates),VexRegisterUpdates pxControl,Bool * requiresPreciseMemExns)5618 static Interval dirty_helper_puts (
5619                    const IRDirty *d,
5620                    Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
5621                    VexRegisterUpdates pxControl,
5622                    /*OUT*/Bool *requiresPreciseMemExns
5623                 )
5624 {
5625    Int i;
5626    Interval interval;
5627 
5628    /* Passing the guest state pointer opens the door to modifying the
5629       guest state under the covers.  It's not allowed, but let's be
5630       extra conservative and assume the worst. */
5631    for (i = 0; d->args[i]; i++) {
5632       if (UNLIKELY(d->args[i]->tag == Iex_GSPTR)) {
5633          *requiresPreciseMemExns = True;
5634          /* Assume all guest state is written. */
5635          interval.present = True;
5636          interval.low  = 0;
5637          interval.high = 0x7FFFFFFF;
5638          return interval;
5639       }
5640    }
5641 
5642    /* Check the side effects on the guest state */
5643    interval.present = False;
5644    interval.low = interval.high = -1;
5645    *requiresPreciseMemExns = False;
5646 
5647    for (i = 0; i < d->nFxState; ++i) {
5648       if (d->fxState[i].fx != Ifx_Read) {
5649          Int offset = d->fxState[i].offset;
5650          Int size = d->fxState[i].size;
5651          Int nRepeats = d->fxState[i].nRepeats;
5652          Int repeatLen = d->fxState[i].repeatLen;
5653 
5654          if (preciseMemExnsFn(offset,
5655                               offset + nRepeats * repeatLen + size - 1,
5656                               pxControl)) {
5657             *requiresPreciseMemExns = True;
5658          }
5659          update_interval(&interval, offset,
5660                          offset + nRepeats * repeatLen + size - 1);
5661       }
5662    }
5663 
5664    return interval;
5665 }
5666 
/* Return an interval if st modifies the guest state.  Via
   requiresPreciseMemExns return whether or not that modification
   requires precise exceptions. */
static Interval stmt_modifies_guest_state (
                   IRSB *bb, const IRStmt *st,
                   Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
                   VexRegisterUpdates pxControl,
                   /*OUT*/Bool *requiresPreciseMemExns
                )
{
   Interval interval;

   switch (st->tag) {
   case Ist_Put: {
      /* Fixed-offset write: the interval is exactly the bytes
         written. */
      Int offset = st->Ist.Put.offset;
      Int size = sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Put.data));

      *requiresPreciseMemExns
         = preciseMemExnsFn(offset, offset + size - 1, pxControl);
      interval.present = True;
      interval.low  = offset;
      interval.high = offset + size - 1;
      return interval;
   }

   case Ist_PutI: {
      /* Indexed write: the run-time index is unknown, so cover the
         whole array. */
      IRRegArray *descr = st->Ist.PutI.details->descr;
      Int offset = descr->base;
      Int size = sizeofIRType(descr->elemTy);

      /* We quietly assume here that all segments are contiguous and there
         are no holes. This is to avoid a loop. The assumption is conservative
         in the sense that we might report that precise memory exceptions are
         needed when in fact they are not. */
      *requiresPreciseMemExns
         = preciseMemExnsFn(offset, offset + descr->nElems * size - 1,
                            pxControl);
      interval.present = True;
      interval.low  = offset;
      interval.high = offset + descr->nElems * size - 1;
      return interval;
   }

   case Ist_Dirty:
      /* Delegate: dirty helpers declare their guest-state effects. */
      return dirty_helper_puts(st->Ist.Dirty.details,
                               preciseMemExnsFn, pxControl,
                               requiresPreciseMemExns);

   default:
      /* All other statements do not write the guest state. */
      *requiresPreciseMemExns = False;
      interval.present = False;
      interval.low  = -1;
      interval.high = -1;
      return interval;
   }
}
5723 
ado_treebuild_BB(IRSB * bb,Bool (* preciseMemExnsFn)(Int,Int,VexRegisterUpdates),VexRegisterUpdates pxControl)5724 /* notstatic */ Addr ado_treebuild_BB (
5725                         IRSB* bb,
5726                         Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
5727                         VexRegisterUpdates pxControl
5728                      )
5729 {
5730    Int      i, j, k, m;
5731    Bool     stmtStores, invalidateMe;
5732    Interval putInterval;
5733    IRStmt*  st;
5734    IRStmt*  st2;
5735    ATmpInfo env[A_NENV];
5736 
5737    Bool   max_ga_known = False;
5738    Addr   max_ga       = 0;
5739 
5740    Int       n_tmps = bb->tyenv->types_used;
5741    UShort*   uses   = LibVEX_Alloc_inline(n_tmps * sizeof(UShort));
5742 
5743    /* Phase 1.  Scan forwards in bb, counting use occurrences of each
5744       temp.  Also count occurrences in the bb->next field.  Take the
5745       opportunity to also find the maximum guest address in the block,
5746       since that will be needed later for deciding when we can safely
5747       elide event checks. */
5748 
5749    for (i = 0; i < n_tmps; i++)
5750       uses[i] = 0;
5751 
5752    for (i = 0; i < bb->stmts_used; i++) {
5753       st = bb->stmts[i];
5754       switch (st->tag) {
5755          case Ist_NoOp:
5756             continue;
5757          case Ist_IMark: {
5758             UInt len = st->Ist.IMark.len;
5759             Addr mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
5760             max_ga_known = True;
5761             if (mga > max_ga)
5762                max_ga = mga;
5763             break;
5764          }
5765          default:
5766             break;
5767       }
5768       aoccCount_Stmt( uses, st );
5769    }
5770    aoccCount_Expr(uses, bb->next );
5771 
5772 #  if 0
5773    for (i = 0; i < n_tmps; i++) {
5774       if (uses[i] == 0)
5775         continue;
5776       ppIRTemp( (IRTemp)i );
5777       vex_printf("  used %d\n", (Int)uses[i] );
5778    }
5779 #  endif
5780 
5781    /* Phase 2.  Scan forwards in bb.  For each statement in turn:
5782 
5783          If the env is full, emit the end element.  This guarantees
5784          there is at least one free slot in the following.
5785 
5786          On seeing 't = E', occ(t)==1,
5787             let E'=env(E)
5788             delete this stmt
5789             add t -> E' to the front of the env
5790             Examine E' and set the hints for E' appropriately
5791               (doesLoad? doesGet?)
5792 
5793          On seeing any other stmt,
5794             let stmt' = env(stmt)
5795             remove from env any 't=E' binds invalidated by stmt
5796                 emit the invalidated stmts
5797             emit stmt'
5798             compact any holes in env
5799               by sliding entries towards the front
5800 
5801       Finally, apply env to bb->next.
5802    */
5803 
5804    for (i = 0; i < A_NENV; i++) {
5805       env[i].bindee = NULL;
5806       env[i].binder = IRTemp_INVALID;
5807    }
5808 
5809    /* The stmts in bb are being reordered, and we are guaranteed to
5810       end up with no more than the number we started with.  Use i to
5811       be the cursor of the current stmt examined and j <= i to be that
5812       for the current stmt being written.
5813    */
5814    j = 0;
5815    for (i = 0; i < bb->stmts_used; i++) {
5816 
5817       st = bb->stmts[i];
5818       if (st->tag == Ist_NoOp)
5819          continue;
5820 
5821       /* Ensure there's at least one space in the env, by emitting
5822          the oldest binding if necessary. */
5823       if (env[A_NENV-1].bindee != NULL) {
5824          bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
5825                                       env[A_NENV-1].bindee );
5826          j++;
5827          vassert(j <= i);
5828          env[A_NENV-1].bindee = NULL;
5829       }
5830 
5831       /* Consider current stmt. */
5832       if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
5833          IRExpr *e, *e2;
5834 
5835          /* optional extra: dump dead bindings as we find them.
5836             Removes the need for a prior dead-code removal pass. */
5837          if (uses[st->Ist.WrTmp.tmp] == 0) {
5838 	    if (0) vex_printf("DEAD binding\n");
5839             continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
5840          }
5841          vassert(uses[st->Ist.WrTmp.tmp] == 1);
5842 
5843          /* ok, we have 't = E', occ(t)==1.  Do the abovementioned
5844             actions. */
5845          e  = st->Ist.WrTmp.data;
5846          e2 = atbSubst_Expr(env, e);
5847          addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
5848          setHints_Expr(&env[0].doesLoad, &env[0].getInterval, e2);
5849          /* don't advance j, as we are deleting this stmt and instead
5850             holding it temporarily in the env. */
5851          continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
5852       }
5853 
5854       /* we get here for any other kind of statement. */
5855       /* 'use up' any bindings required by the current statement. */
5856       st2 = atbSubst_Stmt(env, st);
5857 
5858       /* Now, before this stmt, dump any bindings in env that it
5859          invalidates.  These need to be dumped in the order in which
5860          they originally entered env -- that means from oldest to
5861          youngest. */
5862 
5863       /* putInterval/stmtStores characterise what the stmt under
5864          consideration does, or might do (sidely safe @ True). */
5865 
5866       Bool putRequiresPreciseMemExns;
5867       putInterval = stmt_modifies_guest_state(
5868                        bb, st, preciseMemExnsFn, pxControl,
5869                        &putRequiresPreciseMemExns
5870                     );
5871 
5872       /* be True if this stmt writes memory or might do (==> we don't
5873          want to reorder other loads or stores relative to it).  Also,
5874          both LL and SC fall under this classification, since we
5875          really ought to be conservative and not reorder any other
5876          memory transactions relative to them. */
5877       stmtStores
5878          = toBool( st->tag == Ist_Store
5879                    || (st->tag == Ist_Dirty
5880                        && dirty_helper_stores(st->Ist.Dirty.details))
5881                    || st->tag == Ist_LLSC
5882                    || st->tag == Ist_CAS );
5883 
5884       for (k = A_NENV-1; k >= 0; k--) {
5885          if (env[k].bindee == NULL)
5886             continue;
5887          /* Compare the actions of this stmt with the actions of
5888             binding 'k', to see if they invalidate the binding. */
5889          invalidateMe
5890             = toBool(
5891               /* a store invalidates loaded data */
5892               (env[k].doesLoad && stmtStores)
5893               /* a put invalidates get'd data, if they overlap */
5894               || ((env[k].getInterval.present && putInterval.present) &&
5895                   intervals_overlap(env[k].getInterval, putInterval))
5896               /* a put invalidates loaded data. That means, in essense, that
5897                  a load expression cannot be substituted into a statement
5898                  that follows the put. But there is nothing wrong doing so
5899                  except when the put statement requries precise exceptions.
5900                  Think of a load that is moved past a put where the put
5901                  updates the IP in the guest state. If the load generates
5902                  a segfault, the wrong address (line number) would be
5903                  reported. */
5904               || (env[k].doesLoad && putInterval.present &&
5905                   putRequiresPreciseMemExns)
5906               /* probably overly conservative: a memory bus event
5907                  invalidates absolutely everything, so that all
5908                  computation prior to it is forced to complete before
5909                  proceeding with the event (fence,lock,unlock). */
5910               || st->tag == Ist_MBE
5911               /* also be (probably overly) paranoid re AbiHints */
5912               || st->tag == Ist_AbiHint
5913               );
5914          if (invalidateMe) {
5915             bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
5916             j++;
5917             vassert(j <= i);
5918             env[k].bindee = NULL;
5919          }
5920       }
5921 
5922       /* Slide in-use entries in env up to the front */
5923       m = 0;
5924       for (k = 0; k < A_NENV; k++) {
5925          if (env[k].bindee != NULL) {
5926             env[m] = env[k];
5927             m++;
5928 	 }
5929       }
5930       for (m = m; m < A_NENV; m++) {
5931          env[m].bindee = NULL;
5932       }
5933 
5934       /* finally, emit the substituted statement */
5935       bb->stmts[j] = st2;
5936       /* vex_printf("**2  "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
5937       j++;
5938 
5939       vassert(j <= i+1);
5940    } /* for each stmt in the original bb ... */
5941 
5942    /* Finally ... substitute the ->next field as much as possible, and
5943       dump any left-over bindings.  Hmm.  Perhaps there should be no
5944       left over bindings?  Or any left-over bindings are
5945       by definition dead? */
5946    bb->next = atbSubst_Expr(env, bb->next);
5947    bb->stmts_used = j;
5948 
5949    return max_ga_known ? max_ga : ~(Addr)0;
5950 }
5951 
5952 
5953 /*---------------------------------------------------------------*/
5954 /*--- MSVC specific transformation hacks                      ---*/
5955 /*---------------------------------------------------------------*/
5956 
5957 /* The purpose of all this is to find MSVC's idiom for non-constant
5958    bitfield assignment, "a ^ ((a ^ b) & c)", and transform it into
5959    gcc's idiom "(a & ~c) | (b & c)".  Motivation is that Memcheck
5960    generates a lot of false positives from the MSVC version because it
5961    doesn't understand that XORing an undefined bit with itself gives a
5962    defined result.
5963 
5964    This isn't a problem for the simple case "x ^ x", because iropt
5965    folds it to a constant zero before Memcheck ever sees it.  But in
5966    this case we have an intervening "& c" which defeats the simple
5967    case.  So we have to carefully inspect all expressions rooted at an
5968    XOR to see if any of them match "a ^ ((a ^ b) & c)", or any of the
5969    7 other variants resulting from swapping the order of arguments to
5970    the three binary operations.  If we get a match, we then replace
5971    the tree with "(a & ~c) | (b & c)", and Memcheck is happy.
5972 
5973    The key difficulty is to spot the two uses of "a".  To normalise
5974    the IR to maximise the chances of success, we first do a CSE pass,
5975    with CSEing of loads enabled, since the two "a" expressions may be
5976    loads, which need to be commoned up.  Then we do a constant folding
5977    pass, so as to remove any tmp-to-tmp assignment chains that would
5978    make matching the original expression more difficult.
5979 */
5980 
5981 
5982 /* Helper function for debug printing */
5983 __attribute__((unused))
print_flat_expr(IRExpr ** env,IRExpr * e)5984 static void print_flat_expr ( IRExpr** env, IRExpr* e )
5985 {
5986    if (e == NULL) {
5987       vex_printf("?");
5988       return;
5989    }
5990    switch (e->tag) {
5991       case Iex_Binop: {
5992          ppIROp(e->Iex.Binop.op);
5993          vex_printf("(");
5994          print_flat_expr(env, e->Iex.Binop.arg1);
5995          vex_printf(",");
5996          print_flat_expr(env, e->Iex.Binop.arg2);
5997          vex_printf(")");
5998          break;
5999       }
6000       case Iex_Unop: {
6001          ppIROp(e->Iex.Unop.op);
6002          vex_printf("(");
6003          print_flat_expr(env, e->Iex.Unop.arg);
6004          vex_printf(")");
6005          break;
6006       }
6007       case Iex_RdTmp:
6008          ppIRTemp(e->Iex.RdTmp.tmp);
6009          vex_printf("=");
6010          print_flat_expr(env, chase1(env, e));
6011          break;
6012       case Iex_Const:
6013       case Iex_CCall:
6014       case Iex_Load:
6015       case Iex_ITE:
6016       case Iex_Get:
6017          ppIRExpr(e);
6018          break;
6019       default:
6020          vex_printf("FAIL: "); ppIRExpr(e); vex_printf("\n");
6021          vassert(0);
6022    }
6023 }
6024 
6025 /* Spot   a ^ ((a ^ b) & c)   for a,b and c tmp-or-const (atoms)
6026    or any of the other 7 variants generated by switching the order
6027    of arguments to the outer ^, the inner ^ and the &.
6028 */
spotBitfieldAssignment(IRExpr ** aa,IRExpr ** bb,IRExpr ** cc,IRExpr ** env,IRExpr * e,IROp opAND,IROp opXOR)6029 static UInt spotBitfieldAssignment ( /*OUT*/IRExpr** aa, /*OUT*/IRExpr** bb,
6030                                      /*OUT*/IRExpr** cc,
6031                                      IRExpr** env, IRExpr* e,
6032                                      IROp opAND, IROp opXOR)
6033 {
6034 #  define ISBIN(_e,_op) ((_e) && (_e)->tag == Iex_Binop \
6035                               && (_e)->Iex.Binop.op == (_op))
6036 #  define ISATOM(_e)    isIRAtom(_e)
6037 #  define STEP(_e)      chase1(env, (_e))
6038 #  define LL(_e)        ((_e)->Iex.Binop.arg1)
6039 #  define RR(_e)        ((_e)->Iex.Binop.arg2)
6040 
6041    IRExpr *a1, *and, *xor, *c, *a2bL, *a2bR;
6042 
6043    /* This is common to all 8 cases */
6044    if (!ISBIN(e, opXOR)) goto fail;
6045 
6046    /*                        -----and------ */
6047    /*                         --xor---      */
6048    /* find variant 1:   a1 ^ ((a2 ^ b) & c) */
6049    /* find variant 2:   a1 ^ ((b ^ a2) & c) */
6050    a1 = and = xor = c = a2bL = a2bR = NULL;
6051 
6052    a1   = LL(e);
6053    and  = STEP(RR(e));
6054    if (!ISBIN(and, opAND)) goto v34;
6055    xor  = STEP(LL(and));
6056    c    = RR(and);
6057    if (!ISBIN(xor, opXOR)) goto v34;
6058    a2bL = LL(xor);
6059    a2bR = RR(xor);
6060 
6061    if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
6062       *aa = a1;
6063       *bb = a2bR;
6064       *cc = c;
6065       return 1;
6066    }
6067    if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
6068       *aa = a1;
6069       *bb = a2bL;
6070       *cc = c;
6071       return 2;
6072    }
6073 
6074   v34:
6075    /*                   -----and------      */
6076    /*                    --xor---           */
6077    /* find variant 3:   ((a2 ^ b) & c) ^ a1 */
6078    /* find variant 4:   ((b ^ a2) & c) ^ a1 */
6079    a1 = and = xor = c = a2bL = a2bR = NULL;
6080 
6081    a1   = RR(e);
6082    and  = STEP(LL(e));
6083    if (!ISBIN(and, opAND)) goto v56;
6084    xor  = STEP(LL(and));
6085    c    = RR(and);
6086    if (!ISBIN(xor, opXOR)) goto v56;
6087    a2bL = LL(xor);
6088    a2bR = RR(xor);
6089 
6090    if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
6091       *aa = a1;
6092       *bb = a2bR;
6093       *cc = c;
6094       return 3;
6095    }
6096    if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
6097       *aa = a1;
6098       *bb = a2bL;
6099       *cc = c;
6100       return 4;
6101    }
6102 
6103   v56:
6104    /*                        -----and------ */
6105    /*                         --xor---      */
6106    /* find variant 5:   a1 ^ (c & (a2 ^ b)) */
6107    /* find variant 6:   a1 ^ (c & (b ^ a2)) */
6108    a1 = and = xor = c = a2bL = a2bR = NULL;
6109 
6110    a1   = LL(e);
6111    and  = STEP(RR(e));
6112    if (!ISBIN(and, opAND)) goto v78;
6113    xor  = STEP(RR(and));
6114    c    = LL(and);
6115    if (!ISBIN(xor, opXOR)) goto v78;
6116    a2bL = LL(xor);
6117    a2bR = RR(xor);
6118 
6119    if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
6120       *aa = a1;
6121       *bb = a2bR;
6122       *cc = c;
6123       vassert(5-5); // ATC
6124       return 5;
6125    }
6126    if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
6127       *aa = a1;
6128       *bb = a2bL;
6129       *cc = c;
6130       vassert(6-6); // ATC
6131       return 6;
6132    }
6133 
6134  v78:
6135    /*                   -----and------      */
6136    /*                    --xor---           */
6137    /* find variant 7:   (c & (a2 ^ b)) ^ a1 */
6138    /* find variant 8:   (c & (b ^ a2)) ^ a1 */
6139    a1 = and = xor = c = a2bL = a2bR = NULL;
6140 
6141    a1   = RR(e);
6142    and  = STEP(LL(e));
6143    if (!ISBIN(and, opAND)) goto fail;
6144    xor  = STEP(RR(and));
6145    c    = LL(and);
6146    if (!ISBIN(xor, opXOR)) goto fail;
6147    a2bL = LL(xor);
6148    a2bR = RR(xor);
6149 
6150    if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
6151       *aa = a1;
6152       *bb = a2bR;
6153       *cc = c;
6154       return 7;
6155    }
6156    if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
6157       *aa = a1;
6158       *bb = a2bL;
6159       *cc = c;
6160       return 8;
6161    }
6162 
6163  fail:
6164    return 0;
6165 
6166 #  undef ISBIN
6167 #  undef ISATOM
6168 #  undef STEP
6169 #  undef LL
6170 #  undef RR
6171 }
6172 
6173 /* If |e| is of the form a ^ ((a ^ b) & c) (or any of the 7 other
6174    variants thereof generated by switching arguments around), return
6175    the IRExpr* for (a & ~c) | (b & c).  Else return NULL. */
static IRExpr* do_XOR_TRANSFORMS_IRExpr ( IRExpr** env, IRExpr* e )
{
   if (e->tag != Iex_Binop)
      return NULL;

   /* Select the operation set matching the width of the outer XOR;
      anything other than Xor8/16/32 at the root means no match. */
   const HChar* tyNm  = NULL;
   IROp         orOp  = Iop_INVALID;
   IROp         andOp = Iop_INVALID;
   IROp         notOp = Iop_INVALID;
   IROp         xorOp = Iop_INVALID;
   switch (e->Iex.Binop.op) {
      case Iop_Xor32:
         tyNm  = "I32";
         orOp  = Iop_Or32;  andOp = Iop_And32;
         notOp = Iop_Not32; xorOp = Iop_Xor32;
         break;
      case Iop_Xor16:
         tyNm  = "I16";
         orOp  = Iop_Or16;  andOp = Iop_And16;
         notOp = Iop_Not16; xorOp = Iop_Xor16;
         break;
      case Iop_Xor8:
         tyNm  = "I8";
         orOp  = Iop_Or8;  andOp = Iop_And8;
         notOp = Iop_Not8; xorOp = Iop_Xor8;
         break;
      default:
         return NULL;
   }

   IRExpr* aa = NULL;
   IRExpr* bb = NULL;
   IRExpr* cc = NULL;
   UInt variant = spotBitfieldAssignment(&aa, &bb, &cc, env, e, andOp, xorOp);
   if (variant == 0)
      return NULL;

   {
      static UInt ctr = 0;
      if (0)
         vex_printf("XXXXXXXXXX Bitfield Assignment number %u, "
                    "type %s, variant %u\n",
                    ++ctr, tyNm, variant);
   }
   /* It's vitally important that aa, bb and cc are atoms -- constants
      or tmps.  Anything else (eg, a GET) incorporated in a tree at
      this point in the SB could be erroneously pulled forwards (eg
      past a PUT that originally followed the GET) and so transform
      the IR wrongly.  spotBitfieldAssignment should only hand us
      atoms, but check anyway. */
   vassert(aa && isIRAtom(aa));
   vassert(bb && isIRAtom(bb));
   vassert(cc && isIRAtom(cc));
   /* Build the gcc-style form: (a & ~c) | (b & c). */
   return IRExpr_Binop(
             orOp,
             IRExpr_Binop(andOp, aa, IRExpr_Unop(notOp, cc)),
             IRExpr_Binop(andOp, bb, cc)
          );
}
6234 
6235 
6236 /* SB is modified in-place.  Visit all the IRExprs and, for those
6237    which are allowed to be non-atomic, perform the XOR transform if
6238    possible.  This makes |sb| be non-flat, but that's ok, the caller
6239    can re-flatten it.  Returns True iff any changes were made. */
do_XOR_TRANSFORM_IRSB(IRSB * sb)6240 static Bool do_XOR_TRANSFORM_IRSB ( IRSB* sb )
6241 {
6242    Int  i;
6243    Bool changed = False;
6244 
6245    /* Make the tmp->expr environment, so we can use it for
6246       chasing expressions. */
6247    Int      n_tmps = sb->tyenv->types_used;
6248    IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
6249    for (i = 0; i < n_tmps; i++)
6250       env[i] = NULL;
6251 
6252    for (i = 0; i < sb->stmts_used; i++) {
6253       IRStmt* st = sb->stmts[i];
6254       if (st->tag != Ist_WrTmp)
6255          continue;
6256       IRTemp t = st->Ist.WrTmp.tmp;
6257       vassert(t >= 0 && t < n_tmps);
6258       env[t] = st->Ist.WrTmp.data;
6259    }
6260 
6261    for (i = 0; i < sb->stmts_used; i++) {
6262       IRStmt* st = sb->stmts[i];
6263 
6264       switch (st->tag) {
6265          case Ist_AbiHint:
6266             vassert(isIRAtom(st->Ist.AbiHint.base));
6267             vassert(isIRAtom(st->Ist.AbiHint.nia));
6268             break;
6269          case Ist_Put:
6270             vassert(isIRAtom(st->Ist.Put.data));
6271             break;
6272          case Ist_PutI: {
6273             IRPutI* puti = st->Ist.PutI.details;
6274             vassert(isIRAtom(puti->ix));
6275             vassert(isIRAtom(puti->data));
6276             break;
6277          }
6278          case Ist_WrTmp: {
6279             /* This is the one place where an expr (st->Ist.WrTmp.data) is
6280                allowed to be more than just a constant or a tmp. */
6281             IRExpr* mb_new_data
6282                = do_XOR_TRANSFORMS_IRExpr(env, st->Ist.WrTmp.data);
6283             if (mb_new_data) {
6284                //ppIRSB(sb);
6285                st->Ist.WrTmp.data = mb_new_data;
6286                //ppIRSB(sb);
6287                changed = True;
6288             }
6289             break;
6290          }
6291          case Ist_Store:
6292             vassert(isIRAtom(st->Ist.Store.addr));
6293             vassert(isIRAtom(st->Ist.Store.data));
6294             break;
6295          case Ist_StoreG: {
6296             IRStoreG* sg = st->Ist.StoreG.details;
6297             vassert(isIRAtom(sg->addr));
6298             vassert(isIRAtom(sg->data));
6299             vassert(isIRAtom(sg->guard));
6300             break;
6301          }
6302          case Ist_LoadG: {
6303             IRLoadG* lg = st->Ist.LoadG.details;
6304             vassert(isIRAtom(lg->addr));
6305             vassert(isIRAtom(lg->alt));
6306             vassert(isIRAtom(lg->guard));
6307             break;
6308          }
6309          case Ist_CAS: {
6310             IRCAS* cas = st->Ist.CAS.details;
6311             vassert(isIRAtom(cas->addr));
6312             vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
6313             vassert(isIRAtom(cas->expdLo));
6314             vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
6315             vassert(isIRAtom(cas->dataLo));
6316             break;
6317          }
6318          case Ist_LLSC:
6319             vassert(isIRAtom(st->Ist.LLSC.addr));
6320             if (st->Ist.LLSC.storedata)
6321                vassert(isIRAtom(st->Ist.LLSC.storedata));
6322             break;
6323          case Ist_Dirty: {
6324             IRDirty* d = st->Ist.Dirty.details;
6325             if (d->mFx != Ifx_None) {
6326                vassert(isIRAtom(d->mAddr));
6327             }
6328             vassert(isIRAtom(d->guard));
6329             for (Int j = 0; d->args[j]; j++) {
6330                IRExpr* arg = d->args[j];
6331                if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) {
6332                   vassert(isIRAtom(arg));
6333                }
6334             }
6335             break;
6336          }
6337          case Ist_IMark:
6338          case Ist_NoOp:
6339          case Ist_MBE:
6340             break;
6341          case Ist_Exit:
6342             vassert(isIRAtom(st->Ist.Exit.guard));
6343             break;
6344          default:
6345             vex_printf("\n"); ppIRStmt(st);
6346             vpanic("do_XOR_TRANSFORMS_IRSB");
6347       }
6348    }
6349 
6350    vassert(isIRAtom(sb->next));
6351    return changed;
6352 }
6353 
6354 
static IRSB* do_MSVC_HACKS ( IRSB* sb )
{
   /* Normalise as much as we can.  This is the one-and-only place
      where do_cse_BB is called with allowLoadsToBeCSEd set to True,
      since the two "a" occurrences we want to match may be loads. */
   if (do_cse_BB( sb, True/*allowLoadsToBeCSEd*/ )) {
      /* CSEing might have created dead code.  Remove it. */
      sb = cprop_BB ( sb );
      do_deadcode_BB(sb);
   }

   /* Do the transformation proper, in place. */
   if (do_XOR_TRANSFORM_IRSB(sb)) {
      /* The transformation generates non-flat expressions, so
         re-flatten the block. */
      sb = flatten_BB(sb);
   }

   return sb;
}
6378 
6379 
6380 /*---------------------------------------------------------------*/
6381 /*--- iropt main                                              ---*/
6382 /*---------------------------------------------------------------*/
6383 
6384 static Bool iropt_verbose = False; /* True; */
6385 
6386 
6387 /* Do a simple cleanup pass on bb.  This is: redundant Get removal,
6388    redundant Put removal, constant propagation, dead code removal,
6389    clean helper specialisation, and dead code removal (again).
6390 */
6391 
6392 
6393 static
cheap_transformations(IRSB * bb,IRExpr * (* specHelper)(const HChar *,IRExpr **,IRStmt **,Int),Bool (* preciseMemExnsFn)(Int,Int,VexRegisterUpdates),VexRegisterUpdates pxControl)6394 IRSB* cheap_transformations (
6395          IRSB* bb,
6396          IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
6397          Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
6398          VexRegisterUpdates pxControl
6399       )
6400 {
6401    redundant_get_removal_BB ( bb );
6402    if (iropt_verbose) {
6403       vex_printf("\n========= REDUNDANT GET\n\n" );
6404       ppIRSB(bb);
6405    }
6406 
6407    if (pxControl < VexRegUpdAllregsAtEachInsn) {
6408       redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
6409    }
6410    if (iropt_verbose) {
6411       vex_printf("\n========= REDUNDANT PUT\n\n" );
6412       ppIRSB(bb);
6413    }
6414 
6415    bb = cprop_BB ( bb );
6416    if (iropt_verbose) {
6417       vex_printf("\n========= CPROPD\n\n" );
6418       ppIRSB(bb);
6419    }
6420 
6421    do_deadcode_BB ( bb );
6422    if (iropt_verbose) {
6423       vex_printf("\n========= DEAD\n\n" );
6424       ppIRSB(bb);
6425    }
6426 
6427    bb = spec_helpers_BB ( bb, specHelper );
6428    do_deadcode_BB ( bb );
6429    if (iropt_verbose) {
6430       vex_printf("\n========= SPECd \n\n" );
6431       ppIRSB(bb);
6432    }
6433 
6434    return bb;
6435 }
6436 
6437 
6438 /* Do some more expensive transformations on bb, which are aimed at
6439    optimising as much as possible in the presence of GetI and PutI.  */
6440 
6441 static
expensive_transformations(IRSB * bb,VexRegisterUpdates pxControl)6442 IRSB* expensive_transformations( IRSB* bb, VexRegisterUpdates pxControl )
6443 {
6444    (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
6445    collapse_AddSub_chains_BB( bb );
6446    do_redundant_GetI_elimination( bb );
6447    if (pxControl < VexRegUpdAllregsAtEachInsn) {
6448       do_redundant_PutI_elimination( bb, pxControl );
6449    }
6450    do_deadcode_BB( bb );
6451    return bb;
6452 }
6453 
6454 
6455 /* Scan a flattened BB to look for signs that more expensive
6456    optimisations might be useful:
6457    - find out if there are any GetIs and PutIs
6458    - find out if there are any floating or vector-typed temporaries
6459 */
6460 
static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
                                 /*OUT*/Bool* hasVorFtemps,
                                 IRSB* bb )
{
   Int i, j;

   *hasGetIorPutI = False;
   *hasVorFtemps  = False;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];
      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_PutI:
            *hasGetIorPutI = True;
            break;
         case Ist_WrTmp:
            if (st->Ist.WrTmp.data->tag == Iex_GetI)
               *hasGetIorPutI = True;
            /* Classify the temp's type: integer types are cheap;
               float, decimal-float and vector types flag the block
               as a candidate for the expensive passes. */
            switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
               case Ity_I1: case Ity_I8: case Ity_I16:
               case Ity_I32: case Ity_I64: case Ity_I128:
                  break;
               case Ity_F16: case Ity_F32: case Ity_F64: case Ity_F128:
               case Ity_V128: case Ity_V256:
               case Ity_D32: case Ity_D64: case Ity_D128:
                  *hasVorFtemps = True;
                  break;
               default:
                  goto bad;
            }
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_StoreG: {
            IRStoreG* sg = st->Ist.StoreG.details;
            vassert(isIRAtom(sg->addr));
            vassert(isIRAtom(sg->data));
            vassert(isIRAtom(sg->guard));
            break;
         }
         case Ist_LoadG: {
            IRLoadG* lg = st->Ist.LoadG.details;
            vassert(isIRAtom(lg->addr));
            vassert(isIRAtom(lg->alt));
            vassert(isIRAtom(lg->guard));
            break;
         }
         case Ist_CAS: {
            IRCAS* cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         }
         case Ist_LLSC:
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty: {
            IRDirty* d = st->Ist.Dirty.details;
            vassert(isIRAtom(d->guard));
            for (j = 0; d->args[j]; j++) {
               IRExpr* arg = d->args[j];
               if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
                  vassert(isIRAtom(arg));
            }
            if (d->mFx != Ifx_None)
               vassert(isIRAtom(d->mAddr));
            break;
         }
         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
         bad:
            ppIRStmt(st);
            vpanic("considerExpensives");
      }
   }
}
6560 
6561 
6562 /* ---------------- The main iropt entry point. ---------------- */
6563 
6564 /* exported from this file */
6565 /* Rules of the game:
6566 
6567    - IRExpr/IRStmt trees should be treated as immutable, as they
6568      may get shared.  So never change a field of such a tree node;
6569      instead construct and return a new one if needed.
6570 */
6571 
6572 
/* Top-level iropt driver: produce an optimised copy of BB0 with the
   same meaning.

   bb0              - superblock to optimise; not modified (a flattened
                      copy is made first, per the immutability rule
                      stated above).
   specHelper       - front-end-supplied specialiser for calls to clean
                      helper functions.
   preciseMemExnsFn - predicate saying whether a given guest-state
                      range must be kept precisely up to date.
   pxControl        - register-update precision policy; gates redundant
                      Put removal.
   guest_addr       - guest address of the block; passed to the loop
                      unroller.
   guest_arch       - guest architecture; ARM gets extra cleanup passes.

   Returns a new (possibly shared-subtree) IRSB.  The amount of work
   done is controlled by vex_control.iropt_level and by what
   considerExpensives finds in the block. */
IRSB* do_iropt_BB(
         IRSB* bb0,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
         VexRegisterUpdates pxControl,
         Addr    guest_addr,
         VexArch guest_arch
      )
{
   /* Running counts of blocks processed and of blocks that needed the
      expensive pass; used only for the DEBUG_IROPT printout.
      NOTE(review): static mutable state — assumes translation is not
      run concurrently; confirm against VEX's threading contract. */
   static Int n_total     = 0;
   static Int n_expensive = 0;

   Bool hasGetIorPutI, hasVorFtemps;
   IRSB *bb, *bb2;

   n_total++;

   /* First flatten the block out, since all other
      phases assume flat code. */

   bb = flatten_BB ( bb0 );

   if (iropt_verbose) {
      vex_printf("\n========= FLAT\n\n" );
      ppIRSB(bb);
   }

   /* If at level 0, stop now. */
   if (vex_control.iropt_level <= 0) return bb;

   /* Now do a preliminary cleanup pass, and figure out if we also
      need to do 'expensive' optimisations.  Expensive optimisations
      are deemed necessary if the block contains any GetIs or PutIs.
      If needed, do expensive transformations and then another cheap
      cleanup pass. */

   bb = cheap_transformations( bb, specHelper, preciseMemExnsFn, pxControl );

   if (guest_arch == VexArchARM) {
      /* Translating Thumb2 code produces a lot of chaff.  We have to
         work extra hard to get rid of it. */
      bb = cprop_BB(bb);
      bb = spec_helpers_BB ( bb, specHelper );
      if (pxControl < VexRegUpdAllregsAtEachInsn) {
         redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
      }
      do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
      do_deadcode_BB( bb );
   }

   if (vex_control.iropt_level > 1) {

      /* Peer at what we have, to decide how much more effort to throw
         at it. */
      considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );

      if (hasVorFtemps && !hasGetIorPutI) {
         /* If any evidence of FP or Vector activity, CSE, as that
            tends to mop up all manner of lardy code to do with
            rounding modes.  Don't bother if hasGetIorPutI since that
            case leads into the expensive transformations, which do
            CSE anyway. */
         (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         do_deadcode_BB( bb );
      }

      if (hasGetIorPutI) {
         Bool cses;
         n_expensive++;
         if (DEBUG_IROPT)
            vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
         bb = expensive_transformations( bb, pxControl );
         bb = cheap_transformations( bb, specHelper,
                                     preciseMemExnsFn, pxControl );
         /* Potentially common up GetIs */
         cses = do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         /* CSE may have created new sharing; re-clean if it changed
            anything. */
         if (cses)
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
      }

      ///////////////////////////////////////////////////////////
      // BEGIN MSVC optimised code transformation hacks
      if (0)
         bb = do_MSVC_HACKS(bb);
      // END   MSVC optimised code transformation hacks
      ///////////////////////////////////////////////////////////

      /* Now have a go at unrolling simple (single-BB) loops.  If
         successful, clean up the results as much as possible. */

      bb2 = maybe_loop_unroll_BB( bb, guest_addr );
      if (bb2) {
         /* Unrolling succeeded; re-run the cleanup pipeline on the
            enlarged block.  hasGetIorPutI still reflects the
            pre-unroll block, which is a superset-safe guide here. */
         bb = cheap_transformations( bb2, specHelper,
                                     preciseMemExnsFn, pxControl );
         if (hasGetIorPutI) {
            bb = expensive_transformations( bb, pxControl );
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
         } else {
            /* at least do CSE and dead code removal */
            do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
            do_deadcode_BB( bb );
         }
         if (0) vex_printf("vex iropt: unrolled a loop\n");
      }

   }

   return bb;
}
6684 
6685 
6686 /*---------------------------------------------------------------*/
6687 /*--- end                                            ir_opt.c ---*/
6688 /*---------------------------------------------------------------*/
6689