1 /* -*- mode: C; c-basic-offset: 3; -*- */
2
3 /*---------------------------------------------------------------*/
4 /*--- begin ir_opt.c ---*/
5 /*---------------------------------------------------------------*/
6
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
10
11 Copyright (C) 2004-2017 OpenWorks LLP
12 info@open-works.net
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30
31 Neither the names of the U.S. Department of Energy nor the
32 University of California nor the names of its contributors may be
33 used to endorse or promote products derived from this software
34 without prior written permission.
35 */
36
37 #include "libvex_basictypes.h"
38 #include "libvex_ir.h"
39 #include "libvex.h"
40
41 #include "main_util.h"
42 #include "main_globals.h"
43 #include "ir_opt.h"
44
45
46 /* Set to 1 for lots of debugging output. */
47 #define DEBUG_IROPT 0
48
49 /* Set to 1 to gather some statistics. Currently only for sameIRExprs. */
50 #define STATS_IROPT 0
51
52
53 /* What iropt does, 29 Dec 04.
54
55 It takes an IRSB and produces a new one with the same meaning,
56 defined thus:
57
58 After execution of the new BB, all guest state and guest memory is
59 the same as after execution of the original. This is true
60 regardless of how the block was exited (at the end vs side exit).
61
62 In addition, parts of the guest state will be identical to that
63 created by execution of the original at the following observation
64 points:
65
66 * In a dirty helper call, any parts of the guest state that the
67 helper states that it reads or modifies will be up to date.
68 Also, guest memory will be up to date. Parts of the guest state
69 not marked as being read or modified by the helper cannot be
70 assumed to be up-to-date at the point where the helper is called.
71
72 * If iropt_register_updates == VexRegUpdSpAtMemAccess :
     The guest state is only up to date as explained above
74 (i.e. at SB exits and as specified by dirty helper call).
75 Also, the stack pointer register is up to date at memory
76 exception points (as this is needed for the stack extension
77 logic in m_signals.c).
78
79 * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess :
80 Immediately prior to any load or store, those parts of the guest
81 state marked as requiring precise exceptions will be up to date.
82 Also, guest memory will be up to date. Parts of the guest state
83 not marked as requiring precise exceptions cannot be assumed to
84 be up-to-date at the point of the load/store.
85
86 * If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
87 Same as minimal, but all the guest state is up to date at memory
88 exception points.
89
90 * If iropt_register_updates == VexRegUpdAllregsAtEachInsn :
91 Guest state is up to date at each instruction.
92
93 The relative order of loads and stores (including loads/stores of
94 guest memory done by dirty helpers annotated as such) is not
95 changed. However, the relative order of loads with no intervening
96 stores/modifies may be changed.
97
98 Transformation order
99 ~~~~~~~~~~~~~~~~~~~~
100
101 There are three levels of optimisation, controlled by
102 vex_control.iropt_level. Define first:
103
104 "Cheap transformations" are the following sequence:
105 * Redundant-Get removal
106 * Redundant-Put removal
107 * Constant propagation/folding
108 * Dead code removal
109 * Specialisation of clean helper functions
110 * Dead code removal
111
112 "Expensive transformations" are the following sequence:
113 * CSE
114 * Folding of add/sub chains
115 * Redundant-GetI removal
116 * Redundant-PutI removal
117 * Dead code removal
118
119 Then the transformations are as follows, as defined by
120 vex_control.iropt_level:
121
122 Level 0:
123 * Flatten into atomic form.
124
125 Level 1: the following sequence:
126 * Flatten into atomic form.
127 * Cheap transformations.
128
129 Level 2: the following sequence
130 * Flatten into atomic form.
131 * Cheap transformations.
132 * If block contains any floating or vector types, CSE.
133 * If block contains GetI or PutI, Expensive transformations.
134 * Try unrolling loops. Three possible outcomes:
135 - No effect: do nothing more.
136 - Unrolled a loop, and block does not contain GetI or PutI:
137 Do: * CSE
138 * Dead code removal
139 - Unrolled a loop, and block contains GetI or PutI:
140 Do: * Expensive transformations
141 * Cheap transformations
142 */
143
144 /* Implementation notes, 29 Dec 04.
145
146 TODO (important): I think rPutI removal ignores precise exceptions
147 and is therefore in a sense, wrong. In the sense that PutIs are
148 assumed not to write parts of the guest state that we need to have
149 up-to-date at loads/stores. So far on x86 guest that has not
150 mattered since indeed only the x87 FP registers and tags are
151 accessed using GetI/PutI, and there is no need so far for them to
152 be up to date at mem exception points. The rPutI pass should be
153 fixed.
154
155 TODO: improve pessimistic handling of precise exceptions
156 in the tree builder.
157
158 TODO: check interaction of rGetI and dirty helpers.
159
160 F64i constants are treated differently from other constants.
161 They are not regarded as atoms, and instead lifted off and
162 bound to temps. This allows them to participate in CSE, which
163 is important for getting good performance for x86 guest code.
164
165 CSE up F64 literals (already doing F64is)
166
167 CSE: consider carefully the requirement for precise exns
168 prior to making CSE any more aggressive. */
169
170
171 /*---------------------------------------------------------------*/
172 /*--- Finite mappery, of a sort ---*/
173 /*---------------------------------------------------------------*/
174
/* General map from HWord-sized thing to HWord-sized thing.  Could be by
176 hashing, but it's not clear whether or not this would really be any
177 faster. */
178
/* A small linear-scan map with lazy deletion: slots are never
   physically removed, just marked not-inuse. */
typedef
   struct {
      Bool*  inuse;  /* inuse[i] == True iff slot i holds a live binding */
      HWord* key;    /* keys; valid only where inuse[i] */
      HWord* val;    /* values; valid only where inuse[i] */
      Int    size;   /* number of slots allocated in the three arrays */
      Int    used;   /* high-water mark: slots 0 .. used-1 ever occupied */
   }
   HashHW;
188
newHHW(void)189 static HashHW* newHHW ( void )
190 {
191 HashHW* h = LibVEX_Alloc_inline(sizeof(HashHW));
192 h->size = 8;
193 h->used = 0;
194 h->inuse = LibVEX_Alloc_inline(h->size * sizeof(Bool));
195 h->key = LibVEX_Alloc_inline(h->size * sizeof(HWord));
196 h->val = LibVEX_Alloc_inline(h->size * sizeof(HWord));
197 return h;
198 }
199
200
201 /* Look up key in the map. */
202
lookupHHW(HashHW * h,HWord * val,HWord key)203 static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
204 {
205 Int i;
206 /* vex_printf("lookupHHW(%llx)\n", key ); */
207 for (i = 0; i < h->used; i++) {
208 if (h->inuse[i] && h->key[i] == key) {
209 if (val)
210 *val = h->val[i];
211 return True;
212 }
213 }
214 return False;
215 }
216
217
218 /* Add key->val to the map. Replaces any existing binding for key. */
219
/* Bind key -> val in the map, replacing any existing binding for
   key.  When the arrays are full, dead slots are compacted away
   while copying into arrays of twice the size. */
static void addToHHW ( HashHW* h, HWord key, HWord val )
{
   Int rd, wr;

   /* If the key is already bound, simply overwrite its value. */
   for (rd = 0; rd < h->used; rd++) {
      if (h->inuse[rd] && h->key[rd] == key) {
         h->val[rd] = val;
         return;
      }
   }

   /* No free slot at the end?  Grow, squeezing out dead entries as
      we copy. */
   if (h->used == h->size) {
      Bool*  inuse2 = LibVEX_Alloc_inline(2 * h->size * sizeof(Bool));
      HWord* key2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
      HWord* val2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
      wr = 0;
      for (rd = 0; rd < h->size; rd++) {
         if (!h->inuse[rd])
            continue;
         inuse2[wr] = True;
         key2[wr]   = h->key[rd];
         val2[wr]   = h->val[rd];
         wr++;
      }
      h->used  = wr;
      h->size *= 2;
      h->inuse = inuse2;
      h->key   = key2;
      h->val   = val2;
   }

   /* Append the new binding in the guaranteed-free slot. */
   vassert(h->used < h->size);
   h->inuse[h->used] = True;
   h->key[h->used]   = key;
   h->val[h->used]   = val;
   h->used++;
}
260
261
262 /*---------------------------------------------------------------*/
263 /*--- Flattening out a BB into atomic SSA form ---*/
264 /*---------------------------------------------------------------*/
265
266 /* Non-critical helper, heuristic for reducing the number of tmp-tmp
267 copies made by flattening. If in doubt return False. */
268
isFlat(IRExpr * e)269 static Bool isFlat ( IRExpr* e )
270 {
271 if (e->tag == Iex_Get)
272 return True;
273 if (e->tag == Iex_Binop)
274 return toBool( isIRAtom(e->Iex.Binop.arg1)
275 && isIRAtom(e->Iex.Binop.arg2) );
276 if (e->tag == Iex_Load)
277 return isIRAtom(e->Iex.Load.addr);
278 return False;
279 }
280
281 /* Flatten out 'ex' so it is atomic, returning a new expression with
282 the same value, after having appended extra IRTemp assignments to
283 the end of 'bb'. */
284
static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);  /* result type of 'ex' */
   IRTemp t1;

   switch (ex->tag) {

      /* Indexed get: flatten the index subexpression, bind the whole
         GetI to a fresh temp, and return a read of that temp. */
      case Iex_GetI:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      /* Get has no subexpressions; just lift it onto a temp so the
         caller receives an atom. */
      case Iex_Get:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      /* For the operator cases, flatten each argument (appending any
         needed temp assignments to bb), then bind the rebuilt node
         to a fresh temp. */
      case Iex_Qop: {
         IRQop* qop = ex->Iex.Qop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(qop->op,
                       flatten_Expr(bb, qop->arg1),
                       flatten_Expr(bb, qop->arg2),
                       flatten_Expr(bb, qop->arg3),
                       flatten_Expr(bb, qop->arg4))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Triop: {
         IRTriop* triop = ex->Iex.Triop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(triop->op,
                         flatten_Expr(bb, triop->arg1),
                         flatten_Expr(bb, triop->arg2),
                         flatten_Expr(bb, triop->arg3))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      /* Clean helper call: flatten every argument.  The args vector
         is NULL-terminated, hence the loop condition. */
      case Iex_CCall:
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_ITE:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond),
                       flatten_Expr(bb, ex->Iex.ITE.iftrue),
                       flatten_Expr(bb, ex->Iex.ITE.iffalse))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      /* Already atomic; return unchanged. */
      case Iex_RdTmp:
         return ex;

      default:
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}
396
397
398 /* Append a completely flattened form of 'st' to the end of 'bb'. */
399
static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr  *e1, *e2, *e3, *e4, *e5;
   IRDirty *d, *d2;
   IRCAS   *cas, *cas2;
   IRPutI  *puti, *puti2;
   IRLoadG *lg;
   IRStoreG *sg;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         /* Flatten both the index and the data, then rebuild the
            PutI around the atomised versions. */
         puti = st->Ist.PutI.details;
         e1 = flatten_Expr(bb, puti->ix);
         e2 = flatten_Expr(bb, puti->data);
         puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
         addStmtToIRSB(bb, IRStmt_PutI(puti2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
         break;
      case Ist_StoreG:
         sg = st->Ist.StoreG.details;
         e1 = flatten_Expr(bb, sg->addr);
         e2 = flatten_Expr(bb, sg->data);
         e3 = flatten_Expr(bb, sg->guard);
         addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3));
         break;
      case Ist_LoadG:
         lg = st->Ist.LoadG.details;
         e1 = flatten_Expr(bb, lg->addr);
         e2 = flatten_Expr(bb, lg->alt);
         e3 = flatten_Expr(bb, lg->guard);
         addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                                        e1, e2, e3));
         break;
      case Ist_CAS:
         /* expdHi/dataHi are NULL for single-wide CAS; only flatten
            them when present. */
         cas = st->Ist.CAS.details;
         e1 = flatten_Expr(bb, cas->addr);
         e2 = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3 = flatten_Expr(bb, cas->expdLo);
         e4 = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5 = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         /* storedata == NULL means this is a load-linked, not a
            store-conditional. */
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         /* Shallow-copy the dirty call so the original statement is
            left intact, then flatten mAddr, guard and each ordinary
            argument (VECRET/GSPTR markers are left alone). */
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               d2->args[i] = flatten_Expr(bb, arg);
         }
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         /* No subexpressions; copy through unchanged. */
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst,
                                       st->Ist.Exit.offsIP));
         break;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}
519
520
/* Produce a flattened (atomic-operand) copy of 'in'.  The type
   environment is deep-copied, each statement is flattened into the
   new block in order, and finally the block's 'next' expression is
   flattened too. */
static IRSB* flatten_BB ( IRSB* in )
{
   Int   i;
   IRSB* out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   for (i = 0; i < in->stmts_used; i++) {
      IRStmt* st = in->stmts[i];
      if (st != NULL)
         flatten_Stmt( out, st );
   }
   out->next     = flatten_Expr( out, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;
   return out;
}
535
536
537 /*---------------------------------------------------------------*/
538 /*--- In-place removal of redundant GETs ---*/
539 /*---------------------------------------------------------------*/
540
541 /* Scan forwards, building up an environment binding (min offset, max
542 offset) pairs to values, which will either be temps or constants.
543
544 On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
545 env and if it matches, replace the Get with the stored value. If
546 there is no match, add a (minoff,maxoff) :-> t binding.
547
548 On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
549 any binding which fully or partially overlaps with (minoff,maxoff).
550 Then add a new (minoff,maxoff) :-> t or c binding. */
551
552 /* Extract the min/max offsets from a guest state array descriptor. */
553
554 inline
getArrayBounds(IRRegArray * descr,UInt * minoff,UInt * maxoff)555 static void getArrayBounds ( IRRegArray* descr,
556 UInt* minoff, UInt* maxoff )
557 {
558 *minoff = descr->base;
559 *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
560 vassert((*minoff & ~0xFFFF) == 0);
561 vassert((*maxoff & ~0xFFFF) == 0);
562 vassert(*minoff <= *maxoff);
563 }
564
565 /* Create keys, of the form ((minoffset << 16) | maxoffset). */
566
mk_key_GetPut(Int offset,IRType ty)567 static UInt mk_key_GetPut ( Int offset, IRType ty )
568 {
569 /* offset should fit in 16 bits. */
570 UInt minoff = offset;
571 UInt maxoff = minoff + sizeofIRType(ty) - 1;
572 vassert((minoff & ~0xFFFF) == 0);
573 vassert((maxoff & ~0xFFFF) == 0);
574 return (minoff << 16) | maxoff;
575 }
576
mk_key_GetIPutI(IRRegArray * descr)577 static UInt mk_key_GetIPutI ( IRRegArray* descr )
578 {
579 UInt minoff, maxoff;
580 getArrayBounds( descr, &minoff, &maxoff );
581 vassert((minoff & ~0xFFFF) == 0);
582 vassert((maxoff & ~0xFFFF) == 0);
583 return (minoff << 16) | maxoff;
584 }
585
586 /* Supposing h has keys of the form generated by mk_key_GetPut and
587 mk_key_GetIPutI, invalidate any key which overlaps (k_lo
588 .. k_hi).
589 */
/* Given that h holds keys of the form produced by mk_key_GetPut /
   mk_key_GetIPutI, mark dead every entry whose byte range overlaps
   (k_lo .. k_hi) in any way. */
static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
{
   Int j;
   vassert(k_lo <= k_hi);
   /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */

   for (j = 0; j < h->used; j++) {
      UInt e_lo, e_hi;
      if (!h->inuse[j])
         continue;
      /* Unpack the entry's (min, max) offsets from its key. */
      e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
      e_hi = ((UInt)h->key[j]) & 0xFFFF;
      vassert(e_lo <= e_hi);
      /* The two closed ranges overlap iff neither lies entirely
         below the other. */
      if (e_hi >= k_lo && k_hi >= e_lo)
         h->inuse[j] = False;
   }
}
612
613
/* In-place pass: scan forwards, maintaining an env mapping guest
   state ranges (packed as (minoff << 16) | maxoff keys) to the
   IRExpr (temp or atom) last known to hold that range's value.
   Gets whose range is already bound are replaced by the bound
   expression; Puts install new bindings and kill overlapping ones. */
static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt key = 0; /* keep gcc -O happy */
   Int i, j;
   HWord val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf("  ->  "); ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                           (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
            writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate */
      /* (only plain Puts; PutIs index dynamically so the written
         range cannot be bound to a single value here) */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}
706
707
708 /*---------------------------------------------------------------*/
709 /*--- In-place removal of redundant PUTs ---*/
710 /*---------------------------------------------------------------*/
711
712 /* Find any Get uses in st and invalidate any partially or fully
713 overlapping ranges listed in env. Due to the flattening phase, the
714 only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
715
/* Helper for redundant-Put removal: find any Get uses in 'st' and
   invalidate the overlapping ranges in env (those ranges' next event
   is now a read, not a write, so pending Puts to them are live).
   Also dumps env conservatively for dirty calls, bus events, CAS,
   LL/SC, and — according to pxControl — for memory accesses, to keep
   precise-exception state up to date. */
static void handle_gets_Stmt (
               HashHW* env,
               IRStmt* st,
               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
               VexRegisterUpdates pxControl
            )
{
   Int     j;
   UInt    key = 0; /* keep gcc -O happy */
   Bool    isGet;
   Bool    memRW = False;   /* does st touch guest memory? */
   IRExpr* e;

   switch (st->tag) {

      /* This is the only interesting case.  Deal with Gets in the RHS
         expression. */
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
               isGet = True;
               key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
               break;
            case Iex_GetI:
               isGet = True;
               key = mk_key_GetIPutI ( e->Iex.GetI.descr );
               break;
            case Iex_Load:
               isGet = False;
               memRW = True;
               break;
            default:
               isGet = False;
         }
         if (isGet) {
            UInt k_lo, k_hi;
            k_lo = (key >> 16) & 0xFFFF;
            k_hi = key & 0xFFFF;
            invalidateOverlaps(env, k_lo, k_hi);
         }
         break;

      /* Be very conservative for dirty helper calls; dump the entire
         environment.  The helper might read guest state, in which
         case it needs to be flushed first.  Also, the helper might
         access guest memory, in which case all parts of the guest
         state requiring precise exceptions needs to be flushed.  The
         crude solution is just to flush everything; we could easily
         enough do a lot better if needed. */
      /* Probably also overly-conservative, but also dump everything
         if we hit a memory bus event (fence, lock, unlock).  Ditto
         AbiHints, CASs, LLs and SCs. */
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         /* fall through */
      case Ist_MBE:
      case Ist_Dirty:
      case Ist_CAS:
      case Ist_LLSC:
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         break;

      /* all other cases are boring. */
      /* (the vasserts check the block is still flat — all operands
         must be atoms after the flattening pass) */
      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         memRW = True;
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         memRW = True;
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         memRW = True;
         break;
      }
      case Ist_Exit:
         vassert(isIRAtom(st->Ist.Exit.guard));
         break;

      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         break;

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.details->ix));
         vassert(isIRAtom(st->Ist.PutI.details->data));
         break;

      case Ist_NoOp:
      case Ist_IMark:
         break;

      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("handle_gets_Stmt");
   }

   if (memRW) {
      /* This statement accesses memory.  So we might need to dump all parts
         of the environment corresponding to guest state that may not
         be reordered with respect to memory references.  That means
         at least the stack pointer. */
      switch (pxControl) {
         case VexRegUpdAllregsAtMemAccess:
            /* Precise exceptions required at mem access.
               Flush all guest state. */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            break;
         case VexRegUpdSpAtMemAccess:
            /* We need to dump the stack pointer
               (needed for stack extension in m_signals.c).
               preciseMemExnsFn will use vex_control.iropt_register_updates
               to verify only the sp is to be checked. */
            /* fallthrough */
         case VexRegUpdUnwindregsAtMemAccess:
            for (j = 0; j < env->used; j++) {
               if (!env->inuse[j])
                  continue;
               /* Just flush the minimal amount required, as computed by
                  preciseMemExnsFn. */
               HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
               HWord k_hi = env->key[j] & 0xFFFF;
               if (preciseMemExnsFn( k_lo, k_hi, pxControl ))
                  env->inuse[j] = False;
            }
            break;
         case VexRegUpdAllregsAtEachInsn:
            // VexRegUpdAllregsAtEachInsn cannot happen here.
            // fall through
         case VexRegUpd_INVALID:
         default:
            vassert(0);
      }
   } /* if (memRW) */

}
867
868
869 /* Scan backwards, building up a set of (min offset, max
870 offset) pairs, indicating those parts of the guest state
871 for which the next event is a write.
872
873 On seeing a conditional exit, empty the set.
874
875 On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
876 completely within the set, remove the Put. Otherwise, add
877 (minoff,maxoff) to the set.
878
879 On seeing 'Get (minoff,maxoff)', remove any part of the set
880 overlapping (minoff,maxoff). The same has to happen for any events
881 which implicitly read parts of the guest state: dirty helper calls
882 and loads/stores.
883 */
884
/* In-place pass: scan backwards, tracking guest state ranges whose
   next (later-in-block) event is known to be a write.  A Put whose
   exact range is in that set is dead and is NoOp'd out.  Exits empty
   the set; Gets and memory accesses shrink it via handle_gets_Stmt. */
static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
               VexRegisterUpdates pxControl
            )
{
   Int     i, j;
   Bool    isPut;
   IRStmt* st;
   UInt    key = 0; /* keep gcc -O happy */

   /* Caller guarantees precise-exception mode permits this pass. */
   vassert(pxControl < VexRegUpdAllregsAtEachInsn);

   HashHW* env = newHHW();

   /* Initialise the running env with the fact that the final exit
      writes the IP (or, whatever it claims to write.  We don't
      care.) */
   key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
   addToHHW(env, (HWord)key, 0);

   /* And now scan backwards through the statements. */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         //Bool re_add;
         /* Need to throw out from the env, any part of it which
            doesn't overlap with the guest state written by this exit.
            Since the exit only writes one section, it's simplest to
            do this: (1) check whether env contains a write that
            completely overlaps the write done by this exit; (2) empty
            out env; and (3) if (1) was true, add the write done by
            this exit.

            To make (1) a bit simpler, merely search for a write that
            exactly matches the one done by this exit.  That's safe
            because it will fail as often or more often than a full
            overlap check, and failure to find an overlapping write in
            env is the safe case (we just nuke env if that
            happens). */
         //vassert(isIRAtom(st->Ist.Exit.guard));
         /* (1) */
         //key = mk_key_GetPut(st->Ist.Exit.offsIP,
         //                    typeOfIRConst(st->Ist.Exit.dst));
         //re_add = lookupHHW(env, NULL, key);
         /* (2) */
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         /* (3) */
         //if (0 && re_add)
         //   addToHHW(env, (HWord)key, 0);
         continue;
      }

      /* Deal with Puts */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
            vassert(isIRAtom(st->Ist.PutI.details->ix));
            vassert(isIRAtom(st->Ist.PutI.details->data));
            break;
         default:
            isPut = False;
      }
      /* Note: PutIs are never actually removed (the != Ist_PutI
         guard), only plain Puts; a PutI's dynamically-computed index
         makes exact-range matching against env unsound for removal. */
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add it
               to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn, pxControl );
   }
}
991
992
993 /*---------------------------------------------------------------*/
994 /*--- Constant propagation and folding ---*/
995 /*---------------------------------------------------------------*/
996
997 #if STATS_IROPT
998 /* How often sameIRExprs was invoked */
999 static UInt invocation_count;
1000 /* How often sameIRExprs recursed through IRTemp assignments */
1001 static UInt recursion_count;
1002 /* How often sameIRExprs found identical IRExprs */
1003 static UInt success_count;
1004 /* How often recursing through assignments to IRTemps helped
1005 establishing equality. */
1006 static UInt recursion_success_count;
1007 /* Whether or not recursing through an IRTemp assignment helped
1008 establishing IRExpr equality for a given sameIRExprs invocation. */
1009 static Bool recursion_helped;
1010 /* Whether or not a given sameIRExprs invocation recursed through an
1011 IRTemp assignment */
1012 static Bool recursed;
1013 /* Maximum number of nodes ever visited when comparing two IRExprs. */
1014 static UInt max_nodes_visited;
1015 #endif /* STATS_IROPT */
1016
1017 /* Count the number of nodes visited for a given sameIRExprs invocation. */
1018 static UInt num_nodes_visited;
1019
1020 /* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
1021 This is to guard against performance degradation by visiting large
1022 trees without success. */
1023 #define NODE_LIMIT 30
1024
1025
1026 /* The env in this section is a map from IRTemp to IRExpr*,
1027 that is, an array indexed by IRTemp. */
1028
1029 /* Do both expressions compute the same value? The answer is generally
1030 conservative, i.e. it will report that the expressions do not compute
1031 the same value when in fact they do. The reason is that we do not
   keep track of changes in the guest state and memory. Thus, two
1033 Get's, GetI's or Load's, even when accessing the same location, will be
1034 assumed to compute different values. After all the accesses may happen
1035 at different times and the guest state / memory can have changed in
1036 the meantime.
1037
1038 XXX IMPORTANT XXX the two expressions must have the same IR type.
1039 DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */
1040
1041 /* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
1042 wrapper that deals with the common tags-don't-match case, and a
1043 slower out of line general case. Saves a few insns. */
1044
1045 __attribute__((noinline))
1046 static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );
1047
1048 inline
sameIRExprs_aux(IRExpr ** env,IRExpr * e1,IRExpr * e2)1049 static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
1050 {
1051 if (e1->tag != e2->tag) return False;
1052 return sameIRExprs_aux2(env, e1, e2);
1053 }
1054
__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   /* Slow out-of-line general case.  The caller (sameIRExprs_aux) has
      already established that e1->tag == e2->tag.  The answer is
      conservative: False may be returned even when the two
      expressions do in fact compute the same value. */

   /* Bound the total work per sameIRExprs invocation; see NODE_LIMIT. */
   if (num_nodes_visited++ > NODE_LIMIT) return False;

   switch (e1->tag) {
      case Iex_RdTmp:
         if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;

         /* Distinct tmps: if both have bindings in env, compare the
            expressions they are bound to. */
         if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
            Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
                                        env[e2->Iex.RdTmp.tmp]);
#if STATS_IROPT
            /* Record that we recursed through tmp bindings, and
               whether doing so established equality. */
            recursed = True;
            if (same) recursion_helped = True;
#endif
            return same;
         }
         return False;

      case Iex_Get:
      case Iex_GetI:
      case Iex_Load:
         /* Guest state / memory could have changed in the meantime. */
         return False;

      case Iex_Binop:
         /* Same operator and pairwise-equal arguments. */
         return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
                                                 e2->Iex.Binop.arg1 )
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
                                                 e2->Iex.Binop.arg2 ));

      case Iex_Unop:
         return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
                        && sameIRExprs_aux( env, e1->Iex.Unop.arg,
                                                 e2->Iex.Unop.arg ));

      case Iex_Const: {
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         /* Caller contract: the two expressions have the same IR
            type, hence the constants must carry the same tag. */
         vassert(c1->tag == c2->tag);
         switch (c1->tag) {
            case Ico_U1:  return toBool( c1->Ico.U1  == c2->Ico.U1 );
            case Ico_U8:  return toBool( c1->Ico.U8  == c2->Ico.U8 );
            case Ico_U16: return toBool( c1->Ico.U16 == c2->Ico.U16 );
            case Ico_U32: return toBool( c1->Ico.U32 == c2->Ico.U32 );
            case Ico_U64: return toBool( c1->Ico.U64 == c2->Ico.U64 );
            /* Other constant kinds (floats, vectors) are
               conservatively treated as not-same. */
            default: break;
         }
         return False;
      }

      case Iex_Triop: {
         IRTriop *tri1 = e1->Iex.Triop.details;
         IRTriop *tri2 = e2->Iex.Triop.details;
         return toBool( tri1->op == tri2->op
                        && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
                        && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
                        && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
      }

      case Iex_ITE:
         return toBool(    sameIRExprs_aux( env, e1->Iex.ITE.cond,
                                                 e2->Iex.ITE.cond )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iftrue,
                                                 e2->Iex.ITE.iftrue )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iffalse,
                                                 e2->Iex.ITE.iffalse ));

      default:
         /* Qop, CCall, etc: not very likely to be "same". */
         break;
   }

   return False;
}
1132
1133 inline
sameIRExprs(IRExpr ** env,IRExpr * e1,IRExpr * e2)1134 static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
1135 {
1136 Bool same;
1137
1138 num_nodes_visited = 0;
1139 same = sameIRExprs_aux(env, e1, e2);
1140
1141 #if STATS_IROPT
1142 ++invocation_count;
1143 if (recursed) ++recursion_count;
1144 success_count += same;
1145 if (same && recursion_helped)
1146 ++recursion_success_count;
1147 if (num_nodes_visited > max_nodes_visited)
1148 max_nodes_visited = num_nodes_visited;
1149 recursed = False; /* reset */
1150 recursion_helped = False; /* reset */
1151 #endif /* STATS_IROPT */
1152
1153 return same;
1154 }
1155
1156
1157 /* Debugging-only hack (not used in production runs): make a guess
1158 whether sameIRExprs might assert due to the two args being of
1159 different types. If in doubt return False. Is only used when
1160 --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0.
1161 Bad because it duplicates functionality from typeOfIRExpr. See
1162 comment on the single use point below for rationale. */
1163 static
debug_only_hack_sameIRExprs_might_assert(IRExpr * e1,IRExpr * e2)1164 Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
1165 {
1166 if (e1->tag != e2->tag) return False;
1167 switch (e1->tag) {
1168 case Iex_Const: {
1169 /* The only interesting case */
1170 IRConst *c1 = e1->Iex.Const.con;
1171 IRConst *c2 = e2->Iex.Const.con;
1172 return c1->tag != c2->tag;
1173 }
1174 default:
1175 break;
1176 }
1177 return False;
1178 }
1179
1180
1181 /* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
isZeroU32(IRExpr * e)1182 static Bool isZeroU32 ( IRExpr* e )
1183 {
1184 return toBool( e->tag == Iex_Const
1185 && e->Iex.Const.con->tag == Ico_U32
1186 && e->Iex.Const.con->Ico.U32 == 0);
1187 }
1188
1189 /* Is this literally IRExpr_Const(IRConst_U64(0)) ?
1190 Currently unused; commented out to avoid compiler warning */
1191 #if 0
1192 static Bool isZeroU64 ( IRExpr* e )
1193 {
1194 return toBool( e->tag == Iex_Const
1195 && e->Iex.Const.con->tag == Ico_U64
1196 && e->Iex.Const.con->Ico.U64 == 0);
1197 }
1198 #endif
1199
1200 /* Is this literally IRExpr_Const(IRConst_V128(0)) ? */
isZeroV128(IRExpr * e)1201 static Bool isZeroV128 ( IRExpr* e )
1202 {
1203 return toBool( e->tag == Iex_Const
1204 && e->Iex.Const.con->tag == Ico_V128
1205 && e->Iex.Const.con->Ico.V128 == 0x0000);
1206 }
1207
1208 /* Is this literally IRExpr_Const(IRConst_V256(0)) ? */
isZeroV256(IRExpr * e)1209 static Bool isZeroV256 ( IRExpr* e )
1210 {
1211 return toBool( e->tag == Iex_Const
1212 && e->Iex.Const.con->tag == Ico_V256
1213 && e->Iex.Const.con->Ico.V256 == 0x00000000);
1214 }
1215
1216 /* Is this an integer constant with value 0 ? */
isZeroU(IRExpr * e)1217 static Bool isZeroU ( IRExpr* e )
1218 {
1219 if (e->tag != Iex_Const) return False;
1220 switch (e->Iex.Const.con->tag) {
1221 case Ico_U1: return toBool( e->Iex.Const.con->Ico.U1 == 0);
1222 case Ico_U8: return toBool( e->Iex.Const.con->Ico.U8 == 0);
1223 case Ico_U16: return toBool( e->Iex.Const.con->Ico.U16 == 0);
1224 case Ico_U32: return toBool( e->Iex.Const.con->Ico.U32 == 0);
1225 case Ico_U64: return toBool( e->Iex.Const.con->Ico.U64 == 0);
1226 case Ico_V256: return toBool( e->Iex.Const.con->Ico.V256 == 0x00000000);
1227 default: vpanic("isZeroU");
1228 }
1229 }
1230
1231 /* Is this an integer constant with value 1---1b ? */
isOnesU(IRExpr * e)1232 static Bool isOnesU ( IRExpr* e )
1233 {
1234 if (e->tag != Iex_Const) return False;
1235 switch (e->Iex.Const.con->tag) {
1236 case Ico_U8: return toBool( e->Iex.Const.con->Ico.U8 == 0xFF);
1237 case Ico_U16: return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
1238 case Ico_U32: return toBool( e->Iex.Const.con->Ico.U32
1239 == 0xFFFFFFFF);
1240 case Ico_U64: return toBool( e->Iex.Const.con->Ico.U64
1241 == 0xFFFFFFFFFFFFFFFFULL);
1242 default: ppIRExpr(e); vpanic("isOnesU");
1243 }
1244 }
1245
notBool(Bool b)1246 static Bool notBool ( Bool b )
1247 {
1248 if (b == True) return False;
1249 if (b == False) return True;
1250 vpanic("notBool");
1251 }
1252
1253 /* Make a zero which has the same type as the result of the given
1254 primop. */
mkZeroOfPrimopResultType(IROp op)1255 static IRExpr* mkZeroOfPrimopResultType ( IROp op )
1256 {
1257 switch (op) {
1258 case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
1259 case Iop_Xor8: return IRExpr_Const(IRConst_U8(0));
1260 case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
1261 case Iop_Sub32:
1262 case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
1263 case Iop_And64:
1264 case Iop_Sub64:
1265 case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
1266 case Iop_XorV128:
1267 case Iop_AndV128: return IRExpr_Const(IRConst_V128(0));
1268 case Iop_XorV256:
1269 case Iop_AndV256: return IRExpr_Const(IRConst_V256(0));
1270 default: vpanic("mkZeroOfPrimopResultType: bad primop");
1271 }
1272 }
1273
1274 /* Make a value containing all 1-bits, which has the same type as the
1275 result of the given primop. */
mkOnesOfPrimopResultType(IROp op)1276 static IRExpr* mkOnesOfPrimopResultType ( IROp op )
1277 {
1278 switch (op) {
1279 case Iop_CmpEQ32:
1280 case Iop_CmpEQ64:
1281 return IRExpr_Const(IRConst_U1(toBool(1)));
1282 case Iop_Or8:
1283 return IRExpr_Const(IRConst_U8(0xFF));
1284 case Iop_Or16:
1285 return IRExpr_Const(IRConst_U16(0xFFFF));
1286 case Iop_Or32:
1287 return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
1288 case Iop_CmpEQ8x8:
1289 case Iop_Or64:
1290 return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
1291 case Iop_CmpEQ8x16:
1292 case Iop_CmpEQ16x8:
1293 case Iop_CmpEQ32x4:
1294 return IRExpr_Const(IRConst_V128(0xFFFF));
1295 default:
1296 ppIROp(op);
1297 vpanic("mkOnesOfPrimopResultType: bad primop");
1298 }
1299 }
1300
1301 /* Helpers for folding Clz32/64. */
fold_Clz64(ULong value)1302 static UInt fold_Clz64 ( ULong value )
1303 {
1304 UInt i;
1305 vassert(value != 0ULL); /* no defined semantics for arg==0 */
1306 for (i = 0; i < 64; ++i) {
1307 if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
1308 }
1309 vassert(0);
1310 /*NOTREACHED*/
1311 return 0;
1312 }
1313
/* Helper for folding Clz32: count leading zero bits. */
static UInt fold_Clz32 ( UInt value )
{
   UInt n;
   UInt mask;
   vassert(value != 0); /* no defined semantics for arg==0 */
   /* Scan downwards from the most significant bit.  Terminates
      because value has at least one bit set. */
   n = 0;
   mask = ((UInt)1) << 31;
   while (0 == (value & mask)) {
      n++;
      mask >>= 1;
   }
   return n;
}
1325
1326 /* V64 holds 8 summary-constant bits in V128/V256 style. Convert to
1327 the corresponding real constant. */
1328 //XXX re-check this before use
1329 //static ULong de_summarise_V64 ( UChar v64 )
1330 //{
1331 // ULong r = 0;
1332 // if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
1333 // if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
1334 // if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
1335 // if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
1336 // if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
1337 // if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
1338 // if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
1339 // if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
1340 // return r;
1341 //}
1342
1343 /* Helper for arbitrary expression pattern matching in flat IR. If
1344 'e' is a reference to a tmp, look it up in env -- repeatedly, if
1345 necessary -- until it resolves to a non-tmp. Note that this can
1346 return NULL if it can't resolve 'e' to a new expression, which will
1347 be the case if 'e' is instead defined by an IRStmt (IRDirty or
1348 LLSC). */
static IRExpr* chase ( IRExpr** env, IRExpr* e )
{
   /* Follow RdTmp references until something else (or an unbound tmp,
      yielding NULL) is reached.  Guaranteed to terminate: all tmps
      must be defined before use, hence a tmp cannot be bound
      (directly or indirectly) to itself. */
   for (; e != NULL && e->tag == Iex_RdTmp;
        e = env[(Int)e->Iex.RdTmp.tmp]) {
      if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); }
   }
   return e;
}
1361
1362 /* Similar to |chase|, but follows at most one level of tmp reference. */
/* Similar to |chase|, but follows at most one level of tmp reference;
   anything that is not a RdTmp is passed through unchanged. */
static IRExpr* chase1 ( IRExpr** env, IRExpr* e )
{
   return (e != NULL && e->tag == Iex_RdTmp)
             ? env[(Int)e->Iex.RdTmp.tmp]
             : e;
}
1370
fold_Expr(IRExpr ** env,IRExpr * e)1371 static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
1372 {
1373 Int shift;
1374 IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
1375
1376 switch (e->tag) {
1377 case Iex_Unop:
1378 /* UNARY ops */
1379 if (e->Iex.Unop.arg->tag == Iex_Const) {
1380 switch (e->Iex.Unop.op) {
1381 case Iop_1Uto8:
1382 e2 = IRExpr_Const(IRConst_U8(toUChar(
1383 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1384 ? 1 : 0)));
1385 break;
1386 case Iop_1Uto32:
1387 e2 = IRExpr_Const(IRConst_U32(
1388 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1389 ? 1 : 0));
1390 break;
1391 case Iop_1Uto64:
1392 e2 = IRExpr_Const(IRConst_U64(
1393 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1394 ? 1 : 0));
1395 break;
1396
1397 case Iop_1Sto8:
1398 e2 = IRExpr_Const(IRConst_U8(toUChar(
1399 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1400 ? 0xFF : 0)));
1401 break;
1402 case Iop_1Sto16:
1403 e2 = IRExpr_Const(IRConst_U16(toUShort(
1404 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1405 ? 0xFFFF : 0)));
1406 break;
1407 case Iop_1Sto32:
1408 e2 = IRExpr_Const(IRConst_U32(
1409 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1410 ? 0xFFFFFFFF : 0));
1411 break;
1412 case Iop_1Sto64:
1413 e2 = IRExpr_Const(IRConst_U64(
1414 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1415 ? 0xFFFFFFFFFFFFFFFFULL : 0));
1416 break;
1417
1418 case Iop_8Sto32: {
1419 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
1420 u32 <<= 24;
1421 u32 = (Int)u32 >> 24; /* signed shift */
1422 e2 = IRExpr_Const(IRConst_U32(u32));
1423 break;
1424 }
1425 case Iop_16Sto32: {
1426 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1427 u32 <<= 16;
1428 u32 = (Int)u32 >> 16; /* signed shift */
1429 e2 = IRExpr_Const(IRConst_U32(u32));
1430 break;
1431 }
1432 case Iop_8Uto64:
1433 e2 = IRExpr_Const(IRConst_U64(
1434 0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1435 break;
1436 case Iop_16Uto64:
1437 e2 = IRExpr_Const(IRConst_U64(
1438 0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1439 break;
1440 case Iop_8Uto32:
1441 e2 = IRExpr_Const(IRConst_U32(
1442 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1443 break;
1444 case Iop_8Sto16: {
1445 UShort u16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
1446 u16 <<= 8;
1447 u16 = (Short)u16 >> 8; /* signed shift */
1448 e2 = IRExpr_Const(IRConst_U16(u16));
1449 break;
1450 }
1451 case Iop_8Uto16:
1452 e2 = IRExpr_Const(IRConst_U16(
1453 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1454 break;
1455 case Iop_16Uto32:
1456 e2 = IRExpr_Const(IRConst_U32(
1457 0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1458 break;
1459 case Iop_32to16:
1460 e2 = IRExpr_Const(IRConst_U16(toUShort(
1461 0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1462 break;
1463 case Iop_32to8:
1464 e2 = IRExpr_Const(IRConst_U8(toUChar(
1465 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1466 break;
1467 case Iop_32to1:
1468 e2 = IRExpr_Const(IRConst_U1(toBool(
1469 1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1470 )));
1471 break;
1472 case Iop_64to1:
1473 e2 = IRExpr_Const(IRConst_U1(toBool(
1474 1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
1475 )));
1476 break;
1477
1478 case Iop_NotV128:
1479 e2 = IRExpr_Const(IRConst_V128(
1480 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
1481 break;
1482 case Iop_Not64:
1483 e2 = IRExpr_Const(IRConst_U64(
1484 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
1485 break;
1486 case Iop_Not32:
1487 e2 = IRExpr_Const(IRConst_U32(
1488 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1489 break;
1490 case Iop_Not16:
1491 e2 = IRExpr_Const(IRConst_U16(toUShort(
1492 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
1493 break;
1494 case Iop_Not8:
1495 e2 = IRExpr_Const(IRConst_U8(toUChar(
1496 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
1497 break;
1498
1499 case Iop_Not1:
1500 e2 = IRExpr_Const(IRConst_U1(
1501 notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
1502 break;
1503
1504 case Iop_64to8: {
1505 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1506 w64 &= 0xFFULL;
1507 e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
1508 break;
1509 }
1510 case Iop_64to16: {
1511 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1512 w64 &= 0xFFFFULL;
1513 e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
1514 break;
1515 }
1516 case Iop_64to32: {
1517 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1518 w64 &= 0x00000000FFFFFFFFULL;
1519 e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1520 break;
1521 }
1522 case Iop_64HIto32: {
1523 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1524 w64 >>= 32;
1525 e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1526 break;
1527 }
1528 case Iop_32Uto64:
1529 e2 = IRExpr_Const(IRConst_U64(
1530 0xFFFFFFFFULL
1531 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
1532 break;
1533 case Iop_16Sto64: {
1534 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1535 u64 <<= 48;
1536 u64 = (Long)u64 >> 48; /* signed shift */
1537 e2 = IRExpr_Const(IRConst_U64(u64));
1538 break;
1539 }
1540 case Iop_32Sto64: {
1541 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1542 u64 <<= 32;
1543 u64 = (Long)u64 >> 32; /* signed shift */
1544 e2 = IRExpr_Const(IRConst_U64(u64));
1545 break;
1546 }
1547
1548 case Iop_16to8: {
1549 UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1550 w16 &= 0xFF;
1551 e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
1552 break;
1553 }
1554 case Iop_16HIto8: {
1555 UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1556 w16 >>= 8;
1557 w16 &= 0xFF;
1558 e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
1559 break;
1560 }
1561
1562 case Iop_CmpNEZ8:
1563 e2 = IRExpr_Const(IRConst_U1(toBool(
1564 0 !=
1565 (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
1566 )));
1567 break;
1568 case Iop_CmpNEZ32:
1569 e2 = IRExpr_Const(IRConst_U1(toBool(
1570 0 !=
1571 (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1572 )));
1573 break;
1574 case Iop_CmpNEZ64:
1575 e2 = IRExpr_Const(IRConst_U1(toBool(
1576 0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
1577 )));
1578 break;
1579
1580 case Iop_CmpwNEZ32: {
1581 UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1582 if (w32 == 0)
1583 e2 = IRExpr_Const(IRConst_U32( 0 ));
1584 else
1585 e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
1586 break;
1587 }
1588 case Iop_CmpwNEZ64: {
1589 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1590 if (w64 == 0)
1591 e2 = IRExpr_Const(IRConst_U64( 0 ));
1592 else
1593 e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
1594 break;
1595 }
1596
1597 case Iop_Left32: {
1598 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1599 Int s32 = (Int)(u32 & 0xFFFFFFFF);
1600 s32 = (s32 | (-s32));
1601 e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
1602 break;
1603 }
1604
1605 case Iop_Left64: {
1606 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1607 Long s64 = (Long)u64;
1608 s64 = (s64 | (-s64));
1609 e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
1610 break;
1611 }
1612
1613 case Iop_Clz32: {
1614 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1615 if (u32 != 0)
1616 e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
1617 break;
1618 }
1619 case Iop_Clz64: {
1620 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1621 if (u64 != 0ULL)
1622 e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
1623 break;
1624 }
1625
1626 /* For these vector ones, can't fold all cases, but at least
1627 do the most obvious one. Could do better here using
1628 summarise/desummarise of vector constants, but too
1629 difficult to verify; hence just handle the zero cases. */
1630 case Iop_32UtoV128: {
1631 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1632 if (0 == u32) {
1633 e2 = IRExpr_Const(IRConst_V128(0x0000));
1634 } else {
1635 goto unhandled;
1636 }
1637 break;
1638 }
1639 case Iop_V128to64: {
1640 UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
1641 if (0 == ((v128 >> 0) & 0xFF)) {
1642 e2 = IRExpr_Const(IRConst_U64(0));
1643 } else {
1644 goto unhandled;
1645 }
1646 break;
1647 }
1648 case Iop_V128HIto64: {
1649 UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
1650 if (0 == ((v128 >> 8) & 0xFF)) {
1651 e2 = IRExpr_Const(IRConst_U64(0));
1652 } else {
1653 goto unhandled;
1654 }
1655 break;
1656 }
1657 case Iop_64UtoV128: {
1658 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1659 if (0 == u64) {
1660 e2 = IRExpr_Const(IRConst_V128(0x0000));
1661 } else {
1662 goto unhandled;
1663 }
1664 break;
1665 }
1666
1667 /* Even stupider (although still correct ..) */
1668 case Iop_V256to64_0: case Iop_V256to64_1:
1669 case Iop_V256to64_2: case Iop_V256to64_3: {
1670 UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
1671 if (v256 == 0x00000000) {
1672 e2 = IRExpr_Const(IRConst_U64(0));
1673 } else {
1674 goto unhandled;
1675 }
1676 break;
1677 }
1678
1679 case Iop_ZeroHI64ofV128: {
1680 /* Could do better here -- only need to look at the bottom 64 bits
1681 of the argument, really. */
1682 UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
1683 if (v128 == 0x0000) {
1684 e2 = IRExpr_Const(IRConst_V128(0x0000));
1685 } else {
1686 goto unhandled;
1687 }
1688 break;
1689 }
1690
1691 default:
1692 goto unhandled;
1693 }
1694 }
1695 break;
1696
1697 case Iex_Binop:
1698 /* BINARY ops */
1699 if (e->Iex.Binop.arg1->tag == Iex_Const
1700 && e->Iex.Binop.arg2->tag == Iex_Const) {
1701 /* cases where both args are consts */
1702 switch (e->Iex.Binop.op) {
1703
1704 /* -- Or -- */
1705 case Iop_Or8:
1706 e2 = IRExpr_Const(IRConst_U8(toUChar(
1707 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1708 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1709 break;
1710 case Iop_Or16:
1711 e2 = IRExpr_Const(IRConst_U16(toUShort(
1712 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1713 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1714 break;
1715 case Iop_Or32:
1716 e2 = IRExpr_Const(IRConst_U32(
1717 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1718 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1719 break;
1720 case Iop_Or64:
1721 e2 = IRExpr_Const(IRConst_U64(
1722 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1723 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1724 break;
1725 case Iop_OrV128:
1726 e2 = IRExpr_Const(IRConst_V128(
1727 (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1728 | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1729 break;
1730
1731 /* -- Xor -- */
1732 case Iop_Xor8:
1733 e2 = IRExpr_Const(IRConst_U8(toUChar(
1734 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1735 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1736 break;
1737 case Iop_Xor16:
1738 e2 = IRExpr_Const(IRConst_U16(toUShort(
1739 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1740 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1741 break;
1742 case Iop_Xor32:
1743 e2 = IRExpr_Const(IRConst_U32(
1744 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1745 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1746 break;
1747 case Iop_Xor64:
1748 e2 = IRExpr_Const(IRConst_U64(
1749 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1750 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1751 break;
1752 case Iop_XorV128:
1753 e2 = IRExpr_Const(IRConst_V128(
1754 (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1755 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1756 break;
1757
1758 /* -- And -- */
1759 case Iop_And8:
1760 e2 = IRExpr_Const(IRConst_U8(toUChar(
1761 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1762 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1763 break;
1764 case Iop_And16:
1765 e2 = IRExpr_Const(IRConst_U16(toUShort(
1766 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1767 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1768 break;
1769 case Iop_And32:
1770 e2 = IRExpr_Const(IRConst_U32(
1771 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1772 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1773 break;
1774 case Iop_And64:
1775 e2 = IRExpr_Const(IRConst_U64(
1776 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1777 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1778 break;
1779 case Iop_AndV128:
1780 e2 = IRExpr_Const(IRConst_V128(
1781 (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1782 & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1783 break;
1784
1785 /* -- Add -- */
1786 case Iop_Add8:
1787 e2 = IRExpr_Const(IRConst_U8(toUChar(
1788 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1789 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1790 break;
1791 case Iop_Add32:
1792 e2 = IRExpr_Const(IRConst_U32(
1793 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1794 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1795 break;
1796 case Iop_Add64:
1797 e2 = IRExpr_Const(IRConst_U64(
1798 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1799 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1800 break;
1801
1802 /* -- Sub -- */
1803 case Iop_Sub8:
1804 e2 = IRExpr_Const(IRConst_U8(toUChar(
1805 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1806 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1807 break;
1808 case Iop_Sub32:
1809 e2 = IRExpr_Const(IRConst_U32(
1810 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1811 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1812 break;
1813 case Iop_Sub64:
1814 e2 = IRExpr_Const(IRConst_U64(
1815 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1816 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1817 break;
1818
1819 /* -- Max32U -- */
1820 case Iop_Max32U: {
1821 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1822 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1823 UInt res = u32a > u32b ? u32a : u32b;
1824 e2 = IRExpr_Const(IRConst_U32(res));
1825 break;
1826 }
1827
1828 /* -- Mul -- */
1829 case Iop_Mul32:
1830 e2 = IRExpr_Const(IRConst_U32(
1831 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1832 * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1833 break;
1834 case Iop_Mul64:
1835 e2 = IRExpr_Const(IRConst_U64(
1836 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1837 * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1838 break;
1839
1840 case Iop_MullS32: {
1841 /* very paranoid */
1842 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1843 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1844 Int s32a = (Int)u32a;
1845 Int s32b = (Int)u32b;
1846 Long s64a = (Long)s32a;
1847 Long s64b = (Long)s32b;
1848 Long sres = s64a * s64b;
1849 ULong ures = (ULong)sres;
1850 e2 = IRExpr_Const(IRConst_U64(ures));
1851 break;
1852 }
1853
1854 /* -- Shl -- */
1855 case Iop_Shl32:
1856 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1857 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1858 if (shift >= 0 && shift <= 31)
1859 e2 = IRExpr_Const(IRConst_U32(
1860 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1861 << shift)));
1862 break;
1863 case Iop_Shl64:
1864 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1865 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1866 if (shift >= 0 && shift <= 63)
1867 e2 = IRExpr_Const(IRConst_U64(
1868 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1869 << shift)));
1870 break;
1871
1872 /* -- Sar -- */
1873 case Iop_Sar32: {
1874 /* paranoid ... */
1875 /*signed*/ Int s32;
1876 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1877 s32 = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1878 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1879 if (shift >= 0 && shift <= 31) {
1880 s32 >>=/*signed*/ shift;
1881 e2 = IRExpr_Const(IRConst_U32((UInt)s32));
1882 }
1883 break;
1884 }
1885 case Iop_Sar64: {
1886 /* paranoid ... */
1887 /*signed*/ Long s64;
1888 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1889 s64 = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1890 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1891 if (shift >= 0 && shift <= 63) {
1892 s64 >>=/*signed*/ shift;
1893 e2 = IRExpr_Const(IRConst_U64((ULong)s64));
1894 }
1895 break;
1896 }
1897
1898 /* -- Shr -- */
1899 case Iop_Shr32: {
1900 /* paranoid ... */
1901 /*unsigned*/ UInt u32;
1902 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1903 u32 = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1904 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1905 if (shift >= 0 && shift <= 31) {
1906 u32 >>=/*unsigned*/ shift;
1907 e2 = IRExpr_Const(IRConst_U32(u32));
1908 }
1909 break;
1910 }
1911 case Iop_Shr64: {
1912 /* paranoid ... */
1913 /*unsigned*/ ULong u64;
1914 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1915 u64 = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1916 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1917 if (shift >= 0 && shift <= 63) {
1918 u64 >>=/*unsigned*/ shift;
1919 e2 = IRExpr_Const(IRConst_U64(u64));
1920 }
1921 break;
1922 }
1923
1924 /* -- CmpEQ -- */
1925 case Iop_CmpEQ32:
1926 e2 = IRExpr_Const(IRConst_U1(toBool(
1927 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1928 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1929 break;
1930 case Iop_CmpEQ64:
1931 e2 = IRExpr_Const(IRConst_U1(toBool(
1932 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1933 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1934 break;
1935
1936 /* -- CmpNE -- */
1937 case Iop_CmpNE8:
1938 case Iop_CasCmpNE8:
1939 case Iop_ExpCmpNE8:
1940 e2 = IRExpr_Const(IRConst_U1(toBool(
1941 ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
1942 != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
1943 break;
1944 case Iop_CmpNE32:
1945 case Iop_CasCmpNE32:
1946 case Iop_ExpCmpNE32:
1947 e2 = IRExpr_Const(IRConst_U1(toBool(
1948 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1949 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1950 break;
1951 case Iop_CmpNE64:
1952 case Iop_CasCmpNE64:
1953 case Iop_ExpCmpNE64:
1954 e2 = IRExpr_Const(IRConst_U1(toBool(
1955 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1956 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1957 break;
1958
1959 /* -- CmpLEU -- */
1960 case Iop_CmpLE32U:
1961 e2 = IRExpr_Const(IRConst_U1(toBool(
1962 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1963 <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1964 break;
1965 case Iop_CmpLE64U:
1966 e2 = IRExpr_Const(IRConst_U1(toBool(
1967 ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1968 <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1969 break;
1970
1971 /* -- CmpLES -- */
1972 case Iop_CmpLE32S:
1973 e2 = IRExpr_Const(IRConst_U1(toBool(
1974 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1975 <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1976 break;
1977 case Iop_CmpLE64S:
1978 e2 = IRExpr_Const(IRConst_U1(toBool(
1979 ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1980 <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1981 break;
1982
1983 /* -- CmpLTS -- */
1984 case Iop_CmpLT32S:
1985 e2 = IRExpr_Const(IRConst_U1(toBool(
1986 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1987 < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1988 break;
1989 case Iop_CmpLT64S:
1990 e2 = IRExpr_Const(IRConst_U1(toBool(
1991 ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1992 < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1993 break;
1994
1995 /* -- CmpLTU -- */
1996 case Iop_CmpLT32U:
1997 e2 = IRExpr_Const(IRConst_U1(toBool(
1998 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1999 < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
2000 break;
2001 case Iop_CmpLT64U:
2002 e2 = IRExpr_Const(IRConst_U1(toBool(
2003 ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
2004 < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
2005 break;
2006
2007 /* -- CmpORD -- */
2008 case Iop_CmpORD32S: {
2009 /* very paranoid */
2010 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
2011 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
2012 Int s32a = (Int)u32a;
2013 Int s32b = (Int)u32b;
2014 Int r = 0x2; /* EQ */
2015 if (s32a < s32b) {
2016 r = 0x8; /* LT */
2017 }
2018 else if (s32a > s32b) {
2019 r = 0x4; /* GT */
2020 }
2021 e2 = IRExpr_Const(IRConst_U32(r));
2022 break;
2023 }
2024
2025 /* -- nHLto2n -- */
2026 case Iop_32HLto64:
2027 e2 = IRExpr_Const(IRConst_U64(
2028 (((ULong)(e->Iex.Binop.arg1
2029 ->Iex.Const.con->Ico.U32)) << 32)
2030 | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
2031 ));
2032 break;
2033 case Iop_64HLto128:
               /* We can't fold this, because there is no way to
                  express the result in IR, but at least pretend to
                  handle it, so as to stop getting blasted with
                  no-rule-for-this-primop messages. */
2038 break;
2039 /* For this vector one, can't fold all cases, but at
2040 least do the most obvious one. Could do better here
2041 using summarise/desummarise of vector constants, but
2042 too difficult to verify; hence just handle the zero
2043 cases. */
2044 case Iop_64HLtoV128: {
2045 ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64;
2046 ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2047 if (0 == argHi && 0 == argLo) {
2048 e2 = IRExpr_Const(IRConst_V128(0));
2049 } else {
2050 goto unhandled;
2051 }
2052 break;
2053 }
2054 /* Same reasoning for the 256-bit version. */
2055 case Iop_V128HLtoV256: {
2056 IRExpr* argHi = e->Iex.Binop.arg1;
2057 IRExpr* argLo = e->Iex.Binop.arg2;
2058 if (isZeroV128(argHi) && isZeroV128(argLo)) {
2059 e2 = IRExpr_Const(IRConst_V256(0));
2060 } else {
2061 goto unhandled;
2062 }
2063 break;
2064 }
2065
2066 /* -- V128 stuff -- */
2067 case Iop_InterleaveLO8x16: {
2068 /* This turns up a lot in Memcheck instrumentation of
2069 Icc generated code. I don't know why. */
2070 UShort arg1 = e->Iex.Binop.arg1->Iex.Const.con->Ico.V128;
2071 UShort arg2 = e->Iex.Binop.arg2->Iex.Const.con->Ico.V128;
2072 if (0 == arg1 && 0 == arg2) {
2073 e2 = IRExpr_Const(IRConst_V128(0));
2074 } else {
2075 goto unhandled;
2076 }
2077 break;
2078 }
2079
2080 default:
2081 goto unhandled;
2082 }
2083
2084 } else {
2085
2086 /* other cases (identities, etc) */
2087 switch (e->Iex.Binop.op) {
2088
2089 case Iop_Shl32:
2090 case Iop_Shl64:
2091 case Iop_Shr64:
2092 case Iop_Sar64:
2093 /* Shl32/Shl64/Shr64/Sar64(x,0) ==> x */
2094 if (isZeroU(e->Iex.Binop.arg2)) {
2095 e2 = e->Iex.Binop.arg1;
2096 break;
2097 }
2098 /* Shl32/Shl64/Shr64(0,x) ==> 0 */
2099 if (isZeroU(e->Iex.Binop.arg1)) {
2100 e2 = e->Iex.Binop.arg1;
2101 break;
2102 }
2103 break;
2104
2105 case Iop_Sar32:
2106 case Iop_Shr32:
2107 /* Shr32/Sar32(x,0) ==> x */
2108 if (isZeroU(e->Iex.Binop.arg2)) {
2109 e2 = e->Iex.Binop.arg1;
2110 break;
2111 }
2112 break;
2113
2114 case Iop_Or8:
2115 case Iop_Or16:
2116 case Iop_Or32:
2117 case Iop_Or64:
2118 case Iop_Max32U:
2119 /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
2120 if (isZeroU(e->Iex.Binop.arg2)) {
2121 e2 = e->Iex.Binop.arg1;
2122 break;
2123 }
2124 /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
2125 if (isZeroU(e->Iex.Binop.arg1)) {
2126 e2 = e->Iex.Binop.arg2;
2127 break;
2128 }
2129 /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
2130 /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
2131 if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) {
2132 e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2133 break;
2134 }
2135 /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
2136 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2137 e2 = e->Iex.Binop.arg1;
2138 break;
2139 }
2140 break;
2141
2142 case Iop_Add8:
2143 /* Add8(t,t) ==> t << 1.
2144 Memcheck doesn't understand that
2145 x+x produces a defined least significant bit, and it seems
2146 simplest just to get rid of the problem by rewriting it
2147 out, since the opportunity to do so exists. */
2148 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2149 e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1,
2150 IRExpr_Const(IRConst_U8(1)));
2151 break;
2152 }
2153 break;
2154
2155 /* NB no Add16(t,t) case yet as no known test case exists */
2156
2157 case Iop_Add32:
2158 case Iop_Add64:
2159 /* Add32/Add64(x,0) ==> x */
2160 if (isZeroU(e->Iex.Binop.arg2)) {
2161 e2 = e->Iex.Binop.arg1;
2162 break;
2163 }
2164 /* Add32/Add64(0,x) ==> x */
2165 if (isZeroU(e->Iex.Binop.arg1)) {
2166 e2 = e->Iex.Binop.arg2;
2167 break;
2168 }
2169 /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
2170 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2171 e2 = IRExpr_Binop(
2172 e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64,
2173 e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1)));
2174 break;
2175 }
2176 break;
2177
2178 case Iop_Sub32:
2179 case Iop_Sub64:
2180 /* Sub32/Sub64(x,0) ==> x */
2181 if (isZeroU(e->Iex.Binop.arg2)) {
2182 e2 = e->Iex.Binop.arg1;
2183 break;
2184 }
2185 /* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */
2186 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2187 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2188 break;
2189 }
2190 break;
2191 case Iop_Sub8x16:
2192 /* Sub8x16(x,0) ==> x */
2193 if (isZeroV128(e->Iex.Binop.arg2)) {
2194 e2 = e->Iex.Binop.arg1;
2195 break;
2196 }
2197 break;
2198
2199 case Iop_And8:
2200 case Iop_And16:
2201 case Iop_And32:
2202 case Iop_And64:
2203 /* And8/And16/And32/And64(x,1---1b) ==> x */
2204 if (isOnesU(e->Iex.Binop.arg2)) {
2205 e2 = e->Iex.Binop.arg1;
2206 break;
2207 }
2208 /* And8/And16/And32/And64(1---1b,x) ==> x */
2209 if (isOnesU(e->Iex.Binop.arg1)) {
2210 e2 = e->Iex.Binop.arg2;
2211 break;
2212 }
2213 /* And8/And16/And32/And64(x,0) ==> 0 */
2214 if (isZeroU(e->Iex.Binop.arg2)) {
2215 e2 = e->Iex.Binop.arg2;
2216 break;
2217 }
2218 /* And8/And16/And32/And64(0,x) ==> 0 */
2219 if (isZeroU(e->Iex.Binop.arg1)) {
2220 e2 = e->Iex.Binop.arg1;
2221 break;
2222 }
2223 /* And8/And16/And32/And64(t,t) ==> t, for some IRTemp t */
2224 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2225 e2 = e->Iex.Binop.arg1;
2226 break;
2227 }
2228 break;
2229
2230 case Iop_AndV128:
2231 case Iop_AndV256:
2232 /* And8/And16/AndV128/AndV256(t,t)
2233 ==> t, for some IRTemp t */
2234 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2235 e2 = e->Iex.Binop.arg1;
2236 break;
2237 }
2238 /* Deal with either arg zero. Could handle other And
2239 cases here too. */
2240 if (e->Iex.Binop.op == Iop_AndV256
2241 && (isZeroV256(e->Iex.Binop.arg1)
2242 || isZeroV256(e->Iex.Binop.arg2))) {
2243 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2244 break;
2245 } else if (e->Iex.Binop.op == Iop_AndV128
2246 && (isZeroV128(e->Iex.Binop.arg1)
2247 || isZeroV128(e->Iex.Binop.arg2))) {
2248 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2249 break;
2250 }
2251 break;
2252
2253 case Iop_OrV128:
2254 case Iop_OrV256:
2255 /* V128/V256(t,t) ==> t, for some IRTemp t */
2256 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2257 e2 = e->Iex.Binop.arg1;
2258 break;
2259 }
2260 /* OrV128(t,0) ==> t */
2261 if (e->Iex.Binop.op == Iop_OrV128) {
2262 if (isZeroV128(e->Iex.Binop.arg2)) {
2263 e2 = e->Iex.Binop.arg1;
2264 break;
2265 }
2266 if (isZeroV128(e->Iex.Binop.arg1)) {
2267 e2 = e->Iex.Binop.arg2;
2268 break;
2269 }
2270 }
2271 /* OrV256(t,0) ==> t */
2272 if (e->Iex.Binop.op == Iop_OrV256) {
2273 if (isZeroV256(e->Iex.Binop.arg2)) {
2274 e2 = e->Iex.Binop.arg1;
2275 break;
2276 }
2277 //Disabled because there's no known test case right now.
2278 //if (isZeroV256(e->Iex.Binop.arg1)) {
2279 // e2 = e->Iex.Binop.arg2;
2280 // break;
2281 //}
2282 }
2283 break;
2284
2285 case Iop_Xor8:
2286 case Iop_Xor16:
2287 case Iop_Xor32:
2288 case Iop_Xor64:
2289 case Iop_XorV128:
2290 case Iop_XorV256:
2291 /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
2292 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2293 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2294 break;
2295 }
2296 /* XorV128(t,0) ==> t */
2297 if (e->Iex.Binop.op == Iop_XorV128) {
2298 if (isZeroV128(e->Iex.Binop.arg2)) {
2299 e2 = e->Iex.Binop.arg1;
2300 break;
2301 }
2302 //Disabled because there's no known test case right now.
2303 //if (isZeroV128(e->Iex.Binop.arg1)) {
2304 // e2 = e->Iex.Binop.arg2;
2305 // break;
2306 //}
2307 } else {
2308 /* Xor8/16/32/64(0,t) ==> t */
2309 if (isZeroU(e->Iex.Binop.arg1)) {
2310 e2 = e->Iex.Binop.arg2;
2311 break;
2312 }
2313 /* Xor8/16/32/64(t,0) ==> t */
2314 if (isZeroU(e->Iex.Binop.arg2)) {
2315 e2 = e->Iex.Binop.arg1;
2316 break;
2317 }
2318 }
2319 break;
2320
2321 case Iop_CmpNE32:
2322 /* CmpNE32(t,t) ==> 0, for some IRTemp t */
2323 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2324 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2325 break;
2326 }
2327 /* CmpNE32(1Uto32(b), 0) ==> b */
2328 if (isZeroU32(e->Iex.Binop.arg2)) {
2329 IRExpr* a1 = chase(env, e->Iex.Binop.arg1);
2330 if (a1 && a1->tag == Iex_Unop
2331 && a1->Iex.Unop.op == Iop_1Uto32) {
2332 e2 = a1->Iex.Unop.arg;
2333 break;
2334 }
2335 }
2336 break;
2337
2338 case Iop_CmpEQ32:
2339 case Iop_CmpEQ64:
2340 case Iop_CmpEQ8x8:
2341 case Iop_CmpEQ8x16:
2342 case Iop_CmpEQ16x8:
2343 case Iop_CmpEQ32x4:
2344 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2345 e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2346 break;
2347 }
2348 break;
2349
2350 default:
2351 break;
2352 }
2353 }
2354 break;
2355
2356 case Iex_ITE:
2357 /* ITE */
         /* is the discriminant a constant? */
2359 if (e->Iex.ITE.cond->tag == Iex_Const) {
2360 /* assured us by the IR type rules */
2361 vassert(e->Iex.ITE.cond->Iex.Const.con->tag == Ico_U1);
2362 e2 = e->Iex.ITE.cond->Iex.Const.con->Ico.U1
2363 ? e->Iex.ITE.iftrue : e->Iex.ITE.iffalse;
2364 }
2365 else
2366 /* are the arms identical? (pretty weedy test) */
2367 if (sameIRExprs(env, e->Iex.ITE.iftrue,
2368 e->Iex.ITE.iffalse)) {
2369 e2 = e->Iex.ITE.iffalse;
2370 }
2371 break;
2372
2373 default:
2374 /* not considered */
2375 break;
2376 }
2377
2378 /* Show cases where we've found but not folded 'op(t,t)'. Be
2379 careful not to call sameIRExprs with values of different types,
2380 though, else it will assert (and so it should!). We can't
2381 conveniently call typeOfIRExpr on the two args without a whole
2382 bunch of extra plumbing to pass in a type env, so just use a
2383 hacky test to check the arguments are not anything that might
2384 sameIRExprs to assert. This is only OK because this kludge is
2385 only used for debug printing, not for "real" operation. For
2386 "real" operation (ie, all other calls to sameIRExprs), it is
      essential that the two args have the same type.
2388
2389 The "right" solution is to plumb the containing block's
2390 IRTypeEnv through to here and use typeOfIRExpr to be sure. But
2391 that's a bunch of extra parameter passing which will just slow
2392 down the normal case, for no purpose. */
2393 if (vex_control.iropt_verbosity > 0
2394 && e == e2
2395 && e->tag == Iex_Binop
2396 && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1,
2397 e->Iex.Binop.arg2)
2398 && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2399 vex_printf("vex iropt: fold_Expr: no ident rule for: ");
2400 ppIRExpr(e);
2401 vex_printf("\n");
2402 }
2403
2404 /* Show the overall results of folding. */
2405 if (DEBUG_IROPT && e2 != e) {
2406 vex_printf("FOLD: ");
2407 ppIRExpr(e); vex_printf(" -> ");
2408 ppIRExpr(e2); vex_printf("\n");
2409 }
2410
2411 return e2;
2412
2413 unhandled:
2414 # if 0
2415 vex_printf("\n\n");
2416 ppIRExpr(e);
2417 vpanic("fold_Expr: no rule for the above");
2418 # else
2419 if (vex_control.iropt_verbosity > 0) {
2420 vex_printf("vex iropt: fold_Expr: no const rule for: ");
2421 ppIRExpr(e);
2422 vex_printf("\n");
2423 }
2424 return e2;
2425 # endif
2426 }
2427
2428
2429 /* Apply the subst to a simple 1-level expression -- guaranteed to be
2430 1-level due to previous flattening pass. */
2431
static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
{
   switch (ex->tag) {
      case Iex_RdTmp: {
         IRExpr* bound = env[(Int)ex->Iex.RdTmp.tmp];
         /* Substitute only tmp-to-tmp bindings and constants, with
            the exception of F64i constants, which are deliberately
            left in their tmps (so they can be CSE'd later). */
         if (bound != NULL
             && (bound->tag == Iex_RdTmp
                 || (bound->tag == Iex_Const
                     && bound->Iex.Const.con->tag != Ico_F64i)))
            return bound;
         /* not bound in env, or bound to something unsuitable */
         return ex;
      }

      case Iex_Const:
      case Iex_Get:
         /* Nothing substitutable inside these. */
         return ex;

      case Iex_GetI:
         vassert(isIRAtom(ex->Iex.GetI.ix));
         return IRExpr_GetI(ex->Iex.GetI.descr,
                            subst_Expr(env, ex->Iex.GetI.ix),
                            ex->Iex.GetI.bias);

      case Iex_Unop:
         vassert(isIRAtom(ex->Iex.Unop.arg));
         return IRExpr_Unop(ex->Iex.Unop.op,
                            subst_Expr(env, ex->Iex.Unop.arg));

      case Iex_Binop:
         vassert(isIRAtom(ex->Iex.Binop.arg1));
         vassert(isIRAtom(ex->Iex.Binop.arg2));
         return IRExpr_Binop(ex->Iex.Binop.op,
                             subst_Expr(env, ex->Iex.Binop.arg1),
                             subst_Expr(env, ex->Iex.Binop.arg2));

      case Iex_Triop: {
         IRTriop* tri = ex->Iex.Triop.details;
         vassert(isIRAtom(tri->arg1));
         vassert(isIRAtom(tri->arg2));
         vassert(isIRAtom(tri->arg3));
         return IRExpr_Triop(tri->op,
                             subst_Expr(env, tri->arg1),
                             subst_Expr(env, tri->arg2),
                             subst_Expr(env, tri->arg3));
      }

      case Iex_Qop: {
         IRQop* q = ex->Iex.Qop.details;
         vassert(isIRAtom(q->arg1));
         vassert(isIRAtom(q->arg2));
         vassert(isIRAtom(q->arg3));
         vassert(isIRAtom(q->arg4));
         return IRExpr_Qop(q->op,
                           subst_Expr(env, q->arg1),
                           subst_Expr(env, q->arg2),
                           subst_Expr(env, q->arg3),
                           subst_Expr(env, q->arg4));
      }

      case Iex_Load:
         vassert(isIRAtom(ex->Iex.Load.addr));
         return IRExpr_Load(ex->Iex.Load.end,
                            ex->Iex.Load.ty,
                            subst_Expr(env, ex->Iex.Load.addr));

      case Iex_CCall: {
         Int j;
         /* Copy the arg vector so the original expression is left
            untouched, then substitute each argument in place. */
         IRExpr** newArgs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (j = 0; newArgs[j]; j++) {
            vassert(isIRAtom(newArgs[j]));
            newArgs[j] = subst_Expr(env, newArgs[j]);
         }
         return IRExpr_CCall(ex->Iex.CCall.cee,
                             ex->Iex.CCall.retty,
                             newArgs);
      }

      case Iex_ITE:
         vassert(isIRAtom(ex->Iex.ITE.cond));
         vassert(isIRAtom(ex->Iex.ITE.iftrue));
         vassert(isIRAtom(ex->Iex.ITE.iffalse));
         return IRExpr_ITE(subst_Expr(env, ex->Iex.ITE.cond),
                           subst_Expr(env, ex->Iex.ITE.iftrue),
                           subst_Expr(env, ex->Iex.ITE.iffalse));

      default:
         vex_printf("\n\n"); ppIRExpr(ex);
         vpanic("subst_Expr");
   }
}
2541
2542
2543 /* Apply the subst to stmt, then fold the result as much as possible.
2544 Much simplified due to stmt being previously flattened. As a
2545 result of this, the stmt may wind up being turned into a no-op.
2546 */
static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
{
#  if 0
   vex_printf("\nsubst and fold stmt\n");
   ppIRStmt(st);
   vex_printf("\n");
#  endif

   /* Every expression operand is run through subst_Expr and then
      fold_Expr.  For guarded statements (StoreG/LoadG/Exit), a guard
      that folds to a constant lets us simplify the whole statement. */
   switch (st->tag) {
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         return IRStmt_AbiHint(
                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)),
                   st->Ist.AbiHint.len,
                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia))
                );
      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   fold_Expr(env, subst_Expr(env, st->Ist.Put.data))
                );

      case Ist_PutI: {
         IRPutI *puti, *puti2;
         puti = st->Ist.PutI.details;
         vassert(isIRAtom(puti->ix));
         vassert(isIRAtom(puti->data));
         puti2 = mkIRPutI(puti->descr,
                          fold_Expr(env, subst_Expr(env, puti->ix)),
                          puti->bias,
                          fold_Expr(env, subst_Expr(env, puti->data)));
         return IRStmt_PutI(puti2);
      }

      case Ist_WrTmp:
         /* This is the one place where an expr (st->Ist.WrTmp.data) is
            allowed to be more than just a constant or a tmp. */
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data))
                );

      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         return IRStmt_Store(
                   st->Ist.Store.end,
                   fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)),
                   fold_Expr(env, subst_Expr(env, st->Ist.Store.data))
                );

      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         IRExpr* faddr = fold_Expr(env, subst_Expr(env, sg->addr));
         IRExpr* fdata = fold_Expr(env, subst_Expr(env, sg->data));
         IRExpr* fguard = fold_Expr(env, subst_Expr(env, sg->guard));
         if (fguard->tag == Iex_Const) {
            /* The condition on this store has folded down to a constant. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               /* guard is 0: the store can never happen; drop it. */
               return IRStmt_NoOp();
            } else {
               vassert(fguard->Iex.Const.con->Ico.U1 == True);
               /* guard is 1: downgrade to an unconditional store. */
               return IRStmt_Store(sg->end, faddr, fdata);
            }
         }
         return IRStmt_StoreG(sg->end, faddr, fdata, fguard);
      }

      case Ist_LoadG: {
         /* This is complicated. If the guard folds down to 'false',
            we can replace it with an assignment 'dst := alt', but if
            the guard folds down to 'true', we can't conveniently
            replace it with an unconditional load, because doing so
            requires generating a new temporary, and that is not easy
            to do at this point. */
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         IRExpr* faddr = fold_Expr(env, subst_Expr(env, lg->addr));
         IRExpr* falt = fold_Expr(env, subst_Expr(env, lg->alt));
         IRExpr* fguard = fold_Expr(env, subst_Expr(env, lg->guard));
         if (fguard->tag == Iex_Const) {
            /* The condition on this load has folded down to a constant. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               /* The load is not going to happen -- instead 'alt' is
                  assigned to 'dst'. */
               return IRStmt_WrTmp(lg->dst, falt);
            } else {
               vassert(fguard->Iex.Const.con->Ico.U1 == True);
               /* The load is always going to happen. We want to
                  convert to an unconditional load and assign to 'dst'
                  (IRStmt_WrTmp). Problem is we need an extra temp to
                  hold the loaded value, but none is available.
                  Instead, reconstitute the conditional load (with
                  folded args, of course) and let the caller of this
                  routine deal with the problem. */
            }
         }
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst, faddr, falt, fguard);
      }

      case Ist_CAS: {
         /* Fold the address and the expected/new value pairs.  The
            Hi halves are NULL for a single-width CAS. */
         IRCAS *cas, *cas2;
         cas = st->Ist.CAS.details;
         vassert(isIRAtom(cas->addr));
         vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
         vassert(isIRAtom(cas->expdLo));
         vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
         vassert(isIRAtom(cas->dataLo));
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   fold_Expr(env, subst_Expr(env, cas->addr)),
                   cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi))
                               : NULL,
                   fold_Expr(env, subst_Expr(env, cas->expdLo)),
                   cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi))
                               : NULL,
                   fold_Expr(env, subst_Expr(env, cas->dataLo))
                );
         return IRStmt_CAS(cas2);
      }

      case Ist_LLSC:
         vassert(isIRAtom(st->Ist.LLSC.addr));
         if (st->Ist.LLSC.storedata)
            vassert(isIRAtom(st->Ist.LLSC.storedata));
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)),
                   st->Ist.LLSC.storedata
                      ? fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata))
                      : NULL
                );

      case Ist_Dirty: {
         Int i;
         IRDirty *d, *d2;
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         /* Shallow-copy the arg vector so folding the args does not
            mutate the input statement. */
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            vassert(isIRAtom(d2->mAddr));
            d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr));
         }
         vassert(isIRAtom(d2->guard));
         d2->guard = fold_Expr(env, subst_Expr(env, d2->guard));
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            /* VECRET/GSPTR markers are not real expressions; skip them. */
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) {
               vassert(isIRAtom(arg));
               d2->args[i] = fold_Expr(env, subst_Expr(env, arg));
            }
         }
         return IRStmt_Dirty(d2);
      }

      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);

      case Ist_NoOp:
         return IRStmt_NoOp();

      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);

      case Ist_Exit: {
         IRExpr* fcond;
         vassert(isIRAtom(st->Ist.Exit.guard));
         fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard));
         if (fcond->tag == Iex_Const) {
            /* Interesting. The condition on this exit has folded down to
               a constant. */
            vassert(fcond->Iex.Const.con->tag == Ico_U1);
            if (fcond->Iex.Const.con->Ico.U1 == False) {
               /* exit is never going to happen, so dump the statement. */
               return IRStmt_NoOp();
            } else {
               vassert(fcond->Iex.Const.con->Ico.U1 == True);
               /* Hmmm. The exit has become unconditional. Leave it
                  as it is for now, since we'd have to truncate the BB
                  at this point, which is tricky. Such truncation is
                  done later by the dead-code elimination pass. */
               /* fall out into the reconstruct-the-exit code. */
               if (vex_control.iropt_verbosity > 0)
                  /* really a misuse of vex_control.iropt_verbosity */
                  vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
            }
         }
         return IRStmt_Exit(fcond, st->Ist.Exit.jk,
                            st->Ist.Exit.dst, st->Ist.Exit.offsIP);
      }

      default:
         vex_printf("\n"); ppIRStmt(st);
         vpanic("subst_and_fold_Stmt");
   }
}
2756
2757
IRSB* cprop_BB ( IRSB* in )
{
   Int i;
   IRSB* out;
   IRStmt* st2;
   Int n_tmps = in->tyenv->types_used;
   IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   /* Keep track of IRStmt_LoadGs that we need to revisit after
      processing all the other statements. */
   const Int N_FIXUPS = 16;
   Int fixups[N_FIXUPS]; /* indices in the stmt array of 'out' */
   Int n_fixups = 0;

   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );

   /* Set up the env which travels forward. This holds a
      substitution, mapping IRTemps to IRExprs. The environment
      is to be applied as we move along. Keys are IRTemps.
      Values are IRExpr*s.
   */
   for (i = 0; i < n_tmps; i++)
      env[i] = NULL;

   /* For each original SSA-form stmt ... */
   for (i = 0; i < in->stmts_used; i++) {

      /* First apply the substitution to the current stmt. This
         propagates in any constants and tmp-tmp assignments
         accumulated prior to this point. As part of the subst_Stmt
         call, also then fold any constant expressions resulting. */

      st2 = in->stmts[i];

      /* perhaps st2 is already a no-op? */
      if (st2->tag == Ist_NoOp) continue;

      st2 = subst_and_fold_Stmt( env, st2 );

      /* Deal with some post-folding special cases. */
      switch (st2->tag) {

         /* If the statement has been folded into a no-op, forget
            it. */
         case Ist_NoOp:
            continue;

         /* If the statement assigns to an IRTemp add it to the
            running environment. This is for the benefit of copy
            propagation and to allow sameIRExpr look through
            IRTemps. */
         case Ist_WrTmp: {
            /* SSA form: each tmp is assigned exactly once, so the
               environment slot must still be empty. */
            vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL);
            env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;

            /* 't1 = t2' -- don't add to BB; will be optimized out */
            if (st2->Ist.WrTmp.data->tag == Iex_RdTmp)
               continue;

            /* 't = const' && 'const != F64i' -- don't add to BB
               Note, we choose not to propagate const when const is an
               F64i, so that F64i literals can be CSE'd later. This
               helps x86 floating point code generation. */
            if (st2->Ist.WrTmp.data->tag == Iex_Const
                && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
               continue;
            }
            /* else add it to the output, as normal */
            break;
         }

         case Ist_LoadG: {
            IRLoadG* lg = st2->Ist.LoadG.details;
            IRExpr* guard = lg->guard;
            if (guard->tag == Iex_Const) {
               /* The guard has folded to a constant, and that
                  constant must be 1:I1, since subst_and_fold_Stmt
                  folds out the case 0:I1 by itself. */
               vassert(guard->Iex.Const.con->tag == Ico_U1);
               vassert(guard->Iex.Const.con->Ico.U1 == True);
               /* Add a NoOp here as a placeholder, and make a note of
                  where it is in the output block. Afterwards we'll
                  come back here and transform the NoOp and the LoadG
                  into a load-convert pair. The fixups[] entry
                  refers to the inserted NoOp, and we expect to find
                  the relevant LoadG immediately after it. */
               vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
               if (n_fixups < N_FIXUPS) {
                  fixups[n_fixups++] = out->stmts_used;
                  addStmtToIRSB( out, IRStmt_NoOp() );
               }
            }
            /* And always add the LoadG to the output, regardless. */
            break;
         }

         default:
            break;
      }

      /* Not interesting, copy st2 into the output block. */
      addStmtToIRSB( out, st2 );
   }

#  if STATS_IROPT
   vex_printf("sameIRExpr: invoked = %u/%u equal = %u/%u max_nodes = %u\n",
              invocation_count, recursion_count, success_count,
              recursion_success_count, max_nodes_visited);
#  endif

   out->next = subst_Expr( env, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP = in->offsIP;

   /* Process any leftover unconditional LoadGs that we noticed
      in the main pass. */
   vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
   for (i = 0; i < n_fixups; i++) {
      Int ix = fixups[i];
      /* Carefully verify that the LoadG has the expected form. */
      vassert(ix >= 0 && ix+1 < out->stmts_used);
      IRStmt* nop = out->stmts[ix];
      IRStmt* lgu = out->stmts[ix+1];
      vassert(nop->tag == Ist_NoOp);
      vassert(lgu->tag == Ist_LoadG);
      IRLoadG* lg = lgu->Ist.LoadG.details;
      IRExpr* guard = lg->guard;
      vassert(guard->Iex.Const.con->tag == Ico_U1);
      vassert(guard->Iex.Const.con->Ico.U1 == True);
      /* Figure out the load and result types, and the implied
         conversion operation. */
      IRType cvtRes = Ity_INVALID, cvtArg = Ity_INVALID;
      typeOfIRLoadGOp(lg->cvt, &cvtRes, &cvtArg);
      IROp cvtOp = Iop_INVALID;
      switch (lg->cvt) {
         case ILGop_IdentV128:
         case ILGop_Ident64:
         case ILGop_Ident32: break;
         case ILGop_8Uto32: cvtOp = Iop_8Uto32; break;
         case ILGop_8Sto32: cvtOp = Iop_8Sto32; break;
         case ILGop_16Uto32: cvtOp = Iop_16Uto32; break;
         case ILGop_16Sto32: cvtOp = Iop_16Sto32; break;
         default: vpanic("cprop_BB: unhandled ILGOp");
      }
      /* Replace the placeholder NoOp by the required unconditional
         load. */
      IRTemp tLoaded = newIRTemp(out->tyenv, cvtArg);
      out->stmts[ix]
         = IRStmt_WrTmp(tLoaded,
                        IRExpr_Load(lg->end, cvtArg, lg->addr));
      /* Replace the LoadG by a conversion from the loaded value's
         type to the required result type. */
      out->stmts[ix+1]
         = IRStmt_WrTmp(
              lg->dst, cvtOp == Iop_INVALID
                          ? IRExpr_RdTmp(tLoaded)
                          : IRExpr_Unop(cvtOp, IRExpr_RdTmp(tLoaded)));
   }

   return out;
}
2919
2920
2921 /*---------------------------------------------------------------*/
2922 /*--- Dead code (t = E) removal ---*/
2923 /*---------------------------------------------------------------*/
2924
2925 /* As a side effect, also removes all code following an unconditional
2926 side exit. */
2927
/* The type of the HashHW map is: a map from IRTemp to nothing
   -- really just operating a set of IRTemps.
*/
2931
inline
static void addUses_Temp ( Bool* set, IRTemp tmp )
{
   /* Mark 'tmp' as used, by setting its entry in 'set'. */
   set[(Int)tmp] = True;
}
2937
/* Walk 'e', marking in 'set' every IRTemp it mentions. */
static void addUses_Expr ( Bool* set, IRExpr* e )
{
   Int k;
   switch (e->tag) {
      case Iex_RdTmp:
         addUses_Temp(set, e->Iex.RdTmp.tmp);
         return;
      case Iex_Const:
      case Iex_Get:
         /* leaves with no tmp uses */
         return;
      case Iex_Unop:
         addUses_Expr(set, e->Iex.Unop.arg);
         return;
      case Iex_Binop:
         addUses_Expr(set, e->Iex.Binop.arg1);
         addUses_Expr(set, e->Iex.Binop.arg2);
         return;
      case Iex_Triop: {
         IRTriop* tri = e->Iex.Triop.details;
         addUses_Expr(set, tri->arg1);
         addUses_Expr(set, tri->arg2);
         addUses_Expr(set, tri->arg3);
         return;
      }
      case Iex_Qop: {
         IRQop* q = e->Iex.Qop.details;
         addUses_Expr(set, q->arg1);
         addUses_Expr(set, q->arg2);
         addUses_Expr(set, q->arg3);
         addUses_Expr(set, q->arg4);
         return;
      }
      case Iex_Load:
         addUses_Expr(set, e->Iex.Load.addr);
         return;
      case Iex_GetI:
         addUses_Expr(set, e->Iex.GetI.ix);
         return;
      case Iex_ITE:
         addUses_Expr(set, e->Iex.ITE.cond);
         addUses_Expr(set, e->Iex.ITE.iftrue);
         addUses_Expr(set, e->Iex.ITE.iffalse);
         return;
      case Iex_CCall:
         for (k = 0; e->Iex.CCall.args[k]; k++)
            addUses_Expr(set, e->Iex.CCall.args[k]);
         return;
      default:
         vex_printf("\n");
         ppIRExpr(e);
         vpanic("addUses_Expr");
   }
}
2987
/* Walk 'st', marking in 'set' every IRTemp read by it. */
static void addUses_Stmt ( Bool* set, IRStmt* st )
{
   Int k;
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* no expression operands, hence no tmp uses */
         return;
      case Ist_WrTmp:
         addUses_Expr(set, st->Ist.WrTmp.data);
         return;
      case Ist_Put:
         addUses_Expr(set, st->Ist.Put.data);
         return;
      case Ist_PutI:
         addUses_Expr(set, st->Ist.PutI.details->ix);
         addUses_Expr(set, st->Ist.PutI.details->data);
         return;
      case Ist_AbiHint:
         addUses_Expr(set, st->Ist.AbiHint.base);
         addUses_Expr(set, st->Ist.AbiHint.nia);
         return;
      case Ist_Store:
         addUses_Expr(set, st->Ist.Store.addr);
         addUses_Expr(set, st->Ist.Store.data);
         return;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         addUses_Expr(set, sg->addr);
         addUses_Expr(set, sg->data);
         addUses_Expr(set, sg->guard);
         return;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         addUses_Expr(set, lg->addr);
         addUses_Expr(set, lg->alt);
         addUses_Expr(set, lg->guard);
         return;
      }
      case Ist_CAS: {
         /* The Hi halves are optional (NULL for single-width CAS). */
         IRCAS* cas = st->Ist.CAS.details;
         addUses_Expr(set, cas->addr);
         if (cas->expdHi)
            addUses_Expr(set, cas->expdHi);
         addUses_Expr(set, cas->expdLo);
         if (cas->dataHi)
            addUses_Expr(set, cas->dataHi);
         addUses_Expr(set, cas->dataLo);
         return;
      }
      case Ist_LLSC:
         addUses_Expr(set, st->Ist.LLSC.addr);
         if (st->Ist.LLSC.storedata)
            addUses_Expr(set, st->Ist.LLSC.storedata);
         return;
      case Ist_Dirty: {
         IRDirty* d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None)
            addUses_Expr(set, d->mAddr);
         addUses_Expr(set, d->guard);
         for (k = 0; d->args[k] != NULL; k++) {
            IRExpr* arg = d->args[k];
            /* VECRET/GSPTR markers are not real expressions. */
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               addUses_Expr(set, arg);
         }
         return;
      }
      case Ist_Exit:
         addUses_Expr(set, st->Ist.Exit.guard);
         return;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vpanic("addUses_Stmt");
   }
}
3065
3066
3067 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
isZeroU1(IRExpr * e)3068 static Bool isZeroU1 ( IRExpr* e )
3069 {
3070 return toBool( e->tag == Iex_Const
3071 && e->Iex.Const.con->tag == Ico_U1
3072 && e->Iex.Const.con->Ico.U1 == False );
3073 }
3074
3075 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
isOneU1(IRExpr * e)3076 static Bool isOneU1 ( IRExpr* e )
3077 {
3078 return toBool( e->tag == Iex_Const
3079 && e->Iex.Const.con->tag == Ico_U1
3080 && e->Iex.Const.con->Ico.U1 == True );
3081 }
3082
3083
3084 /* Note, this destructively modifies the given IRSB. */
3085
3086 /* Scan backwards through statements, carrying a set of IRTemps which
3087 are known to be used after the current point. On encountering 't =
3088 E', delete the binding if it is not used. Otherwise, add any temp
3089 uses to the set and keep on moving backwards.
3090
3091 As an enhancement, the first (backwards) pass searches for IR exits
3092 with always-taken conditions and notes the location of the earliest
3093 one in the block. If any such are found, a second pass copies the
3094 exit destination and jump kind to the bb-end. Then, the exit and
3095 all statements following it are turned into no-ops.
3096 */
3097
/* notstatic */ void do_deadcode_BB ( IRSB* bb )
{
   Int     i, i_unconditional_exit;
   Int     n_tmps = bb->tyenv->types_used;
   /* set[t] == True iff temp t is used at some point AFTER the
      current (backwards) scan position. */
   Bool*   set = LibVEX_Alloc_inline(n_tmps * sizeof(Bool));
   IRStmt* st;

   for (i = 0; i < n_tmps; i++)
      set[i] = False;

   /* start off by recording IRTemp uses in the next field. */
   addUses_Expr(set, bb->next);

   /* First pass */

   /* Work backwards through the stmts */
   i_unconditional_exit = -1;
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;
      /* take note of any unconditional exits */
      if (st->tag == Ist_Exit
          && isOneU1(st->Ist.Exit.guard))
         i_unconditional_exit = i;
      if (st->tag == Ist_WrTmp
          && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
          /* it's an IRTemp which never got used.  Delete it. */
         if (DEBUG_IROPT) {
            vex_printf("DEAD: ");
            ppIRStmt(st);
            vex_printf("\n");
         }
         bb->stmts[i] = IRStmt_NoOp();
      }
      else
      if (st->tag == Ist_Dirty
          && st->Ist.Dirty.details->guard
          && isZeroU1(st->Ist.Dirty.details->guard)) {
         /* This is a dirty helper which will never get called.
            Delete it. */
         bb->stmts[i] = IRStmt_NoOp();
      }
      else {
         /* Note any IRTemp uses made by the current statement.
            (Deleted statements contribute no uses, which is what
            allows whole chains of dead computation to unravel in
            this single backwards pass.) */
         addUses_Stmt(set, st);
      }
   }

   /* Optional second pass: if any unconditional exits were found,
      delete them and all following statements.  The earliest such
      exit becomes the block's end: copy its destination, jump kind
      and IP offset to the bb-end fields. */

   if (i_unconditional_exit != -1) {
      if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
                        i_unconditional_exit);
      vassert(i_unconditional_exit >= 0
              && i_unconditional_exit < bb->stmts_used);
      bb->next
         = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
      bb->jumpkind
         = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
      bb->offsIP
         = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP;
      for (i = i_unconditional_exit; i < bb->stmts_used; i++)
         bb->stmts[i] = IRStmt_NoOp();
   }
}
3165
3166
3167 /*---------------------------------------------------------------*/
3168 /*--- Specialisation of helper function calls, in ---*/
3169 /*--- collaboration with the front end ---*/
3170 /*---------------------------------------------------------------*/
3171
/* Offer every clean-helper call (an Iex_CCall bound by a WrTmp) to
   the front end's |specHelper| for replacement by cheaper inline IR.
   Returns the (possibly re-flattened) block; replacements need not
   be flat, hence the flatten_BB at the end if anything changed. */
static
IRSB* spec_helpers_BB(
         IRSB* bb,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int)
      )
{
   Int     i;
   IRStmt* st;
   IRExpr* ex;
   Bool    any = False;

   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag != Ist_WrTmp
          || st->Ist.WrTmp.data->tag != Iex_CCall)
         continue;

      /* The front end also gets to inspect the statements preceding
         index i, for context-sensitive specialisations. */
      ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
                          st->Ist.WrTmp.data->Iex.CCall.args,
                          &bb->stmts[0], i );
      if (!ex)
        /* the front end can't think of a suitable replacement */
        continue;

      /* We got something better.  Install it in the bb. */
      any = True;
      bb->stmts[i]
         = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);

      if (0) {
         vex_printf("SPEC: ");
         /* |st| still refers to the old statement, so this prints
            the original call, not the replacement. */
         ppIRExpr(st->Ist.WrTmp.data);
         vex_printf(" --> ");
         ppIRExpr(ex);
         vex_printf("\n");
      }
   }

   if (any)
      bb = flatten_BB(bb);
   return bb;
}
3215
3216
3217 /*---------------------------------------------------------------*/
3218 /*--- Determination of guest state aliasing relationships ---*/
3219 /*---------------------------------------------------------------*/
3220
3221 /* These are helper functions for CSE and GetI/PutI transformations.
3222
3223 Determine, to the extent possible, the relationship between two
3224 guest state accesses. The possible outcomes are:
3225
3226 * Exact alias. These two accesses denote precisely the same
3227 piece of the guest state.
3228
3229 * Definitely no alias. These two accesses are guaranteed not to
3230 overlap any part of the guest state.
3231
3232 * Unknown -- if neither of the above can be established.
3233
3234 If in doubt, return Unknown. */
3235
typedef
   enum { ExactAlias,     /* precisely the same guest state piece */
          NoAlias,        /* guaranteed not to overlap */
          UnknownAlias }  /* cannot tell; treat conservatively */
   GSAliasing;
3239
3240
3241 /* Produces the alias relation between an indexed guest
3242 state access and a non-indexed access. */
3243
3244 static
getAliasingRelation_IC(IRRegArray * descr1,IRExpr * ix1,Int offset2,IRType ty2)3245 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
3246 Int offset2, IRType ty2 )
3247 {
3248 UInt minoff1, maxoff1, minoff2, maxoff2;
3249
3250 getArrayBounds( descr1, &minoff1, &maxoff1 );
3251 minoff2 = offset2;
3252 maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
3253
3254 if (maxoff1 < minoff2 || maxoff2 < minoff1)
3255 return NoAlias;
3256
3257 /* Could probably do better here if required. For the moment
3258 however just claim not to know anything more. */
3259 return UnknownAlias;
3260 }
3261
3262
3263 /* Produces the alias relation between two indexed guest state
3264 accesses. */
3265
/* Compare two indexed guest-state accesses.  Progressively rules
   out NoAlias (disjoint arrays), then UnknownAlias (differing
   descriptors or index atoms), and finally decides Exact/NoAlias
   by comparing the biases modulo the array size. */
static
GSAliasing getAliasingRelation_II (
              IRRegArray* descr1, IRExpr* ix1, Int bias1,
              IRRegArray* descr2, IRExpr* ix2, Int bias2
           )
{
   UInt minoff1, maxoff1, minoff2, maxoff2;
   Int  iters;

   /* First try hard to show they don't alias. */
   getArrayBounds( descr1, &minoff1, &maxoff1 );
   getArrayBounds( descr2, &minoff2, &maxoff2 );
   if (maxoff1 < minoff2 || maxoff2 < minoff1)
      return NoAlias;

   /* So the two arrays at least partially overlap.  To get any
      further we'll have to be sure that the descriptors are
      identical. */
   if (!eqIRRegArray(descr1, descr2))
      return UnknownAlias;

   /* The descriptors are identical.  Now the only difference can be
      in the index expressions.  If they cannot be shown to be
      identical, we have to say we don't know what the aliasing
      relation will be.  Now, since the IR is flattened, the index
      expressions should be atoms -- either consts or tmps.  So that
      makes the comparison simple. */
   vassert(isIRAtom(ix1));
   vassert(isIRAtom(ix2));
   if (!eqIRAtom(ix1,ix2))
      return UnknownAlias;

   /* Ok, the index expressions are identical.  So now the only way
      they can be different is in the bias.  Normalise this
      paranoidly, to reliably establish equality/non-equality. */

   /* So now we know that the GetI and PutI index the same array
      with the same base.  Are the offsets the same, modulo the
      array size?  Do this paranoidly. */
   vassert(descr1->nElems == descr2->nElems);
   vassert(descr1->elemTy == descr2->elemTy);
   vassert(descr1->base   == descr2->base);
   iters = 0;
   /* Shift both biases into non-negative territory by adding whole
      array lengths (the same amount to each), so that C's % gives a
      well-defined non-negative result below. */
   while (bias1 < 0 || bias2 < 0) {
      bias1 += descr1->nElems;
      bias2 += descr1->nElems;
      iters++;
      if (iters > 10)
         vpanic("getAliasingRelation: iters");
   }
   vassert(bias1 >= 0 && bias2 >= 0);
   bias1 %= descr1->nElems;
   bias2 %= descr1->nElems;
   vassert(bias1 >= 0 && bias1 < descr1->nElems);
   vassert(bias2 >= 0 && bias2 < descr1->nElems);

   /* Finally, biasP and biasG are normalised into the range
      0 .. descrP/G->nElems - 1.  And so we can establish
      equality/non-equality. */

   return bias1==bias2 ? ExactAlias : NoAlias;
}
3328
3329
3330 /*---------------------------------------------------------------*/
3331 /*--- Common Subexpression Elimination ---*/
3332 /*---------------------------------------------------------------*/
3333
3334 /* Expensive in time and space. */
3335
3336 /* Uses two environments:
3337 a IRTemp -> IRTemp mapping
3338 a mapping from AvailExpr* to IRTemp
3339 */
3340
/* An IR atom as used by the CSE machinery: either a temporary
   (tag TCt, u.tmp) or a constant (tag TCc, u.con). */
typedef
   struct {
      enum { TCc, TCt } tag;
      union { IRTemp tmp; IRConst* con; } u;
   }
   TmpOrConst;
3347
eqTmpOrConst(TmpOrConst * tc1,TmpOrConst * tc2)3348 static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 )
3349 {
3350 if (tc1->tag != tc2->tag)
3351 return False;
3352 switch (tc1->tag) {
3353 case TCc:
3354 return eqIRConst(tc1->u.con, tc2->u.con);
3355 case TCt:
3356 return tc1->u.tmp == tc2->u.tmp;
3357 default:
3358 vpanic("eqTmpOrConst");
3359 }
3360 }
3361
eqIRCallee(IRCallee * cee1,IRCallee * cee2)3362 static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 )
3363 {
3364 Bool eq = cee1->addr == cee2->addr;
3365 if (eq) {
3366 vassert(cee1->regparms == cee2->regparms);
3367 vassert(cee1->mcx_mask == cee2->mcx_mask);
3368 /* Names should be the same too, but we don't bother to
3369 check. */
3370 }
3371 return eq;
3372 }
3373
3374 /* Convert an atomic IRExpr* to a TmpOrConst. */
irExpr_to_TmpOrConst(TmpOrConst * tc,IRExpr * e)3375 static void irExpr_to_TmpOrConst ( /*OUT*/TmpOrConst* tc, IRExpr* e )
3376 {
3377 switch (e->tag) {
3378 case Iex_RdTmp:
3379 tc->tag = TCt;
3380 tc->u.tmp = e->Iex.RdTmp.tmp;
3381 break;
3382 case Iex_Const:
3383 tc->tag = TCc;
3384 tc->u.con = e->Iex.Const.con;
3385 break;
3386 default:
3387 /* Getting here is a serious error. It means that the
3388 presented arg isn't an IR atom, as it should be. */
3389 vpanic("irExpr_to_TmpOrConst");
3390 }
3391 }
3392
3393 /* Convert a TmpOrConst to an atomic IRExpr*. */
tmpOrConst_to_IRExpr(TmpOrConst * tc)3394 static IRExpr* tmpOrConst_to_IRExpr ( TmpOrConst* tc )
3395 {
3396 switch (tc->tag) {
3397 case TCc: return IRExpr_Const(tc->u.con);
3398 case TCt: return IRExpr_RdTmp(tc->u.tmp);
3399 default: vpanic("tmpOrConst_to_IRExpr");
3400 }
3401 }
3402
3403 /* Convert a NULL terminated IRExpr* vector to an array of
3404 TmpOrConsts, and a length. */
irExprVec_to_TmpOrConsts(TmpOrConst ** outs,Int * nOuts,IRExpr ** ins)3405 static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs,
3406 /*OUT*/Int* nOuts,
3407 IRExpr** ins )
3408 {
3409 Int i, n;
3410 /* We have to make two passes, one to count, one to copy. */
3411 for (n = 0; ins[n]; n++)
3412 ;
3413 *outs = LibVEX_Alloc_inline(n * sizeof(TmpOrConst));
3414 *nOuts = n;
3415 /* and now copy .. */
3416 for (i = 0; i < n; i++) {
3417 IRExpr* arg = ins[i];
3418 TmpOrConst* dst = &(*outs)[i];
3419 irExpr_to_TmpOrConst(dst, arg);
3420 }
3421 }
3422
/* The key type for the CSE environment: a shallow encoding of the
   expression shapes CSE can handle.  Tag mnemonics: U=unop, B=binop,
   I(tte)=ITE, with 't' meaning a tmp operand and 'c' a constant
   operand, in operand order; plus F64i constants, GetI reads, clean
   helper calls and (optionally CSE-able) Loads.  Note that IRConsts
   are stored BY VALUE here, so availExpr_to_IRExpr must re-heapify
   them when reconstructing IR. */
typedef
   struct {
      enum { Ut, Btt, Btc, Bct, Cf64i, Ittt, Itct, Ittc, Itcc, GetIt,
             CCall, Load
      } tag;
      union {
         /* unop(tmp) */
         struct {
            IROp   op;
            IRTemp arg;
         } Ut;
         /* binop(tmp,tmp) */
         struct {
            IROp   op;
            IRTemp arg1;
            IRTemp arg2;
         } Btt;
         /* binop(tmp,const) */
         struct {
            IROp    op;
            IRTemp  arg1;
            IRConst con2;
         } Btc;
         /* binop(const,tmp) */
         struct {
            IROp    op;
            IRConst con1;
            IRTemp  arg2;
         } Bct;
         /* F64i-style const */
         struct {
            ULong f64i;
         } Cf64i;
         /* ITE(tmp,tmp,tmp) */
         struct {
            IRTemp co;
            IRTemp e1;
            IRTemp e0;
         } Ittt;
         /* ITE(tmp,tmp,const) */
         struct {
            IRTemp  co;
            IRTemp  e1;
            IRConst con0;
         } Ittc;
         /* ITE(tmp,const,tmp) */
         struct {
            IRTemp  co;
            IRConst con1;
            IRTemp  e0;
         } Itct;
         /* ITE(tmp,const,const) */
         struct {
            IRTemp  co;
            IRConst con1;
            IRConst con0;
         } Itcc;
         /* GetI(descr,tmp,bias)*/
         struct {
            IRRegArray* descr;
            IRTemp      ix;
            Int         bias;
         } GetIt;
         /* Clean helper call */
         struct {
            IRCallee*   cee;
            TmpOrConst* args;
            Int         nArgs;
            IRType      retty;
         } CCall;
         /* Load(end,ty,addr) */
         struct {
            IREndness  end;
            IRType     ty;
            TmpOrConst addr;
         } Load;
      } u;
   }
   AvailExpr;
3502
/* Structural equality on AvailExprs.  The LIKELY annotation encodes
   the expectation that the tag comparison usually fails (most
   lookups in the CSE environment are misses). */
static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
{
   if (LIKELY(a1->tag != a2->tag))
      return False;
   switch (a1->tag) {
      case Ut:
         return toBool(
                a1->u.Ut.op == a2->u.Ut.op
                && a1->u.Ut.arg == a2->u.Ut.arg);
      case Btt:
         return toBool(
                a1->u.Btt.op == a2->u.Btt.op
                && a1->u.Btt.arg1 == a2->u.Btt.arg1
                && a1->u.Btt.arg2 == a2->u.Btt.arg2);
      case Btc:
         return toBool(
                a1->u.Btc.op == a2->u.Btc.op
                && a1->u.Btc.arg1 == a2->u.Btc.arg1
                && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
      case Bct:
         return toBool(
                a1->u.Bct.op == a2->u.Bct.op
                && a1->u.Bct.arg2 == a2->u.Bct.arg2
                && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
      case Cf64i:
         return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
      case Ittt:
         return toBool(a1->u.Ittt.co == a2->u.Ittt.co
                       && a1->u.Ittt.e1 == a2->u.Ittt.e1
                       && a1->u.Ittt.e0 == a2->u.Ittt.e0);
      case Ittc:
         return toBool(a1->u.Ittc.co == a2->u.Ittc.co
                       && a1->u.Ittc.e1 == a2->u.Ittc.e1
                       && eqIRConst(&a1->u.Ittc.con0, &a2->u.Ittc.con0));
      case Itct:
         return toBool(a1->u.Itct.co == a2->u.Itct.co
                       && eqIRConst(&a1->u.Itct.con1, &a2->u.Itct.con1)
                       && a1->u.Itct.e0 == a2->u.Itct.e0);
      case Itcc:
         return toBool(a1->u.Itcc.co == a2->u.Itcc.co
                       && eqIRConst(&a1->u.Itcc.con1, &a2->u.Itcc.con1)
                       && eqIRConst(&a1->u.Itcc.con0, &a2->u.Itcc.con0));
      case GetIt:
         return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
                       && a1->u.GetIt.ix == a2->u.GetIt.ix
                       && a1->u.GetIt.bias == a2->u.GetIt.bias);
      case CCall: {
         /* Compare callee and argument count first (cheap), then
            the argument vectors element by element. */
         Int  i, n;
         Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs
                   && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee);
         if (eq) {
            n = a1->u.CCall.nArgs;
            for (i = 0; i < n; i++) {
               if (!eqTmpOrConst( &a1->u.CCall.args[i],
                                  &a2->u.CCall.args[i] )) {
                  eq = False;
                  break;
               }
            }
         }
         if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty);
         return eq;
      }
      case Load: {
         Bool eq = toBool(a1->u.Load.end == a2->u.Load.end
                          && a1->u.Load.ty == a2->u.Load.ty
                          && eqTmpOrConst(&a1->u.Load.addr, &a2->u.Load.addr));
         return eq;
      }
      default:
         vpanic("eq_AvailExpr");
   }
}
3576
/* Reconstruct an IRExpr tree from an AvailExpr.  Since AvailExprs
   hold IRConsts by value, each constant is copied back into a fresh
   heap-allocated IRConst before being wrapped in an Iex_Const. */
static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
{
   IRConst *con, *con0, *con1;
   switch (ae->tag) {
      case Ut:
         return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
      case Btt:
         return IRExpr_Binop( ae->u.Btt.op,
                              IRExpr_RdTmp(ae->u.Btt.arg1),
                              IRExpr_RdTmp(ae->u.Btt.arg2) );
      case Btc:
         con = LibVEX_Alloc_inline(sizeof(IRConst));
         *con = ae->u.Btc.con2;
         return IRExpr_Binop( ae->u.Btc.op,
                              IRExpr_RdTmp(ae->u.Btc.arg1),
                              IRExpr_Const(con) );
      case Bct:
         con = LibVEX_Alloc_inline(sizeof(IRConst));
         *con = ae->u.Bct.con1;
         return IRExpr_Binop( ae->u.Bct.op,
                              IRExpr_Const(con),
                              IRExpr_RdTmp(ae->u.Bct.arg2) );
      case Cf64i:
         return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
      case Ittt:
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittt.co),
                           IRExpr_RdTmp(ae->u.Ittt.e1),
                           IRExpr_RdTmp(ae->u.Ittt.e0));
      case Ittc:
         con0 = LibVEX_Alloc_inline(sizeof(IRConst));
         *con0 = ae->u.Ittc.con0;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittc.co),
                           IRExpr_RdTmp(ae->u.Ittc.e1),
                           IRExpr_Const(con0));
      case Itct:
         con1 = LibVEX_Alloc_inline(sizeof(IRConst));
         *con1 = ae->u.Itct.con1;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itct.co),
                           IRExpr_Const(con1),
                           IRExpr_RdTmp(ae->u.Itct.e0));

      case Itcc:
         con0 = LibVEX_Alloc_inline(sizeof(IRConst));
         con1 = LibVEX_Alloc_inline(sizeof(IRConst));
         *con0 = ae->u.Itcc.con0;
         *con1 = ae->u.Itcc.con1;
         return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itcc.co),
                           IRExpr_Const(con1),
                           IRExpr_Const(con0));
      case GetIt:
         return IRExpr_GetI(ae->u.GetIt.descr,
                            IRExpr_RdTmp(ae->u.GetIt.ix),
                            ae->u.GetIt.bias);
      case CCall: {
         /* Rebuild the NULL-terminated argument vector expected by
            IRExpr_CCall. */
         Int i, n = ae->u.CCall.nArgs;
         vassert(n >= 0);
         IRExpr** vec = LibVEX_Alloc_inline((n+1) * sizeof(IRExpr*));
         vec[n] = NULL;
         for (i = 0; i < n; i++) {
            vec[i] = tmpOrConst_to_IRExpr(&ae->u.CCall.args[i]);
         }
         return IRExpr_CCall(ae->u.CCall.cee,
                             ae->u.CCall.retty,
                             vec);
      }
      case Load:
         return IRExpr_Load(ae->u.Load.end, ae->u.Load.ty,
                            tmpOrConst_to_IRExpr(&ae->u.Load.addr));
      default:
         vpanic("availExpr_to_IRExpr");
   }
}
3649
3650 inline
subst_AvailExpr_Temp(HashHW * env,IRTemp tmp)3651 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
3652 {
3653 HWord res;
3654 /* env :: IRTemp -> IRTemp */
3655 if (lookupHHW( env, &res, (HWord)tmp ))
3656 return (IRTemp)res;
3657 else
3658 return tmp;
3659 }
3660
3661 inline
subst_AvailExpr_TmpOrConst(TmpOrConst * tc,HashHW * env)3662 static void subst_AvailExpr_TmpOrConst ( /*MB_MOD*/TmpOrConst* tc,
3663 HashHW* env )
3664 {
3665 /* env :: IRTemp -> IRTemp */
3666 if (tc->tag == TCt) {
3667 tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp );
3668 }
3669 }
3670
/* Apply the tenv substitution (IRTemp -> IRTemp renaming) to every
   temporary mentioned in |ae|, modifying it in place.  Constant
   operands are unaffected.  Panics on an unknown tag. */
static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
{
   /* env :: IRTemp -> IRTemp */
   switch (ae->tag) {
      case Ut:
         ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
         break;
      case Btt:
         ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
         ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
         break;
      case Btc:
         ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
         break;
      case Bct:
         ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
         break;
      case Cf64i:
         /* No temps at all in an F64i constant. */
         break;
      case Ittt:
         ae->u.Ittt.co = subst_AvailExpr_Temp( env, ae->u.Ittt.co );
         ae->u.Ittt.e1 = subst_AvailExpr_Temp( env, ae->u.Ittt.e1 );
         ae->u.Ittt.e0 = subst_AvailExpr_Temp( env, ae->u.Ittt.e0 );
         break;
      case Ittc:
         ae->u.Ittc.co = subst_AvailExpr_Temp( env, ae->u.Ittc.co );
         ae->u.Ittc.e1 = subst_AvailExpr_Temp( env, ae->u.Ittc.e1 );
         break;
      case Itct:
         ae->u.Itct.co = subst_AvailExpr_Temp( env, ae->u.Itct.co );
         ae->u.Itct.e0 = subst_AvailExpr_Temp( env, ae->u.Itct.e0 );
         break;
      case Itcc:
         ae->u.Itcc.co = subst_AvailExpr_Temp( env, ae->u.Itcc.co );
         break;
      case GetIt:
         ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
         break;
      case CCall: {
         Int i, n = ae->u.CCall.nArgs;  /* was 'nArgs;;' -- stray semicolon */
         for (i = 0; i < n; i++) {
            subst_AvailExpr_TmpOrConst(&ae->u.CCall.args[i], env);
         }
         break;
      }
      case Load:
         subst_AvailExpr_TmpOrConst(&ae->u.Load.addr, env);
         break;
      default:
         vpanic("subst_AvailExpr");
   }
}
3723
/* Try to encode |e| as an AvailExpr.  Returns NULL if |e| is not one
   of the handled shapes (in which case it simply never becomes
   "available" and is not CSEd).  Loads are only encoded when the
   caller has opted in via |allowLoadsToBeCSEd|. */
static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e, Bool allowLoadsToBeCSEd )
{
   AvailExpr* ae;

   switch (e->tag) {
      case Iex_Unop:
         if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag      = Ut;
            ae->u.Ut.op  = e->Iex.Unop.op;
            ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
            return ae;
         }
         break;

      case Iex_Binop:
         /* Three operand-shape cases: (tmp,tmp), (tmp,const),
            (const,tmp).  (const,const) is not encoded. */
         if (e->Iex.Binop.arg1->tag == Iex_RdTmp) {
            if (e->Iex.Binop.arg2->tag == Iex_RdTmp) {
               ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
               ae->tag        = Btt;
               ae->u.Btt.op   = e->Iex.Binop.op;
               ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
               ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
               return ae;
            }
            if (e->Iex.Binop.arg2->tag == Iex_Const) {
               ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
               ae->tag        = Btc;
               ae->u.Btc.op   = e->Iex.Binop.op;
               ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
               /* note: const copied BY VALUE into the AvailExpr */
               ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
               return ae;
            }
         } else if (e->Iex.Binop.arg1->tag == Iex_Const
                    && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag        = Bct;
            ae->u.Bct.op   = e->Iex.Binop.op;
            ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
            ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
            return ae;
         }
         break;

      case Iex_Const:
         /* Only F64i constants are worth CSEing. */
         if (e->Iex.Const.con->tag == Ico_F64i) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag          = Cf64i;
            ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
            return ae;
         }
         break;

      case Iex_ITE:
         /* Condition must be a tmp; then the four combinations of
            tmp/const for the iftrue/iffalse arms. */
         if (e->Iex.ITE.cond->tag == Iex_RdTmp) {
            if (e->Iex.ITE.iffalse->tag == Iex_RdTmp) {
               if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Ittt;
                  ae->u.Ittt.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Ittt.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
                  ae->u.Ittt.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
                  return ae;
               }
               if (e->Iex.ITE.iftrue->tag == Iex_Const) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Itct;
                  ae->u.Itct.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Itct.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
                  ae->u.Itct.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
                  return ae;
               }
            } else if (e->Iex.ITE.iffalse->tag == Iex_Const) {
               if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Ittc;
                  ae->u.Ittc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Ittc.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
                  ae->u.Ittc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
                  return ae;
               }
               if (e->Iex.ITE.iftrue->tag == Iex_Const) {
                  ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
                  ae->tag       = Itcc;
                  ae->u.Itcc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
                  ae->u.Itcc.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
                  ae->u.Itcc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
                  return ae;
               }
            }
         }
         break;

      case Iex_GetI:
         if (e->Iex.GetI.ix->tag == Iex_RdTmp) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag           = GetIt;
            ae->u.GetIt.descr = e->Iex.GetI.descr;
            ae->u.GetIt.ix    = e->Iex.GetI.ix->Iex.RdTmp.tmp;
            ae->u.GetIt.bias  = e->Iex.GetI.bias;
            return ae;
         }
         break;

      case Iex_CCall:
         ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
         ae->tag = CCall;
         /* Ok to share only the cee, since it is immutable. */
         ae->u.CCall.cee   = e->Iex.CCall.cee;
         ae->u.CCall.retty = e->Iex.CCall.retty;
         /* irExprVec_to_TmpOrConsts will assert if the args are
            neither tmps nor constants, but that's ok .. that's all they
            should be. */
         irExprVec_to_TmpOrConsts(
                                  &ae->u.CCall.args, &ae->u.CCall.nArgs,
                                  e->Iex.CCall.args
                                 );
         return ae;

      case Iex_Load:
         /* If the caller of do_cse_BB has requested that loads also
            be CSEd, convert them into AvailExprs.  If not, we'll just
            return NULL here, and the load never becomes considered
            "available", which effectively disables CSEing of them, as
            desired. */
         if (allowLoadsToBeCSEd) {
            ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
            ae->tag        = Load;
            ae->u.Load.end = e->Iex.Load.end;
            ae->u.Load.ty  = e->Iex.Load.ty;
            irExpr_to_TmpOrConst(&ae->u.Load.addr, e->Iex.Load.addr);
            return ae;
         }
         break;

      default:
         break;
   }

   return NULL;
}
3865
3866
3867 /* The BB is modified in-place. Returns True if any changes were
3868 made. The caller can choose whether or not loads should be CSEd.
3869 In the normal course of things we don't do that, since CSEing loads
3870 is something of a dodgy proposition if the guest program is doing
3871 some screwy stuff to do with races and spinloops. */
3872
static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd )
{
   Int        i, j, paranoia;
   IRTemp     t, q;
   IRStmt*    st;
   AvailExpr* eprime;
   AvailExpr* ae;
   Bool       invalidate;
   Bool       anyDone = False;

   HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
   HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */

   /* IRTemps are stored directly as HWord keys/values below. */
   vassert(sizeof(IRTemp) <= sizeof(HWord));

   if (0) { ppIRSB(bb); vex_printf("\n\n"); }

   /* Iterate forwards over the stmts.
      On seeing "t = E", where E is one of the AvailExpr forms:
         let E' = apply tenv substitution to E
         search aenv for E'
            if a mapping E' -> q is found,
               replace this stmt by "t = q"
               and add binding t -> q to tenv
            else
               add binding E' -> t to aenv
               replace this stmt by "t = E'"

      Other statements are only interesting to the extent that they
      might invalidate some of the expressions in aenv.  So there is
      an invalidate-bindings check for each statement seen.
   */
   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];

      /* ------ BEGIN invalidate aenv bindings ------ */
      /* This is critical: remove from aenv any E' -> .. bindings
         which might be invalidated by this statement.  The only
         vulnerable kind of bindings are the GetI and Load kinds.
            Dirty call - dump (paranoia level -> 2)
            Store      - dump (ditto)
            Put, PutI  - dump unless no-overlap is proven (.. -> 1)
         Uses getAliasingRelation_IC and getAliasingRelation_II
         to do the no-overlap assessments needed for Put/PutI.
      */
      switch (st->tag) {
         case Ist_Dirty: case Ist_Store: case Ist_MBE:
         case Ist_CAS: case Ist_LLSC:
         case Ist_StoreG:
            paranoia = 2; break;
         case Ist_Put: case Ist_PutI:
            paranoia = 1; break;
         case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
         case Ist_WrTmp: case Ist_Exit: case Ist_LoadG:
            paranoia = 0; break;
         default:
            vpanic("do_cse_BB(1)");
      }

      if (paranoia > 0) {
         for (j = 0; j < aenv->used; j++) {
            if (!aenv->inuse[j])
               continue;
            ae = (AvailExpr*)aenv->key[j];
            /* Only GetI and Load bindings can be invalidated. */
            if (ae->tag != GetIt && ae->tag != Load)
               continue;
            invalidate = False;
            if (paranoia >= 2) {
               invalidate = True;
            } else {
               vassert(paranoia == 1);
               if (ae->tag == Load) {
                  /* Loads can be invalidated by anything that could
                     possibly touch memory.  But in that case we
                     should have |paranoia| == 2 and we won't get
                     here.  So there's nothing to do; we don't have to
                     invalidate the load. */
               }
               else
               if (st->tag == Ist_Put) {
                  if (getAliasingRelation_IC(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         st->Ist.Put.offset,
                         typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
                      ) != NoAlias)
                     invalidate = True;
               }
               else
               if (st->tag == Ist_PutI) {
                  IRPutI *puti = st->Ist.PutI.details;
                  if (getAliasingRelation_II(
                         ae->u.GetIt.descr,
                         IRExpr_RdTmp(ae->u.GetIt.ix),
                         ae->u.GetIt.bias,
                         puti->descr,
                         puti->ix,
                         puti->bias
                      ) != NoAlias)
                     invalidate = True;
               }
               else
                  vpanic("do_cse_BB(2)");
            }

            if (invalidate) {
               aenv->inuse[j] = False;
               aenv->key[j]   = (HWord)NULL;  /* be sure */
            }
         } /* for j */
      } /* paranoia > 0 */

      /* ------ END invalidate aenv bindings ------ */

      /* ignore not-interestings */
      if (st->tag != Ist_WrTmp)
         continue;

      t = st->Ist.WrTmp.tmp;
      eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data, allowLoadsToBeCSEd);
      /* ignore if not of AvailExpr form */
      if (!eprime)
         continue;

      /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */

      /* apply tenv */
      subst_AvailExpr( tenv, eprime );

      /* search aenv for eprime, unfortunately the hard way */
      for (j = 0; j < aenv->used; j++)
         if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
            break;

      if (j < aenv->used) {
         /* A binding E' -> q was found.  Replace stmt by "t = q" and
            note the t->q binding in tenv. */
         /* (this is the core of the CSE action) */
         q = (IRTemp)aenv->val[j];
         bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
         addToHHW( tenv, (HWord)t, (HWord)q );
         anyDone = True;
      } else {
         /* No binding was found, so instead we add E' -> t to our
            collection of available expressions, replace this stmt
            with "t = E'", and move on. */
         bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
         addToHHW( aenv, (HWord)eprime, (HWord)t );
      }
   }

   /*
   ppIRSB(bb);
   sanityCheckIRSB(bb, Ity_I32);
   vex_printf("\n\n");
   */
   return anyDone;
}
4031
4032
4033 /*---------------------------------------------------------------*/
4034 /*--- Add32/Sub32 chain collapsing ---*/
4035 /*---------------------------------------------------------------*/
4036
4037 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
4038
4039 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ? If
4040 yes, set *tmp and *i32 appropriately. *i32 is set as if the
4041 root node is Add32, not Sub32. */
4042
static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
{
   if (e->tag != Iex_Binop)
      return False;
   if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
      return False;
   if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
      return False;
   if (e->Iex.Binop.arg2->tag != Iex_Const)
      return False;
   *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
   /* NOTE(review): reads Ico.U32 without checking the const's tag --
      presumably a 32-bit Add32/Sub32 always carries an Ico_U32
      operand; confirm against the IR sanity checker. */
   *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
   if (e->Iex.Binop.op == Iop_Sub32)
      /* NOTE(review): negating INT_MIN here would be signed overflow;
         presumably such a bias never arises in practice -- confirm. */
      *i32 = -*i32;
   return True;
}
4059
4060
4061 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
4062 other tmp2. Scan backwards from the specified start point -- an
4063 optimisation. */
4064
static Bool collapseChain ( IRSB* bb, Int startHere,
                            IRTemp tmp,
                            IRTemp* tmp2, Int* i32 )
{
   Int     j, ii;
   IRTemp  vv;
   IRStmt* st;
   IRExpr* e;

   /* the (var, con) pair contain the current 'representation' for
      'tmp'.  We start with 'tmp + 0'.  */
   IRTemp var = tmp;
   Int    con = 0;

   /* Scan backwards to see if tmp can be replaced by some other tmp
     +/- a constant. */
   for (j = startHere; j >= 0; j--) {
      st = bb->stmts[j];
      if (st->tag != Ist_WrTmp)
         continue;
      if (st->Ist.WrTmp.tmp != var)
         continue;
      /* Found the binding for |var|.  If it is itself an
         Add32/Sub32(tmp,const), follow the chain one step further;
         otherwise the chain ends here. */
      e = st->Ist.WrTmp.data;
      if (!isAdd32OrSub32(e, &vv, &ii))
         break;
      var = vv;
      con += ii;
   }
   if (j == -1)
      /* no earlier binding for var .. ill-formed IR */
      vpanic("collapseChain");

   /* so, did we find anything interesting? */
   if (var == tmp)
      return False; /* no .. */

   *tmp2 = var;
   *i32  = con;
   return True;
}
4105
4106
4107 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
4108
/* Walk the block backwards, collapsing Add32/Sub32 chains feeding
   WrTmp, GetI and PutI statements into a single base-temp + constant.
   bb is modified in place. */
static void collapse_AddSub_chains_BB ( IRSB* bb )
{
   IRStmt *st;
   IRTemp var, var2;
   Int    i, con, con2;

   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */

      if (st->tag == Ist_WrTmp
          && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {

         /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
            Find out if var can be expressed as var2 + con2. */
         if (collapseChain(bb, i-1, var, &var2, &con2)) {
            if (DEBUG_IROPT) {
               vex_printf("replacing1 ");
               ppIRStmt(st);
               vex_printf(" with ");
            }
            con2 += con;
            /* Re-express as Add32 for a non-negative constant, else
               Sub32 of the negated constant. */
            bb->stmts[i]
               = IRStmt_WrTmp(
                    st->Ist.WrTmp.tmp,
                    (con2 >= 0)
                      ? IRExpr_Binop(Iop_Add32,
                                     IRExpr_RdTmp(var2),
                                     IRExpr_Const(IRConst_U32(con2)))
                      : IRExpr_Binop(Iop_Sub32,
                                     IRExpr_RdTmp(var2),
                                     IRExpr_Const(IRConst_U32(-con2)))
                 );
            if (DEBUG_IROPT) {
               ppIRStmt(bb->stmts[i]);
               vex_printf("\n");
            }
         }

         continue;
      }

      /* Try to collapse 't1 = GetI[t2, con]'. */

      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_GetI
          && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
          && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
                                      ->Iex.RdTmp.tmp, &var2, &con2)) {
         if (DEBUG_IROPT) {
            vex_printf("replacing3 ");
            ppIRStmt(st);
            vex_printf(" with ");
         }
         /* Fold the collapsed constant into the GetI bias. */
         con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
         bb->stmts[i]
            = IRStmt_WrTmp(
                 st->Ist.WrTmp.tmp,
                 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
                             IRExpr_RdTmp(var2),
                             con2));
         if (DEBUG_IROPT) {
            ppIRStmt(bb->stmts[i]);
            vex_printf("\n");
         }
         continue;
      }

      /* Perhaps st is PutI[t, con] ? */

      if (st->tag == Ist_PutI) {
         /* Only read the PutI union member once the tag check has
            established it is the active one.  (Previously .details
            was read before the tag was inspected.) */
         IRPutI *puti = st->Ist.PutI.details;
         if (puti->ix->tag == Iex_RdTmp
             && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp,
                              &var2, &con2)) {
            if (DEBUG_IROPT) {
               vex_printf("replacing2 ");
               ppIRStmt(st);
               vex_printf(" with ");
            }
            /* Fold the collapsed constant into the PutI bias. */
            con2 += puti->bias;
            bb->stmts[i]
               = IRStmt_PutI(mkIRPutI(puti->descr,
                                      IRExpr_RdTmp(var2),
                                      con2,
                                      puti->data));
            if (DEBUG_IROPT) {
               ppIRStmt(bb->stmts[i]);
               vex_printf("\n");
            }
         }
         continue;
      }

   } /* for */
}
4206
4207
4208 /*---------------------------------------------------------------*/
4209 /*--- PutI/GetI transformations ---*/
4210 /*---------------------------------------------------------------*/
4211
4212 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
4213 the given starting point to find, if any, a PutI which writes
4214 exactly the same piece of guest state, and so return the expression
4215 that the PutI writes. This is the core of PutI-GetI forwarding. */
4216
static
IRExpr* findPutI ( IRSB* bb, Int startHere,
                   IRRegArray* descrG, IRExpr* ixG, Int biasG )
{
   Int        j;
   IRStmt*    st;
   GSAliasing relation;

   if (0) {
      vex_printf("\nfindPutI ");
      ppIRRegArray(descrG);
      vex_printf(" ");
      ppIRExpr(ixG);
      vex_printf(" %d\n", biasG);
   }

   /* Scan backwards in bb from startHere to find a suitable PutI
      binding for (descrG, ixG, biasG), if any.  Only Put, PutI and
      Dirty statements can interfere; everything else is skipped. */

   for (j = startHere; j >= 0; j--) {
      st = bb->stmts[j];
      if (st->tag == Ist_NoOp)
         continue;

      if (st->tag == Ist_Put) {
         /* Non-indexed Put.  This can't give a binding, but we do
            need to check it doesn't invalidate the search by
            overlapping any part of the indexed guest state. */

         relation
            = getAliasingRelation_IC(
                 descrG, ixG,
                 st->Ist.Put.offset,
                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );

         if (relation == NoAlias) {
            /* we're OK; keep going */
            continue;
         } else {
            /* relation == UnknownAlias || relation == ExactAlias */
            /* If this assertion fails, we've found a Put which writes
               an area of guest state which is read by a GetI.  Which
               is unlikely (although not per se wrong). */
            vassert(relation != ExactAlias);
            /* This Put potentially writes guest state that the GetI
               reads; we must fail. */
            return NULL;
         }
      }

      if (st->tag == Ist_PutI) {
         IRPutI *puti = st->Ist.PutI.details;

         relation = getAliasingRelation_II(
                       descrG, ixG, biasG,
                       puti->descr,
                       puti->ix,
                       puti->bias
                    );

         if (relation == NoAlias) {
            /* This PutI definitely doesn't overlap.  Ignore it and
               keep going. */
            continue; /* the for j loop */
         }

         if (relation == UnknownAlias) {
            /* We don't know if this PutI writes to the same guest
               state that the GetI, or not.  So we have to give up. */
            return NULL;
         }

         /* Otherwise, we've found what we're looking for. */
         vassert(relation == ExactAlias);
         return puti->data;

      } /* if (st->tag == Ist_PutI) */

      if (st->tag == Ist_Dirty) {
         /* Be conservative.  If the dirty call has any guest effects at
            all, give up.  We could do better -- only give up if there
            are any guest writes/modifies. */
         if (st->Ist.Dirty.details->nFxState > 0)
            return NULL;
      }

      /* All other statement kinds are assumed not to write guest
         state and so cannot affect the search. */

   } /* for */

   /* No valid replacement was found. */
   return NULL;
}
4308
4309
4310
4311 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
4312 that it writes exactly the same piece of guest state) ? Safe
4313 answer: False. */
4314
identicalPutIs(IRStmt * pi,IRStmt * s2)4315 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
4316 {
4317 vassert(pi->tag == Ist_PutI);
4318 if (s2->tag != Ist_PutI)
4319 return False;
4320
4321 IRPutI *p1 = pi->Ist.PutI.details;
4322 IRPutI *p2 = s2->Ist.PutI.details;
4323
4324 return toBool(
4325 getAliasingRelation_II(
4326 p1->descr, p1->ix, p1->bias,
4327 p2->descr, p2->ix, p2->bias
4328 )
4329 == ExactAlias
4330 );
4331 }
4332
4333
4334 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
4335 overlap it? Safe answer: True. Note, we could do a lot better
4336 than this if needed. */
4337
static
Bool guestAccessWhichMightOverlapPutI (
        IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
     )
{
   GSAliasing relation;
   UInt       minoffP, maxoffP;

   vassert(pi->tag == Ist_PutI);

   IRPutI *p1 = pi->Ist.PutI.details;

   /* NOTE(review): minoffP/maxoffP are computed but the decision below
      is made purely via the getAliasingRelation_* helpers. */
   getArrayBounds(p1->descr, &minoffP, &maxoffP);
   switch (s2->tag) {

      case Ist_NoOp:
      case Ist_IMark:
         return False;

      case Ist_MBE:
      case Ist_AbiHint:
         /* just be paranoid ... these should be rare. */
         return True;

      case Ist_CAS:
         /* This is unbelievably lame, but it's probably not
            significant from a performance point of view.  Really, a
            CAS is a load-store op, so it should be safe to say False.
            However .. */
         return True;

      case Ist_Dirty:
         /* If the dirty call has any guest effects at all, give up.
            Probably could do better. */
         if (s2->Ist.Dirty.details->nFxState > 0)
            return True;
         return False;

      case Ist_Put:
         /* Plain Put: compare its (offset, type) footprint against
            the PutI's indexed region. */
         vassert(isIRAtom(s2->Ist.Put.data));
         relation
            = getAliasingRelation_IC(
                 p1->descr, p1->ix,
                 s2->Ist.Put.offset,
                 typeOfIRExpr(tyenv,s2->Ist.Put.data)
              );
         goto have_relation;

      case Ist_PutI: {
         IRPutI *p2 = s2->Ist.PutI.details;

         vassert(isIRAtom(p2->ix));
         vassert(isIRAtom(p2->data));
         relation
            = getAliasingRelation_II(
                 p1->descr, p1->ix, p1->bias,
                 p2->descr, p2->ix, p2->bias
              );
         goto have_relation;
      }

      case Ist_WrTmp:
         /* Only Get/GetI on the right-hand side can read guest
            state; any other RHS cannot overlap the PutI. */
         if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
            relation
               = getAliasingRelation_II(
                    p1->descr, p1->ix, p1->bias,
                    s2->Ist.WrTmp.data->Iex.GetI.descr,
                    s2->Ist.WrTmp.data->Iex.GetI.ix,
                    s2->Ist.WrTmp.data->Iex.GetI.bias
                 );
            goto have_relation;
         }
         if (s2->Ist.WrTmp.data->tag == Iex_Get) {
            relation
               = getAliasingRelation_IC(
                    p1->descr, p1->ix,
                    s2->Ist.WrTmp.data->Iex.Get.offset,
                    s2->Ist.WrTmp.data->Iex.Get.ty
                 );
            goto have_relation;
         }
         return False;

      case Ist_Store:
         /* Memory store: touches memory, not guest state. */
         vassert(isIRAtom(s2->Ist.Store.addr));
         vassert(isIRAtom(s2->Ist.Store.data));
         return False;

      default:
         vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
         vpanic("guestAccessWhichMightOverlapPutI");
   }

 have_relation:
   /* Anything other than a proven non-overlap counts as "might
      overlap" -- the safe answer. */
   if (relation == NoAlias)
      return False;
   else
      return True; /* ExactAlias or UnknownAlias */
}
4437
4438
4439
4440 /* ---------- PutI/GetI transformations main functions --------- */
4441
4442 /* Remove redundant GetIs, to the extent that they can be detected.
4443 bb is modified in-place. */
4444
4445 static
do_redundant_GetI_elimination(IRSB * bb)4446 void do_redundant_GetI_elimination ( IRSB* bb )
4447 {
4448 Int i;
4449 IRStmt* st;
4450
4451 for (i = bb->stmts_used-1; i >= 0; i--) {
4452 st = bb->stmts[i];
4453 if (st->tag == Ist_NoOp)
4454 continue;
4455
4456 if (st->tag == Ist_WrTmp
4457 && st->Ist.WrTmp.data->tag == Iex_GetI
4458 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
4459 IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
4460 IRExpr* ix = st->Ist.WrTmp.data->Iex.GetI.ix;
4461 Int bias = st->Ist.WrTmp.data->Iex.GetI.bias;
4462 IRExpr* replacement = findPutI(bb, i-1, descr, ix, bias);
4463 if (replacement
4464 && isIRAtom(replacement)
4465 /* Make sure we're doing a type-safe transformation! */
4466 && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
4467 if (DEBUG_IROPT) {
4468 vex_printf("rGI: ");
4469 ppIRExpr(st->Ist.WrTmp.data);
4470 vex_printf(" -> ");
4471 ppIRExpr(replacement);
4472 vex_printf("\n");
4473 }
4474 bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
4475 }
4476 }
4477 }
4478
4479 }
4480
4481
/* Remove redundant PutIs, to the extent that they can be detected.
   bb is modified in-place. */
4484
4485 static
do_redundant_PutI_elimination(IRSB * bb,VexRegisterUpdates pxControl)4486 void do_redundant_PutI_elimination ( IRSB* bb, VexRegisterUpdates pxControl )
4487 {
4488 Int i, j;
4489 Bool delete;
4490 IRStmt *st, *stj;
4491
4492 vassert(pxControl < VexRegUpdAllregsAtEachInsn);
4493
4494 for (i = 0; i < bb->stmts_used; i++) {
4495 st = bb->stmts[i];
4496 if (st->tag != Ist_PutI)
4497 continue;
4498 /* Ok, search forwards from here to see if we can find another
4499 PutI which makes this one redundant, and dodging various
4500 hazards. Search forwards:
4501 * If conditional exit, give up (because anything after that
4502 does not postdominate this put).
4503 * If a Get which might overlap, give up (because this PutI
4504 not necessarily dead).
4505 * If a Put which is identical, stop with success.
4506 * If a Put which might overlap, but is not identical, give up.
4507 * If a dirty helper call which might write guest state, give up.
4508 * If a Put which definitely doesn't overlap, or any other
4509 kind of stmt, continue.
4510 */
4511 delete = False;
4512 for (j = i+1; j < bb->stmts_used; j++) {
4513 stj = bb->stmts[j];
4514 if (stj->tag == Ist_NoOp)
4515 continue;
4516 if (identicalPutIs(st, stj)) {
4517 /* success! */
4518 delete = True;
4519 break;
4520 }
4521 if (stj->tag == Ist_Exit)
4522 /* give up */
4523 break;
4524 if (st->tag == Ist_Dirty)
4525 /* give up; could do better here */
4526 break;
4527 if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
4528 /* give up */
4529 break;
4530 }
4531
4532 if (delete) {
4533 if (DEBUG_IROPT) {
4534 vex_printf("rPI: ");
4535 ppIRStmt(st);
4536 vex_printf("\n");
4537 }
4538 bb->stmts[i] = IRStmt_NoOp();
4539 }
4540
4541 }
4542 }
4543
4544
4545 /*---------------------------------------------------------------*/
4546 /*--- Loop unrolling ---*/
4547 /*---------------------------------------------------------------*/
4548
4549 /* Adjust all tmp values (names) in e by delta. e is destructively
4550 modified. */
4551
static void deltaIRExpr ( IRExpr* e, Int delta )
{
   Int k;
   switch (e->tag) {
      case Iex_Get:
      case Iex_Const:
         /* No temps mentioned here. */
         break;
      case Iex_RdTmp:
         e->Iex.RdTmp.tmp += delta;
         break;
      case Iex_Unop:
         deltaIRExpr(e->Iex.Unop.arg, delta);
         break;
      case Iex_Binop:
         deltaIRExpr(e->Iex.Binop.arg1, delta);
         deltaIRExpr(e->Iex.Binop.arg2, delta);
         break;
      case Iex_Triop:
         deltaIRExpr(e->Iex.Triop.details->arg1, delta);
         deltaIRExpr(e->Iex.Triop.details->arg2, delta);
         deltaIRExpr(e->Iex.Triop.details->arg3, delta);
         break;
      case Iex_Qop:
         deltaIRExpr(e->Iex.Qop.details->arg1, delta);
         deltaIRExpr(e->Iex.Qop.details->arg2, delta);
         deltaIRExpr(e->Iex.Qop.details->arg3, delta);
         deltaIRExpr(e->Iex.Qop.details->arg4, delta);
         break;
      case Iex_Load:
         deltaIRExpr(e->Iex.Load.addr, delta);
         break;
      case Iex_GetI:
         deltaIRExpr(e->Iex.GetI.ix, delta);
         break;
      case Iex_ITE:
         deltaIRExpr(e->Iex.ITE.cond, delta);
         deltaIRExpr(e->Iex.ITE.iftrue, delta);
         deltaIRExpr(e->Iex.ITE.iffalse, delta);
         break;
      case Iex_CCall:
         /* args is a NULL-terminated vector. */
         for (k = 0; e->Iex.CCall.args[k]; k++)
            deltaIRExpr(e->Iex.CCall.args[k], delta);
         break;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("deltaIRExpr");
   }
}
4600
4601 /* Adjust all tmp values (names) in st by delta. st is destructively
4602 modified. */
4603
static void deltaIRStmt ( IRStmt* st, Int delta )
{
   Int      i;
   IRDirty* d;
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* No temps mentioned in these. */
         break;
      case Ist_AbiHint:
         deltaIRExpr(st->Ist.AbiHint.base, delta);
         deltaIRExpr(st->Ist.AbiHint.nia, delta);
         break;
      case Ist_Put:
         deltaIRExpr(st->Ist.Put.data, delta);
         break;
      case Ist_PutI:
         deltaIRExpr(st->Ist.PutI.details->ix, delta);
         deltaIRExpr(st->Ist.PutI.details->data, delta);
         break;
      case Ist_WrTmp:
         /* Both the written temp and any temps in the RHS shift. */
         st->Ist.WrTmp.tmp += delta;
         deltaIRExpr(st->Ist.WrTmp.data, delta);
         break;
      case Ist_Exit:
         deltaIRExpr(st->Ist.Exit.guard, delta);
         break;
      case Ist_Store:
         deltaIRExpr(st->Ist.Store.addr, delta);
         deltaIRExpr(st->Ist.Store.data, delta);
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         deltaIRExpr(sg->addr, delta);
         deltaIRExpr(sg->data, delta);
         deltaIRExpr(sg->guard, delta);
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         /* dst is a temp written by the statement, so it shifts too. */
         lg->dst += delta;
         deltaIRExpr(lg->addr, delta);
         deltaIRExpr(lg->alt, delta);
         deltaIRExpr(lg->guard, delta);
         break;
      }
      case Ist_CAS:
         /* oldHi/expdHi/dataHi are only present for double CAS. */
         if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
            st->Ist.CAS.details->oldHi += delta;
         st->Ist.CAS.details->oldLo += delta;
         deltaIRExpr(st->Ist.CAS.details->addr, delta);
         if (st->Ist.CAS.details->expdHi)
            deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
         deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
         if (st->Ist.CAS.details->dataHi)
            deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
         deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
         break;
      case Ist_LLSC:
         st->Ist.LLSC.result += delta;
         deltaIRExpr(st->Ist.LLSC.addr, delta);
         /* storedata == NULL means this is a load-linked, not a
            store-conditional. */
         if (st->Ist.LLSC.storedata)
            deltaIRExpr(st->Ist.LLSC.storedata, delta);
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         deltaIRExpr(d->guard, delta);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            /* VECRET/GSPTR markers are not real expressions. */
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               deltaIRExpr(arg, delta);
         }
         if (d->tmp != IRTemp_INVALID)
            d->tmp += delta;
         if (d->mAddr)
            deltaIRExpr(d->mAddr, delta);
         break;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("deltaIRStmt");
   }
}
4686
4687
4688 /* If possible, return a loop-unrolled version of bb0. The original
4689 is changed. If not possible, return NULL. */
4690
4691 /* The two schemas considered are:
4692
4693 X: BODY; goto X
4694
4695 which unrolls to (eg) X: BODY;BODY; goto X
4696
4697 and
4698
4699 X: BODY; if (c) goto X; goto Y
4700 which trivially transforms to
4701 X: BODY; if (!c) goto Y; goto X;
4702 so it falls in the scope of the first case.
4703
4704 X and Y must be literal (guest) addresses.
4705 */
4706
calc_unroll_factor(IRSB * bb)4707 static Int calc_unroll_factor( IRSB* bb )
4708 {
4709 Int n_stmts, i;
4710
4711 n_stmts = 0;
4712 for (i = 0; i < bb->stmts_used; i++) {
4713 if (bb->stmts[i]->tag != Ist_NoOp)
4714 n_stmts++;
4715 }
4716
4717 if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
4718 if (vex_control.iropt_verbosity > 0)
4719 vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
4720 n_stmts, 8* n_stmts);
4721 return 8;
4722 }
4723 if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
4724 if (vex_control.iropt_verbosity > 0)
4725 vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
4726 n_stmts, 4* n_stmts);
4727 return 4;
4728 }
4729
4730 if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
4731 if (vex_control.iropt_verbosity > 0)
4732 vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
4733 n_stmts, 2* n_stmts);
4734 return 2;
4735 }
4736
4737 if (vex_control.iropt_verbosity > 0)
4738 vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
4739
4740 return 1;
4741 }
4742
4743
static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr my_addr )
{
   Int      i, j, jmax, n_vars;
   Bool     xxx_known;
   Addr64   xxx_value, yyy_value;
   IRExpr*  udst;
   IRStmt*  st;
   IRConst* con;
   IRSB     *bb1, *bb2;
   Int      unroll_factor;

   /* Unrolling disabled entirely? */
   if (vex_control.iropt_unroll_thresh <= 0)
      return NULL;

   /* First off, figure out if we can unroll this loop.  Do this
      without modifying bb0. */

   if (bb0->jumpkind != Ijk_Boring)
      return NULL;

   xxx_known = False;
   xxx_value = 0;

   /* Extract the next-guest address.  If it isn't a literal, we
      have to give up. */

   udst = bb0->next;
   if (udst->tag == Iex_Const
       && (udst->Iex.Const.con->tag == Ico_U32
           || udst->Iex.Const.con->tag == Ico_U64)) {
      /* The BB ends in a jump to a literal location. */
      xxx_known = True;
      xxx_value = udst->Iex.Const.con->tag == Ico_U64
                    ?  udst->Iex.Const.con->Ico.U64
                    : (Addr64)(udst->Iex.Const.con->Ico.U32);
   }

   if (!xxx_known)
      return NULL;

   /* Now we know the BB ends to a jump to a literal location.  If
      it's a jump to itself (viz, idiom #1), move directly to the
      unrolling stage, first cloning the bb so the original isn't
      modified. */
   if (xxx_value == my_addr) {
      unroll_factor = calc_unroll_factor( bb0 );
      if (unroll_factor < 2)
         return NULL;
      bb1 = deepCopyIRSB( bb0 );
      bb0 = NULL;
      udst = NULL; /* is now invalid */
      goto do_unroll;
   }

   /* Search for the second idiomatic form:
        X: BODY; if (c) goto X; goto Y
      We know Y, but need to establish that the last stmt
      is 'if (c) goto X'.
   */
   yyy_value = xxx_value;
   /* Find the last non-NULL statement. */
   for (i = bb0->stmts_used-1; i >= 0; i--)
      if (bb0->stmts[i])
         break;

   if (i < 0)
      return NULL; /* block with no stmts.  Strange. */

   st = bb0->stmts[i];
   if (st->tag != Ist_Exit)
      return NULL;
   if (st->Ist.Exit.jk != Ijk_Boring)
      return NULL;

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   xxx_value = con->tag == Ico_U64
                  ? st->Ist.Exit.dst->Ico.U64
                  : (Addr64)(st->Ist.Exit.dst->Ico.U32);

   /* If this assertion fails, we have some kind of type error. */
   vassert(con->tag == udst->Iex.Const.con->tag);

   if (xxx_value != my_addr)
      /* We didn't find either idiom.  Give up. */
      return NULL;

   /* Ok, we found idiom #2.  Copy the BB, switch around the xxx and
      yyy values (which makes it look like idiom #1), and go into
      unrolling proper.  This means finding (again) the last stmt, in
      the copied BB. */

   unroll_factor = calc_unroll_factor( bb0 );
   if (unroll_factor < 2)
      return NULL;

   bb1 = deepCopyIRSB( bb0 );
   bb0 = NULL;
   udst = NULL; /* is now invalid */
   for (i = bb1->stmts_used-1; i >= 0; i--)
      if (bb1->stmts[i])
         break;

   /* The next bunch of assertions should be true since we already
      found and checked the last stmt in the original bb. */

   vassert(i >= 0);

   st = bb1->stmts[i];
   vassert(st->tag == Ist_Exit);

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   udst = bb1->next;
   vassert(udst->tag == Iex_Const);
   vassert(udst->Iex.Const.con->tag == Ico_U32
          || udst->Iex.Const.con->tag == Ico_U64);
   vassert(con->tag == udst->Iex.Const.con->tag);

   /* switch the xxx and yyy fields around */
   if (con->tag == Ico_U64) {
      udst->Iex.Const.con->Ico.U64 = xxx_value;
      con->Ico.U64 = yyy_value;
   } else {
      udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
      con->Ico.U32 = (UInt)yyy_value;
   }

   /* negate the test condition */
   st->Ist.Exit.guard
      = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));

   /* --- The unroller proper.  Both idioms are by now --- */
   /* --- now converted to idiom 1. --- */

  do_unroll:

   vassert(unroll_factor == 2
           || unroll_factor == 4
           || unroll_factor == 8);

   /* Each doubling pass appends a renamed copy of bb1 to itself, so
      jmax passes give the final factor. */
   jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
   for (j = 1; j <= jmax; j++) {

      n_vars = bb1->tyenv->types_used;

      bb2 = deepCopyIRSB(bb1);
      /* Allocate fresh temps in bb1 for all of bb2's temps, so the
         shifted names used below are valid. */
      for (i = 0; i < n_vars; i++)
         (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);

      for (i = 0; i < bb2->stmts_used; i++) {
         /* deltaIRStmt destructively modifies the stmt, but
            that's OK since bb2 is a complete fresh copy of bb1. */
         deltaIRStmt(bb2->stmts[i], n_vars);
         addStmtToIRSB(bb1, bb2->stmts[i]);
      }
   }

   if (DEBUG_IROPT) {
      vex_printf("\nUNROLLED (%lx)\n", my_addr);
      ppIRSB(bb1);
      vex_printf("\n");
   }

   /* Flattening; sigh.  The unroller succeeds in breaking flatness
      by negating the test condition.  This should be fixed properly.
      For the moment use this shotgun approach. */
   return flatten_BB(bb1);
}
4914
4915
4916 /*---------------------------------------------------------------*/
4917 /*--- The tree builder ---*/
4918 /*---------------------------------------------------------------*/
4919
4920 /* This isn't part of IR optimisation. Really it's a pass done prior
4921 to instruction selection, which improves the code that the
4922 instruction selector can produce. */
4923
4924 /* --- The 'tmp' environment is the central data structure here --- */
4925
4926 /* The number of outstanding bindings we're prepared to track.
4927 The number of times the env becomes full and we have to dump
4928 the oldest binding (hence reducing code quality) falls very
4929 rapidly as the env size increases. 8 gives reasonable performance
4930 under most circumstances. */
4931 #define A_NENV 10
4932
4933 /* An interval. Used to record the bytes in the guest state accessed
4934 by a Put[I] statement or by (one or more) Get[I] expression(s). In
4935 case of several Get[I] expressions, the lower/upper bounds are recorded.
4936 This is conservative but cheap.
4937 E.g. a Put of 8 bytes at address 100 would be recorded as [100,107].
4938 E.g. an expression that reads 8 bytes at offset 100 and 4 bytes at
4939 offset 200 would be recorded as [100,203] */
typedef
   struct {
      Bool present;  /* False => no bytes recorded yet; low/high not valid */
      Int  low;      /* lowest guest-state byte offset accessed */
      Int  high;     /* highest guest-state byte offset accessed (inclusive) */
   }
   Interval;
4947
4948 static inline Bool
intervals_overlap(Interval i1,Interval i2)4949 intervals_overlap(Interval i1, Interval i2)
4950 {
4951 return (i1.low >= i2.low && i1.low <= i2.high) ||
4952 (i2.low >= i1.low && i2.low <= i1.high);
4953 }
4954
4955 static inline void
update_interval(Interval * i,Int low,Int high)4956 update_interval(Interval *i, Int low, Int high)
4957 {
4958 vassert(low <= high);
4959
4960 if (i->present) {
4961 if (low < i->low) i->low = low;
4962 if (high > i->high) i->high = high;
4963 } else {
4964 i->present = True;
4965 i->low = low;
4966 i->high = high;
4967 }
4968 }
4969
4970
4971 /* bindee == NULL === slot is not in use
4972 bindee != NULL === slot is in use
4973 */
typedef
   struct {
      IRTemp  binder;  /* the temp this slot holds a binding for */
      IRExpr* bindee;  /* bound expression; NULL => slot not in use */
      Bool    doesLoad;  /* True if BINDEE contains a memory load */
      /* Record the bytes of the guest state BINDEE reads from. */
      Interval getInterval;
   }
   ATmpInfo;
4983
4984 __attribute__((unused))
ppAEnv(ATmpInfo * env)4985 static void ppAEnv ( ATmpInfo* env )
4986 {
4987 Int i;
4988 for (i = 0; i < A_NENV; i++) {
4989 vex_printf("%d tmp %d val ", i, (Int)env[i].binder);
4990 if (env[i].bindee)
4991 ppIRExpr(env[i].bindee);
4992 else
4993 vex_printf("(null)");
4994 vex_printf("\n");
4995 }
4996 }
4997
4998 /* --- Tree-traversal fns --- */
4999
5000 /* Traverse an expr, and detect if any part of it reads memory or does
5001 a Get. Be careful ... this really controls how much the
5002 tree-builder can reorder the code, so getting it right is critical.
5003 */
static void setHints_Expr (Bool* doesLoad, Interval* getInterval, IRExpr* e )
{
   Int argNo;
   switch (e->tag) {
      case Iex_RdTmp:
      case Iex_Const:
         /* Leaves: no loads, no guest-state reads. */
         return;
      case Iex_Get: {
         /* Direct guest-state read: fold its byte range in. */
         Int lo = e->Iex.Get.offset;
         Int hi = lo + sizeofIRType(e->Iex.Get.ty) - 1;
         update_interval(getInterval, lo, hi);
         return;
      }
      case Iex_GetI: {
         /* Indexed read: conservatively cover the whole array. */
         IRRegArray *descr = e->Iex.GetI.descr;
         Int elemSz = sizeofIRType(descr->elemTy);
         Int lo     = descr->base;
         Int hi     = lo + descr->nElems * elemSz - 1;
         update_interval(getInterval, lo, hi);
         setHints_Expr(doesLoad, getInterval, e->Iex.GetI.ix);
         return;
      }
      case Iex_Load:
         *doesLoad = True;
         setHints_Expr(doesLoad, getInterval, e->Iex.Load.addr);
         return;
      case Iex_Unop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Unop.arg);
         return;
      case Iex_Binop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg2);
         return;
      case Iex_Triop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg3);
         return;
      case Iex_Qop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg3);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg4);
         return;
      case Iex_ITE:
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.cond);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iftrue);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iffalse);
         return;
      case Iex_CCall:
         for (argNo = 0; e->Iex.CCall.args[argNo]; argNo++)
            setHints_Expr(doesLoad, getInterval, e->Iex.CCall.args[argNo]);
         return;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("setHints_Expr");
   }
}
5062
5063
5064 /* Add a binding to the front of the env and slide all the rest
5065 backwards. It should be the case that the last slot is free. */
static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
{
   Int slot;
   /* The caller guarantees the last slot is free before we shift. */
   vassert(env[A_NENV-1].bindee == NULL);
   /* Slide every slot one place towards the back. */
   for (slot = A_NENV-1; slot >= 1; slot--)
      env[slot] = env[slot-1];
   env[0].binder = binder;
   env[0].bindee = bindee;
   /* The hint fields are computed later (see setHints_Expr). */
   env[0].doesLoad            = False;
   env[0].getInterval.present = False;
   env[0].getInterval.low     = -1;
   env[0].getInterval.high    = -1;
}
5079
5080 /* Given uses :: array of UShort, indexed by IRTemp
5081 Add the use-occurrences of temps in this expression
5082 to the env.
5083 */
static void aoccCount_Expr ( UShort* uses, IRExpr* e )
{
   Int argNo;

   switch (e->tag) {

      case Iex_RdTmp: /* the only interesting case */
         uses[e->Iex.RdTmp.tmp]++;
         return;

      case Iex_Const:
      case Iex_Get:
         /* No temps here. */
         return;

      case Iex_GetI:
         aoccCount_Expr(uses, e->Iex.GetI.ix);
         return;

      case Iex_Load:
         aoccCount_Expr(uses, e->Iex.Load.addr);
         return;

      case Iex_Unop:
         aoccCount_Expr(uses, e->Iex.Unop.arg);
         return;

      case Iex_Binop:
         aoccCount_Expr(uses, e->Iex.Binop.arg1);
         aoccCount_Expr(uses, e->Iex.Binop.arg2);
         return;

      case Iex_Triop:
         aoccCount_Expr(uses, e->Iex.Triop.details->arg1);
         aoccCount_Expr(uses, e->Iex.Triop.details->arg2);
         aoccCount_Expr(uses, e->Iex.Triop.details->arg3);
         return;

      case Iex_Qop:
         aoccCount_Expr(uses, e->Iex.Qop.details->arg1);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg2);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg3);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg4);
         return;

      case Iex_ITE:
         aoccCount_Expr(uses, e->Iex.ITE.cond);
         aoccCount_Expr(uses, e->Iex.ITE.iftrue);
         aoccCount_Expr(uses, e->Iex.ITE.iffalse);
         return;

      case Iex_CCall:
         for (argNo = 0; e->Iex.CCall.args[argNo]; argNo++)
            aoccCount_Expr(uses, e->Iex.CCall.args[argNo]);
         return;

      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("aoccCount_Expr");
   }
}
5144
5145
5146 /* Given uses :: array of UShort, indexed by IRTemp
5147 Add the use-occurrences of temps in this statement
5148 to the env.
5149 */
static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
{
   Int      i;
   IRDirty* d;
   IRCAS*   cas;
   switch (st->tag) {
      case Ist_AbiHint:
         aoccCount_Expr(uses, st->Ist.AbiHint.base);
         aoccCount_Expr(uses, st->Ist.AbiHint.nia);
         return;
      case Ist_WrTmp:
         /* Only the RHS counts as a *use*; the written temp itself
            is a definition, not a use. */
         aoccCount_Expr(uses, st->Ist.WrTmp.data);
         return;
      case Ist_Put:
         aoccCount_Expr(uses, st->Ist.Put.data);
         return;
      case Ist_PutI:
         aoccCount_Expr(uses, st->Ist.PutI.details->ix);
         aoccCount_Expr(uses, st->Ist.PutI.details->data);
         return;
      case Ist_Store:
         aoccCount_Expr(uses, st->Ist.Store.addr);
         aoccCount_Expr(uses, st->Ist.Store.data);
         return;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         aoccCount_Expr(uses, sg->addr);
         aoccCount_Expr(uses, sg->data);
         aoccCount_Expr(uses, sg->guard);
         return;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         aoccCount_Expr(uses, lg->addr);
         aoccCount_Expr(uses, lg->alt);
         aoccCount_Expr(uses, lg->guard);
         return;
      }
      case Ist_CAS:
         /* expdHi/dataHi are only present for a double CAS. */
         cas = st->Ist.CAS.details;
         aoccCount_Expr(uses, cas->addr);
         if (cas->expdHi)
            aoccCount_Expr(uses, cas->expdHi);
         aoccCount_Expr(uses, cas->expdLo);
         if (cas->dataHi)
            aoccCount_Expr(uses, cas->dataHi);
         aoccCount_Expr(uses, cas->dataLo);
         return;
      case Ist_LLSC:
         aoccCount_Expr(uses, st->Ist.LLSC.addr);
         /* storedata == NULL means load-linked, not store-conditional. */
         if (st->Ist.LLSC.storedata)
            aoccCount_Expr(uses, st->Ist.LLSC.storedata);
         return;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None)
            aoccCount_Expr(uses, d->mAddr);
         aoccCount_Expr(uses, d->guard);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            /* VECRET/GSPTR markers are not real expressions. */
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               aoccCount_Expr(uses, arg);
         }
         return;
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* No temp uses in these. */
         return;
      case Ist_Exit:
         aoccCount_Expr(uses, st->Ist.Exit.guard);
         return;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("aoccCount_Stmt");
   }
}
5226
5227 /* Look up a binding for tmp in the env. If found, return the bound
5228 expression, and set the env's binding to NULL so it is marked as
5229 used. If not found, return NULL. */
5230
atbSubst_Temp(ATmpInfo * env,IRTemp tmp)5231 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
5232 {
5233 Int i;
5234 for (i = 0; i < A_NENV; i++) {
5235 if (env[i].binder == tmp && env[i].bindee != NULL) {
5236 IRExpr* bindee = env[i].bindee;
5237 env[i].bindee = NULL;
5238 return bindee;
5239 }
5240 }
5241 return NULL;
5242 }
5243
5244 /* Traverse e, looking for temps. For each observed temp, see if env
5245 contains a binding for the temp, and if so return the bound value.
5246 The env has the property that any binding it holds is
5247 'single-shot', so once a binding is used, it is marked as no longer
5248 available, by setting its .bindee field to NULL. */
5249
is_Unop(IRExpr * e,IROp op)5250 static inline Bool is_Unop ( IRExpr* e, IROp op ) {
5251 return e->tag == Iex_Unop && e->Iex.Unop.op == op;
5252 }
is_Binop(IRExpr * e,IROp op)5253 static inline Bool is_Binop ( IRExpr* e, IROp op ) {
5254 return e->tag == Iex_Binop && e->Iex.Binop.op == op;
5255 }
5256
/* Try a couple of post-substitution reduction rules on the binary
   expression 'op(a1,a2)'.  If none applies, just build the Binop
   node as-is. */
static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) ) */
   if (op == Iop_Or32
       && is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32)) {
      return IRExpr_Unop( Iop_CmpwNEZ32,
                          IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
                                                  a2->Iex.Unop.arg ) );
   }

   /* CmpNE32(1Uto32(X),0)) ==> X, valid since X has type Ity_I1 */
   if (op == Iop_CmpNE32
       && is_Unop(a1, Iop_1Uto32) && isZeroU32(a2)) {
      return a1->Iex.Unop.arg;
   }

   /* no reduction rule applies */
   return IRExpr_Binop( op, a1, a2 );
}
5281
/* Try some post-substitution reduction rules on the unary expression
   'op(aa)'.  Each rule recognises a specific composition of operators
   and rewrites it to a smaller equivalent; within a case, rules are
   tried in order and the first match wins.  If no rule applies, just
   build the Unop node as-is. */
static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
{
   switch (op) {
   case Iop_CmpwNEZ64:
      /* CmpwNEZ64( CmpwNEZ64 ( x ) ) --> CmpwNEZ64 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpwNEZ64, aa->Iex.Unop.arg );
      /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1->Iex.Unop.arg,
                                aa->Iex.Binop.arg2));
      /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1,
                                aa->Iex.Binop.arg2->Iex.Unop.arg));
      break;
   case Iop_CmpNEZ64:
      /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
      /* CmpNEZ64( 1Uto64(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;
      break;
   case Iop_CmpwNEZ32:
      /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_CmpNEZ32:
      /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
      /* CmpNEZ32( 1Uto32(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* CmpNEZ32( 64to32( CmpwNEZ64(X) ) ) --> CmpNEZ64(X) */
      if (is_Unop(aa, Iop_64to32) && is_Unop(aa->Iex.Unop.arg, Iop_CmpwNEZ64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg->Iex.Unop.arg);
      break;
   case Iop_CmpNEZ8:
      /* CmpNEZ8( 1Uto8(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto8))
         return aa->Iex.Unop.arg;
      break;
   case Iop_Left32:
      /* Left32( Left32(x) ) --> Left32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
      break;
   case Iop_Left64:
      /* Left64( Left64(x) ) --> Left64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop( Iop_Left64, aa->Iex.Unop.arg );
      break;
   case Iop_ZeroHI64ofV128:
      /* ZeroHI64ofV128( ZeroHI64ofV128(x) ) --> ZeroHI64ofV128(x) */
      if (is_Unop(aa, Iop_ZeroHI64ofV128))
         return IRExpr_Unop( Iop_ZeroHI64ofV128, aa->Iex.Unop.arg );
      break;
   case Iop_32to1:
      /* 32to1( 1Uto32 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_64to1:
      /* 64to1( 1Uto64 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;
      /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
      break;
   case Iop_64to32:
      /* 64to32( 32Uto64 ( x )) --> x */
      if (is_Unop(aa, Iop_32Uto64))
         return aa->Iex.Unop.arg;
      /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
      if (is_Unop(aa, Iop_8Uto64))
         return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
      break;

   case Iop_32Uto64:
      /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
      if (is_Unop(aa, Iop_8Uto32))
         return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
      /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
      if (is_Unop(aa, Iop_16Uto32))
         return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
      /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh ))
                --> Shr64( 32Uto64(64to32(x)), sh )) */
      if (is_Unop(aa, Iop_64to32)
          && is_Binop(aa->Iex.Unop.arg, Iop_Shr64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
                     Iop_64to32)) {
         return aa->Iex.Unop.arg;
      }
      /* 32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh ))
                --> 32Uto64(64to32( Shl64( x, sh )) */
      if (is_Unop(aa, Iop_64to32)
          && is_Binop(aa->Iex.Unop.arg, Iop_Shl64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
                     Iop_64to32)) {
         return
            IRExpr_Unop(
               Iop_32Uto64,
               IRExpr_Unop(
                  Iop_64to32,
                  IRExpr_Binop(
                     Iop_Shl64,
                     aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg,
                     aa->Iex.Unop.arg->Iex.Binop.arg2
            )));
      }
      break;

   case Iop_1Sto32:
      /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
      if (is_Unop(aa, Iop_CmpNEZ8)
          && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
                     Iop_CmpNEZ32)) {
         return IRExpr_Unop( Iop_CmpwNEZ32,
                             aa->Iex.Unop.arg->Iex.Unop.arg
                               ->Iex.Unop.arg->Iex.Unop.arg);
      }
      break;

   default:
      break;
   }
   /* no reduction rule applies */
   return IRExpr_Unop( op, aa );
}
5430
/* Rebuild 'e' with any single-shot temp bindings from 'env'
   substituted in.  Binops and Unops additionally go through the
   fold_IRExpr_* reducers after substitution. */
static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
{
   switch (e->tag) {

      case Iex_RdTmp: {
         IRExpr* bound = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
         return bound != NULL ? bound : e;
      }

      case Iex_Const:
      case Iex_Get:
         /* Nothing substitutable inside these. */
         return e;

      case Iex_GetI:
         return IRExpr_GetI(
                   e->Iex.GetI.descr,
                   atbSubst_Expr(env, e->Iex.GetI.ix),
                   e->Iex.GetI.bias
                );

      case Iex_Load:
         return IRExpr_Load(
                   e->Iex.Load.end,
                   e->Iex.Load.ty,
                   atbSubst_Expr(env, e->Iex.Load.addr)
                );

      case Iex_Unop:
         return fold_IRExpr_Unop(
                   e->Iex.Unop.op,
                   atbSubst_Expr(env, e->Iex.Unop.arg)
                );

      case Iex_Binop:
         return fold_IRExpr_Binop(
                   e->Iex.Binop.op,
                   atbSubst_Expr(env, e->Iex.Binop.arg1),
                   atbSubst_Expr(env, e->Iex.Binop.arg2)
                );

      case Iex_Triop: {
         IRTriop* tri = e->Iex.Triop.details;
         return IRExpr_Triop(
                   tri->op,
                   atbSubst_Expr(env, tri->arg1),
                   atbSubst_Expr(env, tri->arg2),
                   atbSubst_Expr(env, tri->arg3)
                );
      }

      case Iex_Qop: {
         IRQop* qop = e->Iex.Qop.details;
         return IRExpr_Qop(
                   qop->op,
                   atbSubst_Expr(env, qop->arg1),
                   atbSubst_Expr(env, qop->arg2),
                   atbSubst_Expr(env, qop->arg3),
                   atbSubst_Expr(env, qop->arg4)
                );
      }

      case Iex_ITE:
         return IRExpr_ITE(
                   atbSubst_Expr(env, e->Iex.ITE.cond),
                   atbSubst_Expr(env, e->Iex.ITE.iftrue),
                   atbSubst_Expr(env, e->Iex.ITE.iffalse)
                );

      case Iex_CCall: {
         Int      k;
         IRExpr** newArgs = shallowCopyIRExprVec(e->Iex.CCall.args);
         for (k = 0; newArgs[k]; k++)
            newArgs[k] = atbSubst_Expr(env, newArgs[k]);
         return IRExpr_CCall(
                   e->Iex.CCall.cee,
                   e->Iex.CCall.retty,
                   newArgs
                );
      }

      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("atbSubst_Expr");
   }
}
5503
5504 /* Same deal as atbSubst_Expr, except for stmts. */
5505
/* Same deal as atbSubst_Expr, except for stmts: rebuild 'st' with any
   single-shot temp bindings from 'env' substituted into its
   constituent expressions.  A fresh statement is always constructed;
   'st' itself is not modified. */
static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
{
   Int     i;
   IRDirty *d, *d2;
   IRCAS   *cas, *cas2;
   IRPutI  *puti, *puti2;

   switch (st->tag) {
   case Ist_AbiHint:
      return IRStmt_AbiHint(
                atbSubst_Expr(env, st->Ist.AbiHint.base),
                st->Ist.AbiHint.len,
                atbSubst_Expr(env, st->Ist.AbiHint.nia)
             );
   case Ist_Store:
      return IRStmt_Store(
                st->Ist.Store.end,
                atbSubst_Expr(env, st->Ist.Store.addr),
                atbSubst_Expr(env, st->Ist.Store.data)
             );
   case Ist_StoreG: {
      IRStoreG* sg = st->Ist.StoreG.details;
      return IRStmt_StoreG(sg->end,
                           atbSubst_Expr(env, sg->addr),
                           atbSubst_Expr(env, sg->data),
                           atbSubst_Expr(env, sg->guard));
   }
   case Ist_LoadG: {
      IRLoadG* lg = st->Ist.LoadG.details;
      return IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                          atbSubst_Expr(env, lg->addr),
                          atbSubst_Expr(env, lg->alt),
                          atbSubst_Expr(env, lg->guard));
   }
   case Ist_WrTmp:
      return IRStmt_WrTmp(
                st->Ist.WrTmp.tmp,
                atbSubst_Expr(env, st->Ist.WrTmp.data)
             );
   case Ist_Put:
      return IRStmt_Put(
                st->Ist.Put.offset,
                atbSubst_Expr(env, st->Ist.Put.data)
             );
   case Ist_PutI:
      puti = st->Ist.PutI.details;
      puti2 = mkIRPutI(puti->descr,
                       atbSubst_Expr(env, puti->ix),
                       puti->bias,
                       atbSubst_Expr(env, puti->data));
      return IRStmt_PutI(puti2);

   case Ist_Exit:
      return IRStmt_Exit(
                atbSubst_Expr(env, st->Ist.Exit.guard),
                st->Ist.Exit.jk,
                st->Ist.Exit.dst,
                st->Ist.Exit.offsIP
             );
   case Ist_IMark:
      return IRStmt_IMark(st->Ist.IMark.addr,
                          st->Ist.IMark.len,
                          st->Ist.IMark.delta);
   case Ist_NoOp:
      return IRStmt_NoOp();
   case Ist_MBE:
      return IRStmt_MBE(st->Ist.MBE.event);
   case Ist_CAS:
      /* expdHi/dataHi are NULL except for double-width CAS. */
      cas  = st->Ist.CAS.details;
      cas2 = mkIRCAS(
                cas->oldHi, cas->oldLo, cas->end,
                atbSubst_Expr(env, cas->addr),
                cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
                atbSubst_Expr(env, cas->expdLo),
                cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
                atbSubst_Expr(env, cas->dataLo)
             );
      return IRStmt_CAS(cas2);
   case Ist_LLSC:
      /* storedata is NULL for a Load-Linked. */
      return IRStmt_LLSC(
                st->Ist.LLSC.end,
                st->Ist.LLSC.result,
                atbSubst_Expr(env, st->Ist.LLSC.addr),
                st->Ist.LLSC.storedata
                   ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
             );
   case Ist_Dirty:
      /* Shallow-copy the IRDirty, then substitute in place in the
         copy, leaving the original untouched. */
      d  = st->Ist.Dirty.details;
      d2 = emptyIRDirty();
      *d2 = *d;
      if (d2->mFx != Ifx_None)
         d2->mAddr = atbSubst_Expr(env, d2->mAddr);
      d2->guard = atbSubst_Expr(env, d2->guard);
      for (i = 0; d2->args[i]; i++) {
         IRExpr* arg = d2->args[i];
         /* VECRET/GSPTR markers are not real expressions; skip them. */
         if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
            d2->args[i] = atbSubst_Expr(env, arg);
      }
      return IRStmt_Dirty(d2);
   default:
      vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
      vpanic("atbSubst_Stmt");
   }
}
5610
5611 inline
dirty_helper_stores(const IRDirty * d)5612 static Bool dirty_helper_stores ( const IRDirty *d )
5613 {
5614 return d->mFx == Ifx_Write || d->mFx == Ifx_Modify;
5615 }
5616
5617 inline
dirty_helper_puts(const IRDirty * d,Bool (* preciseMemExnsFn)(Int,Int,VexRegisterUpdates),VexRegisterUpdates pxControl,Bool * requiresPreciseMemExns)5618 static Interval dirty_helper_puts (
5619 const IRDirty *d,
5620 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
5621 VexRegisterUpdates pxControl,
5622 /*OUT*/Bool *requiresPreciseMemExns
5623 )
5624 {
5625 Int i;
5626 Interval interval;
5627
5628 /* Passing the guest state pointer opens the door to modifying the
5629 guest state under the covers. It's not allowed, but let's be
5630 extra conservative and assume the worst. */
5631 for (i = 0; d->args[i]; i++) {
5632 if (UNLIKELY(d->args[i]->tag == Iex_GSPTR)) {
5633 *requiresPreciseMemExns = True;
5634 /* Assume all guest state is written. */
5635 interval.present = True;
5636 interval.low = 0;
5637 interval.high = 0x7FFFFFFF;
5638 return interval;
5639 }
5640 }
5641
5642 /* Check the side effects on the guest state */
5643 interval.present = False;
5644 interval.low = interval.high = -1;
5645 *requiresPreciseMemExns = False;
5646
5647 for (i = 0; i < d->nFxState; ++i) {
5648 if (d->fxState[i].fx != Ifx_Read) {
5649 Int offset = d->fxState[i].offset;
5650 Int size = d->fxState[i].size;
5651 Int nRepeats = d->fxState[i].nRepeats;
5652 Int repeatLen = d->fxState[i].repeatLen;
5653
5654 if (preciseMemExnsFn(offset,
5655 offset + nRepeats * repeatLen + size - 1,
5656 pxControl)) {
5657 *requiresPreciseMemExns = True;
5658 }
5659 update_interval(&interval, offset,
5660 offset + nRepeats * repeatLen + size - 1);
5661 }
5662 }
5663
5664 return interval;
5665 }
5666
5667 /* Return an interval if st modifies the guest state. Via
5668 requiresPreciseMemExns return whether or not that modification
5669 requires precise exceptions. */
stmt_modifies_guest_state(IRSB * bb,const IRStmt * st,Bool (* preciseMemExnsFn)(Int,Int,VexRegisterUpdates),VexRegisterUpdates pxControl,Bool * requiresPreciseMemExns)5670 static Interval stmt_modifies_guest_state (
5671 IRSB *bb, const IRStmt *st,
5672 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
5673 VexRegisterUpdates pxControl,
5674 /*OUT*/Bool *requiresPreciseMemExns
5675 )
5676 {
5677 Interval interval;
5678
5679 switch (st->tag) {
5680 case Ist_Put: {
5681 Int offset = st->Ist.Put.offset;
5682 Int size = sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Put.data));
5683
5684 *requiresPreciseMemExns
5685 = preciseMemExnsFn(offset, offset + size - 1, pxControl);
5686 interval.present = True;
5687 interval.low = offset;
5688 interval.high = offset + size - 1;
5689 return interval;
5690 }
5691
5692 case Ist_PutI: {
5693 IRRegArray *descr = st->Ist.PutI.details->descr;
5694 Int offset = descr->base;
5695 Int size = sizeofIRType(descr->elemTy);
5696
5697 /* We quietly assume here that all segments are contiguous and there
5698 are no holes. This is to avoid a loop. The assumption is conservative
5699 in the sense that we might report that precise memory exceptions are
5700 needed when in fact they are not. */
5701 *requiresPreciseMemExns
5702 = preciseMemExnsFn(offset, offset + descr->nElems * size - 1,
5703 pxControl);
5704 interval.present = True;
5705 interval.low = offset;
5706 interval.high = offset + descr->nElems * size - 1;
5707 return interval;
5708 }
5709
5710 case Ist_Dirty:
5711 return dirty_helper_puts(st->Ist.Dirty.details,
5712 preciseMemExnsFn, pxControl,
5713 requiresPreciseMemExns);
5714
5715 default:
5716 *requiresPreciseMemExns = False;
5717 interval.present = False;
5718 interval.low = -1;
5719 interval.high = -1;
5720 return interval;
5721 }
5722 }
5723
ado_treebuild_BB(IRSB * bb,Bool (* preciseMemExnsFn)(Int,Int,VexRegisterUpdates),VexRegisterUpdates pxControl)5724 /* notstatic */ Addr ado_treebuild_BB (
5725 IRSB* bb,
5726 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
5727 VexRegisterUpdates pxControl
5728 )
5729 {
5730 Int i, j, k, m;
5731 Bool stmtStores, invalidateMe;
5732 Interval putInterval;
5733 IRStmt* st;
5734 IRStmt* st2;
5735 ATmpInfo env[A_NENV];
5736
5737 Bool max_ga_known = False;
5738 Addr max_ga = 0;
5739
5740 Int n_tmps = bb->tyenv->types_used;
5741 UShort* uses = LibVEX_Alloc_inline(n_tmps * sizeof(UShort));
5742
5743 /* Phase 1. Scan forwards in bb, counting use occurrences of each
5744 temp. Also count occurrences in the bb->next field. Take the
5745 opportunity to also find the maximum guest address in the block,
5746 since that will be needed later for deciding when we can safely
5747 elide event checks. */
5748
5749 for (i = 0; i < n_tmps; i++)
5750 uses[i] = 0;
5751
5752 for (i = 0; i < bb->stmts_used; i++) {
5753 st = bb->stmts[i];
5754 switch (st->tag) {
5755 case Ist_NoOp:
5756 continue;
5757 case Ist_IMark: {
5758 UInt len = st->Ist.IMark.len;
5759 Addr mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
5760 max_ga_known = True;
5761 if (mga > max_ga)
5762 max_ga = mga;
5763 break;
5764 }
5765 default:
5766 break;
5767 }
5768 aoccCount_Stmt( uses, st );
5769 }
5770 aoccCount_Expr(uses, bb->next );
5771
5772 # if 0
5773 for (i = 0; i < n_tmps; i++) {
5774 if (uses[i] == 0)
5775 continue;
5776 ppIRTemp( (IRTemp)i );
5777 vex_printf(" used %d\n", (Int)uses[i] );
5778 }
5779 # endif
5780
5781 /* Phase 2. Scan forwards in bb. For each statement in turn:
5782
5783 If the env is full, emit the end element. This guarantees
5784 there is at least one free slot in the following.
5785
5786 On seeing 't = E', occ(t)==1,
5787 let E'=env(E)
5788 delete this stmt
5789 add t -> E' to the front of the env
5790 Examine E' and set the hints for E' appropriately
5791 (doesLoad? doesGet?)
5792
5793 On seeing any other stmt,
5794 let stmt' = env(stmt)
5795 remove from env any 't=E' binds invalidated by stmt
5796 emit the invalidated stmts
5797 emit stmt'
5798 compact any holes in env
5799 by sliding entries towards the front
5800
5801 Finally, apply env to bb->next.
5802 */
5803
5804 for (i = 0; i < A_NENV; i++) {
5805 env[i].bindee = NULL;
5806 env[i].binder = IRTemp_INVALID;
5807 }
5808
5809 /* The stmts in bb are being reordered, and we are guaranteed to
5810 end up with no more than the number we started with. Use i to
5811 be the cursor of the current stmt examined and j <= i to be that
5812 for the current stmt being written.
5813 */
5814 j = 0;
5815 for (i = 0; i < bb->stmts_used; i++) {
5816
5817 st = bb->stmts[i];
5818 if (st->tag == Ist_NoOp)
5819 continue;
5820
5821 /* Ensure there's at least one space in the env, by emitting
5822 the oldest binding if necessary. */
5823 if (env[A_NENV-1].bindee != NULL) {
5824 bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
5825 env[A_NENV-1].bindee );
5826 j++;
5827 vassert(j <= i);
5828 env[A_NENV-1].bindee = NULL;
5829 }
5830
5831 /* Consider current stmt. */
5832 if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
5833 IRExpr *e, *e2;
5834
5835 /* optional extra: dump dead bindings as we find them.
5836 Removes the need for a prior dead-code removal pass. */
5837 if (uses[st->Ist.WrTmp.tmp] == 0) {
5838 if (0) vex_printf("DEAD binding\n");
5839 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
5840 }
5841 vassert(uses[st->Ist.WrTmp.tmp] == 1);
5842
5843 /* ok, we have 't = E', occ(t)==1. Do the abovementioned
5844 actions. */
5845 e = st->Ist.WrTmp.data;
5846 e2 = atbSubst_Expr(env, e);
5847 addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
5848 setHints_Expr(&env[0].doesLoad, &env[0].getInterval, e2);
5849 /* don't advance j, as we are deleting this stmt and instead
5850 holding it temporarily in the env. */
5851 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
5852 }
5853
5854 /* we get here for any other kind of statement. */
5855 /* 'use up' any bindings required by the current statement. */
5856 st2 = atbSubst_Stmt(env, st);
5857
5858 /* Now, before this stmt, dump any bindings in env that it
5859 invalidates. These need to be dumped in the order in which
5860 they originally entered env -- that means from oldest to
5861 youngest. */
5862
5863 /* putInterval/stmtStores characterise what the stmt under
5864 consideration does, or might do (sidely safe @ True). */
5865
5866 Bool putRequiresPreciseMemExns;
5867 putInterval = stmt_modifies_guest_state(
5868 bb, st, preciseMemExnsFn, pxControl,
5869 &putRequiresPreciseMemExns
5870 );
5871
5872 /* be True if this stmt writes memory or might do (==> we don't
5873 want to reorder other loads or stores relative to it). Also,
5874 both LL and SC fall under this classification, since we
5875 really ought to be conservative and not reorder any other
5876 memory transactions relative to them. */
5877 stmtStores
5878 = toBool( st->tag == Ist_Store
5879 || (st->tag == Ist_Dirty
5880 && dirty_helper_stores(st->Ist.Dirty.details))
5881 || st->tag == Ist_LLSC
5882 || st->tag == Ist_CAS );
5883
5884 for (k = A_NENV-1; k >= 0; k--) {
5885 if (env[k].bindee == NULL)
5886 continue;
5887 /* Compare the actions of this stmt with the actions of
5888 binding 'k', to see if they invalidate the binding. */
5889 invalidateMe
5890 = toBool(
5891 /* a store invalidates loaded data */
5892 (env[k].doesLoad && stmtStores)
5893 /* a put invalidates get'd data, if they overlap */
5894 || ((env[k].getInterval.present && putInterval.present) &&
5895 intervals_overlap(env[k].getInterval, putInterval))
5896 /* a put invalidates loaded data. That means, in essense, that
5897 a load expression cannot be substituted into a statement
5898 that follows the put. But there is nothing wrong doing so
5899 except when the put statement requries precise exceptions.
5900 Think of a load that is moved past a put where the put
5901 updates the IP in the guest state. If the load generates
5902 a segfault, the wrong address (line number) would be
5903 reported. */
5904 || (env[k].doesLoad && putInterval.present &&
5905 putRequiresPreciseMemExns)
5906 /* probably overly conservative: a memory bus event
5907 invalidates absolutely everything, so that all
5908 computation prior to it is forced to complete before
5909 proceeding with the event (fence,lock,unlock). */
5910 || st->tag == Ist_MBE
5911 /* also be (probably overly) paranoid re AbiHints */
5912 || st->tag == Ist_AbiHint
5913 );
5914 if (invalidateMe) {
5915 bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
5916 j++;
5917 vassert(j <= i);
5918 env[k].bindee = NULL;
5919 }
5920 }
5921
5922 /* Slide in-use entries in env up to the front */
5923 m = 0;
5924 for (k = 0; k < A_NENV; k++) {
5925 if (env[k].bindee != NULL) {
5926 env[m] = env[k];
5927 m++;
5928 }
5929 }
5930 for (m = m; m < A_NENV; m++) {
5931 env[m].bindee = NULL;
5932 }
5933
5934 /* finally, emit the substituted statement */
5935 bb->stmts[j] = st2;
5936 /* vex_printf("**2 "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
5937 j++;
5938
5939 vassert(j <= i+1);
5940 } /* for each stmt in the original bb ... */
5941
5942 /* Finally ... substitute the ->next field as much as possible, and
5943 dump any left-over bindings. Hmm. Perhaps there should be no
5944 left over bindings? Or any left-over bindings are
5945 by definition dead? */
5946 bb->next = atbSubst_Expr(env, bb->next);
5947 bb->stmts_used = j;
5948
5949 return max_ga_known ? max_ga : ~(Addr)0;
5950 }
5951
5952
5953 /*---------------------------------------------------------------*/
5954 /*--- MSVC specific transformation hacks ---*/
5955 /*---------------------------------------------------------------*/
5956
/* The purpose of all this is to find MSVC's idiom for non-constant
   bitfield assignment, "a ^ ((a ^ b) & c)", and transform it into
   gcc's idiom "(a & ~c) | (b & c)".  Motivation is that Memcheck
   generates a lot of false positives from the MSVC version because it
   doesn't understand that XORing an undefined bit with itself gives a
   defined result.
5963
5964 This isn't a problem for the simple case "x ^ x", because iropt
5965 folds it to a constant zero before Memcheck ever sees it. But in
5966 this case we have an intervening "& c" which defeats the simple
5967 case. So we have to carefully inspect all expressions rooted at an
5968 XOR to see if any of them match "a ^ ((a ^ b) & c)", or any of the
5969 7 other variants resulting from swapping the order of arguments to
5970 the three binary operations. If we get a match, we then replace
5971 the tree with "(a & ~c) | (b & c)", and Memcheck is happy.
5972
5973 The key difficulty is to spot the two uses of "a". To normalise
5974 the IR to maximise the chances of success, we first do a CSE pass,
5975 with CSEing of loads enabled, since the two "a" expressions may be
5976 loads, which need to be commoned up. Then we do a constant folding
5977 pass, so as to remove any tmp-to-tmp assignment chains that would
5978 make matching the original expression more difficult.
5979 */
5980
5981
5982 /* Helper function for debug printing */
5983 __attribute__((unused))
print_flat_expr(IRExpr ** env,IRExpr * e)5984 static void print_flat_expr ( IRExpr** env, IRExpr* e )
5985 {
5986 if (e == NULL) {
5987 vex_printf("?");
5988 return;
5989 }
5990 switch (e->tag) {
5991 case Iex_Binop: {
5992 ppIROp(e->Iex.Binop.op);
5993 vex_printf("(");
5994 print_flat_expr(env, e->Iex.Binop.arg1);
5995 vex_printf(",");
5996 print_flat_expr(env, e->Iex.Binop.arg2);
5997 vex_printf(")");
5998 break;
5999 }
6000 case Iex_Unop: {
6001 ppIROp(e->Iex.Unop.op);
6002 vex_printf("(");
6003 print_flat_expr(env, e->Iex.Unop.arg);
6004 vex_printf(")");
6005 break;
6006 }
6007 case Iex_RdTmp:
6008 ppIRTemp(e->Iex.RdTmp.tmp);
6009 vex_printf("=");
6010 print_flat_expr(env, chase1(env, e));
6011 break;
6012 case Iex_Const:
6013 case Iex_CCall:
6014 case Iex_Load:
6015 case Iex_ITE:
6016 case Iex_Get:
6017 ppIRExpr(e);
6018 break;
6019 default:
6020 vex_printf("FAIL: "); ppIRExpr(e); vex_printf("\n");
6021 vassert(0);
6022 }
6023 }
6024
6025 /* Spot a ^ ((a ^ b) & c) for a,b and c tmp-or-const (atoms)
6026 or any of the other 7 variants generated by switching the order
6027 of arguments to the outer ^, the inner ^ and the &.
6028 */
/* Spot a ^ ((a ^ b) & c) for a, b and c tmp-or-const (atoms), or any
   of the other 7 variants generated by switching the order of
   arguments to the outer ^, the inner ^ and the &.  On success the
   three atoms are returned in *aa, *bb, *cc and the variant number
   (1..8) is returned; on failure 0 is returned and the out-params are
   meaningless.  'env' maps temps to their bound expressions, so
   chase1 can look through RdTmp indirections. */
static UInt spotBitfieldAssignment ( /*OUT*/IRExpr** aa, /*OUT*/IRExpr** bb,
                                     /*OUT*/IRExpr** cc,
                                     IRExpr** env, IRExpr* e,
                                     IROp opAND, IROp opXOR)
{
#  define ISBIN(_e,_op)  ((_e) && (_e)->tag == Iex_Binop \
                                && (_e)->Iex.Binop.op == (_op))
#  define ISATOM(_e)     isIRAtom(_e)
#  define STEP(_e)       chase1(env, (_e))
#  define LL(_e)         ((_e)->Iex.Binop.arg1)
#  define RR(_e)         ((_e)->Iex.Binop.arg2)

   IRExpr *a1, *and, *xor, *c, *a2bL, *a2bR;

   /* This is common to all 8 cases */
   if (!ISBIN(e, opXOR)) goto fail;

   /*                        -----and------ */
   /*                         --xor---      */
   /* find variant 1:   a1 ^ ((a2 ^ b) & c) */
   /* find variant 2:   a1 ^ ((b ^ a2) & c) */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1  = LL(e);
   and = STEP(RR(e));
   if (!ISBIN(and, opAND)) goto v34;
   xor = STEP(LL(and));
   c   = RR(and);
   if (!ISBIN(xor, opXOR)) goto v34;
   a2bL = LL(xor);
   a2bR = RR(xor);

   /* The !eqIRAtom on the other arm ensures a2 and b are distinct,
      so the variant is identified unambiguously. */
   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 1;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 2;
   }

  v34:
   /*   -----and------        */
   /*    --xor---             */
   /* ((a2 ^ b) & c) ^ a1   == find variant 3 */
   /* ((b ^ a2) & c) ^ a1   == find variant 4 */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1  = RR(e);
   and = STEP(LL(e));
   if (!ISBIN(and, opAND)) goto v56;
   xor = STEP(LL(and));
   c   = RR(and);
   if (!ISBIN(xor, opXOR)) goto v56;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 3;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 4;
   }

  v56:
   /*                        -----and------ */
   /*                              --xor--- */
   /* find variant 5:   a1 ^ (c & (a2 ^ b)) */
   /* find variant 6:   a1 ^ (c & (b ^ a2)) */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1  = LL(e);
   and = STEP(RR(e));
   if (!ISBIN(and, opAND)) goto v78;
   xor = STEP(RR(and));
   c   = LL(and);
   if (!ISBIN(xor, opXOR)) goto v78;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      /* 5-5 == 0, so this deliberately fails: variant has never been
         observed in practice ("ATC" = awaiting test coverage). */
      vassert(5-5); // ATC
      return 5;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      /* Likewise untested; 6-6 == 0 makes the assertion fail. */
      vassert(6-6); // ATC
      return 6;
   }

  v78:
   /*   -----and------        */
   /*         --xor---        */
   /* (c & (a2 ^ b)) ^ a1   == find variant 7 */
   /* (c & (b ^ a2)) ^ a1   == find variant 8 */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1  = RR(e);
   and = STEP(LL(e));
   if (!ISBIN(and, opAND)) goto fail;
   xor = STEP(RR(and));
   c   = LL(and);
   if (!ISBIN(xor, opXOR)) goto fail;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 7;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 8;
   }

  fail:
   return 0;

#  undef ISBIN
#  undef ISATOM
#  undef STEP
#  undef LL
#  undef RR
}
6172
6173 /* If |e| is of the form a ^ ((a ^ b) & c) (or any of the 7 other
6174 variants thereof generated by switching arguments around), return
6175 the IRExpr* for (a & ~c) | (b & c). Else return NULL. */
static IRExpr* do_XOR_TRANSFORMS_IRExpr ( IRExpr** env, IRExpr* e )
{
   if (e->tag != Iex_Binop)
      return NULL;

   /* Select the operator family (I8/I16/I32) from the top-level XOR. */
   const HChar* typeName = NULL;
   IROp op_or  = Iop_INVALID;
   IROp op_and = Iop_INVALID;
   IROp op_not = Iop_INVALID;
   IROp op_xor = Iop_INVALID;
   switch (e->Iex.Binop.op) {
      case Iop_Xor32:
         typeName = "I32";
         op_or  = Iop_Or32;  op_and = Iop_And32;
         op_not = Iop_Not32; op_xor = Iop_Xor32;
         break;
      case Iop_Xor16:
         typeName = "I16";
         op_or  = Iop_Or16;  op_and = Iop_And16;
         op_not = Iop_Not16; op_xor = Iop_Xor16;
         break;
      case Iop_Xor8:
         typeName = "I8";
         op_or  = Iop_Or8;   op_and = Iop_And8;
         op_not = Iop_Not8;  op_xor = Iop_Xor8;
         break;
      default:
         return NULL;
   }

   IRExpr* a = NULL;
   IRExpr* b = NULL;
   IRExpr* c = NULL;
   UInt which = spotBitfieldAssignment(&a, &b, &c, env, e, op_and, op_xor);
   if (which == 0)
      return NULL;

   static UInt ctr = 0;
   if (0)
      vex_printf("XXXXXXXXXX Bitfield Assignment number %u, "
                 "type %s, variant %u\n",
                 ++ctr, typeName, which);
   /* It's vitally important that the returned a, b and c are
      atoms -- either constants or tmps.  If it's anything else
      (eg, a GET) then incorporating them in a tree at this point
      in the SB may erroneously pull them forwards (eg of a PUT
      that originally was after the GET) and so transform the IR
      wrongly.  spotBitfieldAssignment should guarantee only to
      give us atoms, but we check here anyway. */
   vassert(a && isIRAtom(a));
   vassert(b && isIRAtom(b));
   vassert(c && isIRAtom(c));
   /* Build (a & ~c) | (b & c). */
   return IRExpr_Binop(
             op_or,
             IRExpr_Binop(op_and, a, IRExpr_Unop(op_not, c)),
             IRExpr_Binop(op_and, b, c)
          );
}
6234
6235
6236 /* SB is modified in-place. Visit all the IRExprs and, for those
6237 which are allowed to be non-atomic, perform the XOR transform if
6238 possible. This makes |sb| be non-flat, but that's ok, the caller
6239 can re-flatten it. Returns True iff any changes were made. */
static Bool do_XOR_TRANSFORM_IRSB ( IRSB* sb )
{
   Int i;
   Bool changed = False;

   /* Make the tmp->expr environment, so we can use it for
      chasing expressions. */
   Int n_tmps = sb->tyenv->types_used;
   IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   for (i = 0; i < n_tmps; i++)
      env[i] = NULL;

   /* Pass 1: bind each tmp to its defining expression, so the
      pattern matcher can look through tmp references. */
   for (i = 0; i < sb->stmts_used; i++) {
      IRStmt* st = sb->stmts[i];
      if (st->tag != Ist_WrTmp)
         continue;
      IRTemp t = st->Ist.WrTmp.tmp;
      vassert(t >= 0 && t < n_tmps);
      env[t] = st->Ist.WrTmp.data;
   }

   /* Pass 2: walk every statement.  For all statement kinds except
      Ist_WrTmp we merely assert flatness (operands must already be
      atoms); only a WrTmp's rhs is eligible for the transform. */
   for (i = 0; i < sb->stmts_used; i++) {
      IRStmt* st = sb->stmts[i];

      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI: {
            IRPutI* puti = st->Ist.PutI.details;
            vassert(isIRAtom(puti->ix));
            vassert(isIRAtom(puti->data));
            break;
         }
         case Ist_WrTmp: {
            /* This is the one place where an expr (st->Ist.WrTmp.data) is
               allowed to be more than just a constant or a tmp. */
            IRExpr* mb_new_data
               = do_XOR_TRANSFORMS_IRExpr(env, st->Ist.WrTmp.data);
            if (mb_new_data) {
               //ppIRSB(sb);
               st->Ist.WrTmp.data = mb_new_data;
               //ppIRSB(sb);
               changed = True;
            }
            break;
         }
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_StoreG: {
            IRStoreG* sg = st->Ist.StoreG.details;
            vassert(isIRAtom(sg->addr));
            vassert(isIRAtom(sg->data));
            vassert(isIRAtom(sg->guard));
            break;
         }
         case Ist_LoadG: {
            IRLoadG* lg = st->Ist.LoadG.details;
            vassert(isIRAtom(lg->addr));
            vassert(isIRAtom(lg->alt));
            vassert(isIRAtom(lg->guard));
            break;
         }
         case Ist_CAS: {
            /* The hi halves are optional (absent for single CAS). */
            IRCAS* cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         }
         case Ist_LLSC:
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty: {
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               vassert(isIRAtom(d->mAddr));
            }
            vassert(isIRAtom(d->guard));
            /* d->args is a NULL-terminated vector. */
            for (Int j = 0; d->args[j]; j++) {
               IRExpr* arg = d->args[j];
               if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) {
                  vassert(isIRAtom(arg));
               }
            }
            break;
         }
         case Ist_IMark:
         case Ist_NoOp:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
            vex_printf("\n"); ppIRStmt(st);
            vpanic("do_XOR_TRANSFORMS_IRSB");
      }
   }

   vassert(isIRAtom(sb->next));
   return changed;
}
6353
6354
static IRSB* do_MSVC_HACKS ( IRSB* sb )
{
   /* Normalise as much as possible first.  This is the one and only
      place that calls do_cse_BB with allowLoadsToBeCSEd set to True. */
   const Bool cseChanged = do_cse_BB( sb, True/*allowLoadsToBeCSEd*/ );
   if (cseChanged) {
      /* CSEing might have created dead code; constant-propagate and
         then remove it. */
      sb = cprop_BB ( sb );
      do_deadcode_BB(sb);
   }

   /* Apply the XOR transform to all eligible expressions.  |sb| is
      modified in place. */
   if (do_XOR_TRANSFORM_IRSB(sb)) {
      /* The transform generates non-flat expressions, so the block
         has to be re-flattened. */
      sb = flatten_BB(sb);
   }

   return sb;
}
6378
6379
6380 /*---------------------------------------------------------------*/
6381 /*--- iropt main ---*/
6382 /*---------------------------------------------------------------*/
6383
6384 static Bool iropt_verbose = False; /* True; */
6385
6386
6387 /* Do a simple cleanup pass on bb. This is: redundant Get removal,
6388 redundant Put removal, constant propagation, dead code removal,
6389 clean helper specialisation, and dead code removal (again).
6390 */
6391
6392
6393 static
cheap_transformations(IRSB * bb,IRExpr * (* specHelper)(const HChar *,IRExpr **,IRStmt **,Int),Bool (* preciseMemExnsFn)(Int,Int,VexRegisterUpdates),VexRegisterUpdates pxControl)6394 IRSB* cheap_transformations (
6395 IRSB* bb,
6396 IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
6397 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
6398 VexRegisterUpdates pxControl
6399 )
6400 {
6401 redundant_get_removal_BB ( bb );
6402 if (iropt_verbose) {
6403 vex_printf("\n========= REDUNDANT GET\n\n" );
6404 ppIRSB(bb);
6405 }
6406
6407 if (pxControl < VexRegUpdAllregsAtEachInsn) {
6408 redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
6409 }
6410 if (iropt_verbose) {
6411 vex_printf("\n========= REDUNDANT PUT\n\n" );
6412 ppIRSB(bb);
6413 }
6414
6415 bb = cprop_BB ( bb );
6416 if (iropt_verbose) {
6417 vex_printf("\n========= CPROPD\n\n" );
6418 ppIRSB(bb);
6419 }
6420
6421 do_deadcode_BB ( bb );
6422 if (iropt_verbose) {
6423 vex_printf("\n========= DEAD\n\n" );
6424 ppIRSB(bb);
6425 }
6426
6427 bb = spec_helpers_BB ( bb, specHelper );
6428 do_deadcode_BB ( bb );
6429 if (iropt_verbose) {
6430 vex_printf("\n========= SPECd \n\n" );
6431 ppIRSB(bb);
6432 }
6433
6434 return bb;
6435 }
6436
6437
6438 /* Do some more expensive transformations on bb, which are aimed at
6439 optimising as much as possible in the presence of GetI and PutI. */
6440
6441 static
expensive_transformations(IRSB * bb,VexRegisterUpdates pxControl)6442 IRSB* expensive_transformations( IRSB* bb, VexRegisterUpdates pxControl )
6443 {
6444 (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
6445 collapse_AddSub_chains_BB( bb );
6446 do_redundant_GetI_elimination( bb );
6447 if (pxControl < VexRegUpdAllregsAtEachInsn) {
6448 do_redundant_PutI_elimination( bb, pxControl );
6449 }
6450 do_deadcode_BB( bb );
6451 return bb;
6452 }
6453
6454
6455 /* Scan a flattened BB to look for signs that more expensive
6456 optimisations might be useful:
6457 - find out if there are any GetIs and PutIs
6458 - find out if there are any floating or vector-typed temporaries
6459 */
6460
static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
                                 /*OUT*/Bool* hasVorFtemps,
                                 IRSB* bb )
{
   Int      i, j;
   IRStmt*  st;
   IRDirty* d;
   IRCAS*   cas;

   /* Both outputs start False; a single sighting flips them and they
      are never reset. */
   *hasGetIorPutI = False;
   *hasVorFtemps  = False;

   /* Walk every statement, simultaneously sanity-checking that the
      block is flat (all operands are atoms). */
   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_PutI:
            *hasGetIorPutI = True;
            break;
         case Ist_WrTmp:
            if (st->Ist.WrTmp.data->tag == Iex_GetI)
               *hasGetIorPutI = True;
            /* Classify the tmp's type: plain integer widths don't
               count; float, vector and decimal-float widths do. */
            switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
               case Ity_I1: case Ity_I8: case Ity_I16:
               case Ity_I32: case Ity_I64: case Ity_I128:
                  break;
               case Ity_F16: case Ity_F32: case Ity_F64: case Ity_F128:
               case Ity_V128: case Ity_V256:
                  *hasVorFtemps = True;
                  break;
               case Ity_D32: case Ity_D64: case Ity_D128:
                  *hasVorFtemps = True;
                  break;
               default:
                  goto bad;
            }
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_StoreG: {
            IRStoreG* sg = st->Ist.StoreG.details;
            vassert(isIRAtom(sg->addr));
            vassert(isIRAtom(sg->data));
            vassert(isIRAtom(sg->guard));
            break;
         }
         case Ist_LoadG: {
            IRLoadG* lg = st->Ist.LoadG.details;
            vassert(isIRAtom(lg->addr));
            vassert(isIRAtom(lg->alt));
            vassert(isIRAtom(lg->guard));
            break;
         }
         case Ist_CAS:
            /* The hi halves are optional (absent for single CAS). */
            cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         case Ist_LLSC:
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty:
            /* d->args is a NULL-terminated vector. */
            d = st->Ist.Dirty.details;
            vassert(isIRAtom(d->guard));
            for (j = 0; d->args[j]; j++) {
               IRExpr* arg = d->args[j];
               if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
                  vassert(isIRAtom(arg));
            }
            if (d->mFx != Ifx_None)
               vassert(isIRAtom(d->mAddr));
            break;
         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
         bad:
            ppIRStmt(st);
            vpanic("considerExpensives");
      }
   }
}
6560
6561
6562 /* ---------------- The main iropt entry point. ---------------- */
6563
6564 /* exported from this file */
6565 /* Rules of the game:
6566
6567 - IRExpr/IRStmt trees should be treated as immutable, as they
6568 may get shared. So never change a field of such a tree node;
6569 instead construct and return a new one if needed.
6570 */
6571
6572
/* Optimise |bb0|.  |specHelper| is used to specialise clean-helper
   calls; |preciseMemExnsFn| and |pxControl| gate how aggressively
   redundant Puts/PutIs may be removed; |guest_addr| is passed to the
   loop unroller; |guest_arch| selects arch-specific extra cleanup
   (currently only for ARM/Thumb2).  Returns the optimised block. */
IRSB* do_iropt_BB(
         IRSB* bb0,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
         VexRegisterUpdates pxControl,
         Addr    guest_addr,
         VexArch guest_arch
      )
{
   /* Counters for the DEBUG_IROPT diagnostic printout below. */
   static Int n_total     = 0;
   static Int n_expensive = 0;

   Bool hasGetIorPutI, hasVorFtemps;
   IRSB *bb, *bb2;

   n_total++;

   /* First flatten the block out, since all other
      phases assume flat code. */

   bb = flatten_BB ( bb0 );

   if (iropt_verbose) {
      vex_printf("\n========= FLAT\n\n" );
      ppIRSB(bb);
   }

   /* If at level 0, stop now. */
   if (vex_control.iropt_level <= 0) return bb;

   /* Now do a preliminary cleanup pass, and figure out if we also
      need to do 'expensive' optimisations.  Expensive optimisations
      are deemed necessary if the block contains any GetIs or PutIs.
      If needed, do expensive transformations and then another cheap
      cleanup pass. */

   bb = cheap_transformations( bb, specHelper, preciseMemExnsFn, pxControl );

   if (guest_arch == VexArchARM) {
      /* Translating Thumb2 code produces a lot of chaff.  We have to
         work extra hard to get rid of it. */
      bb = cprop_BB(bb);
      bb = spec_helpers_BB ( bb, specHelper );
      if (pxControl < VexRegUpdAllregsAtEachInsn) {
         redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
      }
      do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
      do_deadcode_BB( bb );
   }

   if (vex_control.iropt_level > 1) {

      /* Peer at what we have, to decide how much more effort to throw
         at it. */
      considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );

      if (hasVorFtemps && !hasGetIorPutI) {
         /* If any evidence of FP or Vector activity, CSE, as that
            tends to mop up all manner of lardy code to do with
            rounding modes.  Don't bother if hasGetIorPutI since that
            case leads into the expensive transformations, which do
            CSE anyway. */
         (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         do_deadcode_BB( bb );
      }

      if (hasGetIorPutI) {
         Bool cses;
         n_expensive++;
         if (DEBUG_IROPT)
            vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
         bb = expensive_transformations( bb, pxControl );
         bb = cheap_transformations( bb, specHelper,
                                     preciseMemExnsFn, pxControl );
         /* Potentially common up GetIs */
         cses = do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         if (cses)
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
      }

      ///////////////////////////////////////////////////////////
      // BEGIN MSVC optimised code transformation hacks
      if (0)
         bb = do_MSVC_HACKS(bb);
      // END MSVC optimised code transformation hacks
      ///////////////////////////////////////////////////////////

      /* Now have a go at unrolling simple (single-BB) loops.  If
         successful, clean up the results as much as possible. */

      bb2 = maybe_loop_unroll_BB( bb, guest_addr );
      if (bb2) {
         /* Unrolling succeeded; clean the enlarged block. */
         bb = cheap_transformations( bb2, specHelper,
                                     preciseMemExnsFn, pxControl );
         if (hasGetIorPutI) {
            bb = expensive_transformations( bb, pxControl );
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
         } else {
            /* at least do CSE and dead code removal */
            do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
            do_deadcode_BB( bb );
         }
         if (0) vex_printf("vex iropt: unrolled a loop\n");
      }

   }

   return bb;
}
6684
6685
6686 /*---------------------------------------------------------------*/
6687 /*--- end ir_opt.c ---*/
6688 /*---------------------------------------------------------------*/
6689