
/*---------------------------------------------------------------*/
/*--- begin                                          ir_opt.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2010 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "ir_opt.h"


/* Set to 1 for lots of debugging output. */
#define DEBUG_IROPT 0


/* What iropt does, 29 Dec 04.

   It takes an IRSB and produces a new one with the same meaning,
   defined thus:

   After execution of the new BB, all guest state and guest memory is
   the same as after execution of the original.  This is true
   regardless of how the block was exited (at the end vs side exit).

   In addition, parts of the guest state will be identical to that
   created by execution of the original at the following observation
   points:

   * In a dirty helper call, any parts of the guest state that the
     helper states that it reads or modifies will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as being read or modified by the helper cannot be
     assumed to be up-to-date at the point where the helper is called.

   * Immediately prior to any load or store, those parts of the guest
     state marked as requiring precise exceptions will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as requiring precise exceptions cannot be assumed to
     be up-to-date at the point of the load/store.

   The relative order of loads and stores (including loads/stores of
   guest memory done by dirty helpers annotated as such) is not
   changed.  However, the relative order of loads with no intervening
   stores/modifies may be changed.

   Transformation order
   ~~~~~~~~~~~~~~~~~~~~

   There are three levels of optimisation, controlled by
   vex_control.iropt_level.  Define first:

   "Cheap transformations" are the following sequence:
      * Redundant-Get removal
      * Redundant-Put removal
      * Constant propagation/folding
      * Dead code removal
      * Specialisation of clean helper functions
      * Dead code removal

   "Expensive transformations" are the following sequence:
      * CSE
      * Folding of add/sub chains
      * Redundant-GetI removal
      * Redundant-PutI removal
      * Dead code removal

   Then the transformations are as follows, as defined by
   vex_control.iropt_level:

   Level 0:
      * Flatten into atomic form.

   Level 1: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.

   Level 2: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.
      * If block contains any floating or vector types, CSE.
      * If block contains GetI or PutI, Expensive transformations.
      * Try unrolling loops.  Three possible outcomes:
        - No effect: do nothing more.
        - Unrolled a loop, and block does not contain GetI or PutI:
          Do: * CSE
              * Dead code removal
        - Unrolled a loop, and block contains GetI or PutI:
          Do: * Expensive transformations
              * Cheap transformations

   (An illustrative sketch of the cheap sequence appears just after
   this comment.)
*/
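
/* A hedged, illustrative sketch (not part of the build) of how the
   "cheap transformations" sequence above can be driven.  It is
   restricted to the passes visible in this part of the file; the
   dead-code-removal and specialisation steps, and all of level 2's
   CSE/unrolling machinery, live elsewhere and are omitted.
   'preciseMemExnsFn' is whatever callback the guest description
   supplies. */
#if 0
static IRSB* cheap_transformations_sketch
   ( IRSB* bb0, Bool (*preciseMemExnsFn)(Int,Int) )
{
   IRSB* bb = flatten_BB(bb0);   /* atomic (flat) SSA form first */
   redundant_get_removal_BB(bb); /* Redundant-Get removal */
   redundant_put_removal_BB(bb, preciseMemExnsFn); /* Redundant-Put */
   bb = cprop_BB(bb);            /* constant propagation/folding */
   return bb;
}
#endif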

/* Implementation notes, 29 Dec 04.

   TODO (important): I think rPutI removal ignores precise exceptions
   and is therefore, in a sense, wrong: PutIs are assumed not to
   write parts of the guest state that we need to have up-to-date at
   loads/stores.  So far on the x86 guest that has not mattered,
   since indeed only the x87 FP registers and tags are accessed using
   GetI/PutI, and there is no need so far for them to be up to date
   at mem exception points.  The rPutI pass should be fixed.

   TODO: improve pessimistic handling of precise exceptions
     in the tree builder.

   TODO: check interaction of rGetI and dirty helpers.

   F64i constants are treated differently from other constants.
   They are not regarded as atoms, and instead lifted off and
   bound to temps.  This allows them to participate in CSE, which
   is important for getting good performance for x86 guest code.

   TODO: CSE up F64 literals (already doing F64is).

   TODO: CSE: consider carefully the requirement for precise exns
     prior to making CSE any more aggressive. */


/*---------------------------------------------------------------*/
/*--- Finite mappery, of a sort                               ---*/
/*---------------------------------------------------------------*/

/* General map from HWord-sized thing to HWord-sized thing.  Could be
   by hashing, but it's not clear whether or not this would really be
   any faster. */

typedef
   struct {
      Bool*  inuse;
      HWord* key;
      HWord* val;
      Int    size;
      Int    used;
   }
   HashHW;

static HashHW* newHHW ( void )
{
   HashHW* h = LibVEX_Alloc(sizeof(HashHW));
   h->size   = 8;
   h->used   = 0;
   h->inuse  = LibVEX_Alloc(h->size * sizeof(Bool));
   h->key    = LibVEX_Alloc(h->size * sizeof(HWord));
   h->val    = LibVEX_Alloc(h->size * sizeof(HWord));
   return h;
}


/* Look up key in the map. */

static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
{
   Int i;
   /* vex_printf("lookupHHW(%llx)\n", key ); */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         if (val)
            *val = h->val[i];
         return True;
      }
   }
   return False;
}


/* Add key->val to the map.  Replaces any existing binding for key. */

static void addToHHW ( HashHW* h, HWord key, HWord val )
{
   Int i, j;
   /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */

   /* Find and replace existing binding, if any. */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         h->val[i] = val;
         return;
      }
   }

   /* Ensure a space is available. */
   if (h->used == h->size) {
      /* Copy into arrays twice the size. */
      Bool*  inuse2 = LibVEX_Alloc(2 * h->size * sizeof(Bool));
      HWord* key2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
      HWord* val2   = LibVEX_Alloc(2 * h->size * sizeof(HWord));
      for (i = j = 0; i < h->size; i++) {
         if (!h->inuse[i]) continue;
         inuse2[j] = True;
         key2[j]   = h->key[i];
         val2[j]   = h->val[i];
         j++;
      }
      h->used  = j;
      h->size *= 2;
      h->inuse = inuse2;
      h->key   = key2;
      h->val   = val2;
   }

   /* Finally, add it. */
   vassert(h->used < h->size);
   h->inuse[h->used] = True;
   h->key[h->used]   = key;
   h->val[h->used]   = val;
   h->used++;
}
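
/* Illustrative usage of the HashHW map above -- a sketch, not part of
   the build.  It exercises only newHHW, addToHHW and lookupHHW as
   defined in this section; the raw HWord casts mirror how callers
   below stash pointers and keys in the map. */
#if 0
static void exampleHHW ( void )
{
   HWord   v   = 0;
   HashHW* map = newHHW();
   addToHHW(map, (HWord)0x10, (HWord)0x99);  /* bind 0x10 -> 0x99 */
   addToHHW(map, (HWord)0x10, (HWord)0x77);  /* rebind 0x10 -> 0x77 */
   if (lookupHHW(map, &v, (HWord)0x10))
      vassert(v == 0x77);                    /* replaced, not duplicated */
   vassert(!lookupHHW(map, NULL, (HWord)0x20)); /* absent key */
}
#endif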


/*---------------------------------------------------------------*/
/*--- Flattening out a BB into atomic SSA form                ---*/
/*---------------------------------------------------------------*/

/* Non-critical helper, heuristic for reducing the number of tmp-tmp
   copies made by flattening.  If in doubt return False. */

static Bool isFlat ( IRExpr* e )
{
   if (e->tag == Iex_Get)
      return True;
   if (e->tag == Iex_Binop)
      return toBool( isIRAtom(e->Iex.Binop.arg1)
                     && isIRAtom(e->Iex.Binop.arg2) );
   if (e->tag == Iex_Load)
      return isIRAtom(e->Iex.Load.addr);
   return False;
}

/* Flatten out 'ex' so it is atomic, returning a new expression with
   the same value, after having appended extra IRTemp assignments to
   the end of 'bb'. */

static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   switch (ex->tag) {

      case Iex_GetI:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(ex->Iex.Qop.op,
                       flatten_Expr(bb, ex->Iex.Qop.arg1),
                       flatten_Expr(bb, ex->Iex.Qop.arg2),
                       flatten_Expr(bb, ex->Iex.Qop.arg3),
                       flatten_Expr(bb, ex->Iex.Qop.arg4))));
         return IRExpr_RdTmp(t1);

      case Iex_Triop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(ex->Iex.Triop.op,
                         flatten_Expr(bb, ex->Iex.Triop.arg1),
                         flatten_Expr(bb, ex->Iex.Triop.arg2),
                         flatten_Expr(bb, ex->Iex.Triop.arg3))));
         return IRExpr_RdTmp(t1);

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_Mux0X:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Mux0X(flatten_Expr(bb, ex->Iex.Mux0X.cond),
                         flatten_Expr(bb, ex->Iex.Mux0X.expr0),
                         flatten_Expr(bb, ex->Iex.Mux0X.exprX))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         return ex;

      default:
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}


/* Append a completely flattened form of 'st' to the end of 'bb'. */

static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr  *e1, *e2, *e3, *e4, *e5;
   IRDirty *d,  *d2;
   IRCAS   *cas, *cas2;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         e1 = flatten_Expr(bb, st->Ist.PutI.ix);
         e2 = flatten_Expr(bb, st->Ist.PutI.data);
         addStmtToIRSB(bb, IRStmt_PutI(st->Ist.PutI.descr,
                                       e1,
                                       st->Ist.PutI.bias,
                                       e2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
         break;
      case Ist_CAS:
         cas  = st->Ist.CAS.details;
         e1   = flatten_Expr(bb, cas->addr);
         e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3   = flatten_Expr(bb, cas->expdLo);
         e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5   = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++)
            d2->args[i] = flatten_Expr(bb, d2->args[i]);
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst));
         break;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}


static IRSB* flatten_BB ( IRSB* in )
{
   Int   i;
   IRSB* out;
   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   for (i = 0; i < in->stmts_used; i++)
      if (in->stmts[i])
         flatten_Stmt( out, in->stmts[i] );
   out->next     = flatten_Expr( out, in->next );
   out->jumpkind = in->jumpkind;
   return out;
}
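
/* Example (illustrative, not generated output): flattening

      t1 = Add32(GET:I32(0),GET:I32(4))

   introduces fresh temps so that every operator argument becomes
   atomic (a temp or constant):

      t2 = GET:I32(0)
      t3 = GET:I32(4)
      t1 = Add32(t2,t3)
*/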


/*---------------------------------------------------------------*/
/*--- In-place removal of redundant GETs                      ---*/
/*---------------------------------------------------------------*/

/* Scan forwards, building up an environment binding (min offset, max
   offset) pairs to values, which will either be temps or constants.

   On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
   env and if it matches, replace the Get with the stored value.  If
   there is no match, add a (minoff,maxoff) :-> t binding.

   On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
   any binding which fully or partially overlaps with (minoff,maxoff).
   Then add a new (minoff,maxoff) :-> t or c binding. */
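
/* Example (illustrative): in

      t1 = GET:I32(0)
      t2 = GET:I32(0)

   the second statement is rewritten to 't2 = t1', since the env still
   binds bytes (0 .. 3) to t1.  An intervening overlapping Put would
   first invalidate that binding (and, for a simple Put, re-bind the
   range to the value just stored). */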

/* Extract the min/max offsets from a guest state array descriptor. */

inline
static void getArrayBounds ( IRRegArray* descr,
                             UInt* minoff, UInt* maxoff )
{
   *minoff = descr->base;
   *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
   vassert((*minoff & ~0xFFFF) == 0);
   vassert((*maxoff & ~0xFFFF) == 0);
   vassert(*minoff <= *maxoff);
}

/* Create keys, of the form ((minoffset << 16) | maxoffset). */

static UInt mk_key_GetPut ( Int offset, IRType ty )
{
   /* offset should fit in 16 bits. */
   UInt minoff = offset;
   UInt maxoff = minoff + sizeofIRType(ty) - 1;
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}
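
/* Worked example (illustrative): for a 32-bit Get/Put at offset 16,
   minoff = 0x10 and maxoff = 16 + 4 - 1 = 19 = 0x13, so the key is
   (0x10 << 16) | 0x13 = 0x00100013. */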

static UInt mk_key_GetIPutI ( IRRegArray* descr )
{
   UInt minoff, maxoff;
   getArrayBounds( descr, &minoff, &maxoff );
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}

/* Supposing h has keys of the form generated by mk_key_GetPut and
   mk_key_GetIPutI, invalidate any key which overlaps (k_lo
   .. k_hi).
*/
static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
{
   Int  j;
   UInt e_lo, e_hi;
   vassert(k_lo <= k_hi);
   /* invalidate any env entries which in any way overlap (k_lo
      .. k_hi) */
   /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */

   for (j = 0; j < h->used; j++) {
      if (!h->inuse[j])
         continue;
      e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
      e_hi = ((UInt)h->key[j]) & 0xFFFF;
      vassert(e_lo <= e_hi);
      if (e_hi < k_lo || k_hi < e_lo)
         continue; /* no overlap possible */
      else
         /* overlap; invalidate */
         h->inuse[j] = False;
   }
}
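
/* Example (illustrative): an entry covering bytes (0 .. 3) is
   invalidated by a write to (2 .. 5), since neither 'e_hi < k_lo' nor
   'k_hi < e_lo' holds; a write to (4 .. 7) leaves it alone. */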


static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */
   Int     i, j;
   HWord   val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf("  ->  "); ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                           (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
               writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}


/*---------------------------------------------------------------*/
/*--- In-place removal of redundant PUTs                      ---*/
/*---------------------------------------------------------------*/

/* Find any Get uses in st and invalidate any partially or fully
   overlapping ranges listed in env.  Due to the flattening phase, the
   only stmt kind we expect to find a Get on is IRStmt_WrTmp. */

static void handle_gets_Stmt (
               HashHW* env,
               IRStmt* st,
               Bool (*preciseMemExnsFn)(Int,Int)
            )
{
   Int     j;
   UInt    key = 0; /* keep gcc -O happy */
   Bool    isGet;
   Bool    memRW = False;
   IRExpr* e;

   switch (st->tag) {

      /* This is the only interesting case.  Deal with Gets in the RHS
         expression. */
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
               isGet = True;
               key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
               break;
            case Iex_GetI:
               isGet = True;
               key = mk_key_GetIPutI ( e->Iex.GetI.descr );
               break;
            case Iex_Load:
               isGet = False;
               memRW = True;
               break;
            default:
               isGet = False;
         }
         if (isGet) {
            UInt k_lo, k_hi;
            k_lo = (key >> 16) & 0xFFFF;
            k_hi = key & 0xFFFF;
            invalidateOverlaps(env, k_lo, k_hi);
         }
         break;

      /* Be very conservative for dirty helper calls; dump the entire
         environment.  The helper might read guest state, in which
         case it needs to be flushed first.  Also, the helper might
         access guest memory, in which case all parts of the guest
         state requiring precise exceptions need to be flushed.  The
         crude solution is just to flush everything; we could easily
         enough do a lot better if needed. */
      /* Probably also overly-conservative, but also dump everything
         if we hit a memory bus event (fence, lock, unlock).  Ditto
         AbiHints, CASs, LLs and SCs. */
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         /* fall through */
      case Ist_MBE:
      case Ist_Dirty:
      case Ist_CAS:
      case Ist_LLSC:
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         break;

      /* all other cases are boring. */
      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         memRW = True;
         break;

      case Ist_Exit:
         vassert(isIRAtom(st->Ist.Exit.guard));
         break;

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.ix));
         vassert(isIRAtom(st->Ist.PutI.data));
         break;

      case Ist_NoOp:
      case Ist_IMark:
         break;

      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("handle_gets_Stmt");
   }

   if (memRW) {
      /* This statement accesses memory.  So we need to dump all parts
         of the environment corresponding to guest state that may not
         be reordered with respect to memory references.  That means
         at least the stack pointer. */
      for (j = 0; j < env->used; j++) {
         if (!env->inuse[j])
            continue;
         if (vex_control.iropt_precise_memory_exns) {
            /* Precise exceptions required.  Flush all guest state. */
            env->inuse[j] = False;
         } else {
            /* Just flush the minimal amount required, as computed by
               preciseMemExnsFn. */
            HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
            HWord k_hi = env->key[j] & 0xFFFF;
            if (preciseMemExnsFn( k_lo, k_hi ))
               env->inuse[j] = False;
         }
      }
   } /* if (memRW) */

}


/* Scan backwards, building up a set of (min offset, max
   offset) pairs, indicating those parts of the guest state
   for which the next event is a write.

   On seeing a conditional exit, empty the set.

   On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
   completely within the set, remove the Put.  Otherwise, add
   (minoff,maxoff) to the set.

   On seeing 'Get (minoff,maxoff)', remove any part of the set
   overlapping (minoff,maxoff).  The same has to happen for any events
   which implicitly read parts of the guest state: dirty helper calls
   and loads/stores.
*/
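
/* Example (illustrative): scanning backwards through

      PUT(0) = t1
      PUT(0) = t2

   the later 'PUT(0) = t2' is seen first and enters the set, so the
   earlier 'PUT(0) = t1' is found to be redundant and is NULLed out.
   An intervening conditional exit, or an overlapping Get, would have
   removed (0 .. 3) from the set first, preserving the earlier Put. */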

static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int)
            )
{
   Int     i, j;
   Bool    isPut;
   IRStmt* st;
   UInt    key = 0; /* keep gcc -O happy */

   HashHW* env = newHHW();
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         /* Since control may not get beyond this point, we must empty
            out the set, since we can no longer claim that the next
            event for any part of the guest state is definitely a
            write. */
         vassert(isIRAtom(st->Ist.Exit.guard));
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         continue;
      }

      /* Deal with Puts */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.descr );
            vassert(isIRAtom(st->Ist.PutI.ix));
            vassert(isIRAtom(st->Ist.PutI.data));
            break;
         default:
            isPut = False;
      }
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add it
               to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn );
   }
}


/*---------------------------------------------------------------*/
/*--- Constant propagation and folding                        ---*/
/*---------------------------------------------------------------*/

/* The env in this section is a map from IRTemp to IRExpr*,
   that is, an array indexed by IRTemp. */

/* Are both expressions simply the same IRTemp ? */
static Bool sameIRTemps ( IRExpr* e1, IRExpr* e2 )
{
   return toBool( e1->tag == Iex_RdTmp
                  && e2->tag == Iex_RdTmp
                  && e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp );
}

static Bool sameIcoU32s ( IRExpr* e1, IRExpr* e2 )
{
   return toBool( e1->tag == Iex_Const
                  && e2->tag == Iex_Const
                  && e1->Iex.Const.con->tag == Ico_U32
                  && e2->Iex.Const.con->tag == Ico_U32
                  && e1->Iex.Const.con->Ico.U32
                     == e2->Iex.Const.con->Ico.U32 );
}

/* Are both expressions either the same IRTemp or IRConst-U32s ?  If
   in doubt, say No. */
static Bool sameIRTempsOrIcoU32s ( IRExpr* e1, IRExpr* e2 )
{
   switch (e1->tag) {
      case Iex_RdTmp:
         return sameIRTemps(e1, e2);
      case Iex_Const:
         return sameIcoU32s(e1, e2);
      default:
         return False;
   }
}

static Bool notBool ( Bool b )
{
   if (b == True) return False;
   if (b == False) return True;
   vpanic("notBool");
}

/* Make a zero which has the same type as the result of the given
   primop. */
static IRExpr* mkZeroOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
      case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
      case Iop_Sub32:
      case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
      case Iop_Sub64:
      case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
      case Iop_XorV128: return IRExpr_Const(IRConst_V128(0));
      default: vpanic("mkZeroOfPrimopResultType: bad primop");
   }
}

/* Make a value containing all 1-bits, which has the same type as the
   result of the given primop. */
static IRExpr* mkOnesOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_CmpEQ64:
         return IRExpr_Const(IRConst_U1(toBool(1)));
      case Iop_CmpEQ8x8:
         return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
      case Iop_CmpEQ8x16:
         return IRExpr_Const(IRConst_V128(0xFFFF));
      default:
         vpanic("mkOnesOfPrimopResultType: bad primop");
   }
}


static IRExpr* fold_Expr ( IRExpr* e )
{
   Int     shift;
   IRExpr* e2 = e; /* e2 is the result of folding e, if possible */

   /* UNARY ops */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.arg->tag == Iex_Const) {
      switch (e->Iex.Unop.op) {
         case Iop_1Uto8:
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 1 : 0)));
            break;
         case Iop_1Uto32:
            e2 = IRExpr_Const(IRConst_U32(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 1 : 0));
            break;
         case Iop_1Uto64:
            e2 = IRExpr_Const(IRConst_U64(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 1 : 0));
            break;

         case Iop_1Sto8:
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 0xFF : 0)));
            break;
         case Iop_1Sto16:
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 0xFFFF : 0)));
            break;
         case Iop_1Sto32:
            e2 = IRExpr_Const(IRConst_U32(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 0xFFFFFFFF : 0));
            break;
         case Iop_1Sto64:
            e2 = IRExpr_Const(IRConst_U64(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 0xFFFFFFFFFFFFFFFFULL : 0));
            break;

         case Iop_8Sto32: {
            /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
            s32 <<= 24;
            s32 >>= 24;
            e2 = IRExpr_Const(IRConst_U32((UInt)s32));
            break;
         }
         case Iop_8Uto64:
            e2 = IRExpr_Const(IRConst_U64(
                    0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
            break;
         case Iop_16Uto64:
            e2 = IRExpr_Const(IRConst_U64(
                    0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
            break;
         case Iop_8Uto32:
            e2 = IRExpr_Const(IRConst_U32(
                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
            break;
         case Iop_16Uto32:
            e2 = IRExpr_Const(IRConst_U32(
                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
            break;
         case Iop_32to16:
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
            break;
         case Iop_32to8:
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
            break;
         case Iop_32to1:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                 )));
            break;
         case Iop_64to1:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
                 )));
            break;

         case Iop_Not64:
            e2 = IRExpr_Const(IRConst_U64(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
            break;
         case Iop_Not32:
            e2 = IRExpr_Const(IRConst_U32(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
            break;
         case Iop_Not16:
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
            break;
         case Iop_Not8:
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
            break;

         case Iop_Not1:
            e2 = IRExpr_Const(IRConst_U1(
                    notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
            break;

         case Iop_64to8: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 &= 0xFFULL;
            e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
            break;
         }
         case Iop_64to16: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 &= 0xFFFFULL;
            e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
            break;
         }
         case Iop_64to32: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 &= 0x00000000FFFFFFFFULL;
            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
            break;
         }
         case Iop_64HIto32: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 >>= 32;
            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
            break;
         }
         case Iop_32Uto64:
            e2 = IRExpr_Const(IRConst_U64(
                    0xFFFFFFFFULL
                    & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
            break;
         case Iop_32Sto64: {
            /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            s64 <<= 32;
            s64 >>= 32;
            e2 = IRExpr_Const(IRConst_U64((ULong)s64));
            break;
         }
         case Iop_CmpNEZ8:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    0 !=
                    (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
                 )));
            break;
         case Iop_CmpNEZ32:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    0 !=
                    (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                 )));
            break;
         case Iop_CmpNEZ64:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
                 )));
            break;

         case Iop_CmpwNEZ32: {
            UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            if (w32 == 0)
               e2 = IRExpr_Const(IRConst_U32( 0 ));
            else
               e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
            break;
         }
         case Iop_CmpwNEZ64: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            if (w64 == 0)
               e2 = IRExpr_Const(IRConst_U64( 0 ));
            else
               e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
            break;
         }

         case Iop_Left32: {
            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            Int  s32 = (Int)(u32 & 0xFFFFFFFF);
            s32 = (s32 | (-s32));
            e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
            break;
         }

         case Iop_Left64: {
            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            Long  s64 = (Long)u64;
            s64 = (s64 | (-s64));
            e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
            break;
         }

         default:
            goto unhandled;
      }
   }

   /* BINARY ops */
   if (e->tag == Iex_Binop) {
      if (e->Iex.Binop.arg1->tag == Iex_Const
          && e->Iex.Binop.arg2->tag == Iex_Const) {
         /* cases where both args are consts */
         switch (e->Iex.Binop.op) {

            /* -- Or -- */
            case Iop_Or8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Or16:
               e2 = IRExpr_Const(IRConst_U16(toUShort(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
               break;
            case Iop_Or32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Or64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Xor -- */
            case Iop_Xor8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Xor16:
               e2 = IRExpr_Const(IRConst_U16(toUShort(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
               break;
            case Iop_Xor32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Xor64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- And -- */
            case Iop_And8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_And32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_And64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Add -- */
            case Iop_Add8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Add32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Add64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Sub -- */
            case Iop_Sub8:
               e2 = IRExpr_Const(IRConst_U8(toUChar(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
               break;
            case Iop_Sub32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Sub64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            /* -- Max32U -- */
            case Iop_Max32U: {
               UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
               UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
               UInt res  = u32a > u32b ? u32a : u32b;
               e2 = IRExpr_Const(IRConst_U32(res));
               break;
            }

            /* -- Mul -- */
            case Iop_Mul32:
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
               break;
            case Iop_Mul64:
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
               break;

            case Iop_MullS32: {
               /* very paranoid */
               UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
               UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
               Int   s32a = (Int)u32a;
               Int   s32b = (Int)u32b;
               Long  s64a = (Long)s32a;
               Long  s64b = (Long)s32b;
               Long  sres = s64a * s64b;
               ULong ures = (ULong)sres;
               e2 = IRExpr_Const(IRConst_U64(ures));
               break;
            }

            /* -- Shl -- */
            case Iop_Shl32:
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 31)
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           << shift)));
               break;
            case Iop_Shl64:
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 63)
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           << shift)));
               break;

            /* -- Sar -- */
            case Iop_Sar32: {
               /* paranoid ... */
               /*signed*/ Int s32;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 31) {
                  s32 >>=/*signed*/ shift;
                  e2 = IRExpr_Const(IRConst_U32((UInt)s32));
               }
               break;
            }
            case Iop_Sar64: {
               /* paranoid ... */
               /*signed*/ Long s64;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 63) {
                  s64 >>=/*signed*/ shift;
                  e2 = IRExpr_Const(IRConst_U64((ULong)s64));
               }
               break;
            }

            /* -- Shr -- */
            case Iop_Shr32: {
               /* paranoid ... */
               /*unsigned*/ UInt u32;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 31) {
                  u32 >>=/*unsigned*/ shift;
                  e2 = IRExpr_Const(IRConst_U32(u32));
               }
               break;
            }
            case Iop_Shr64: {
               /* paranoid ... */
               /*unsigned*/ ULong u64;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
               shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
               if (shift >= 0 && shift <= 63) {
                  u64 >>=/*unsigned*/ shift;
                  e2 = IRExpr_Const(IRConst_U64(u64));
               }
               break;
            }

            /* -- CmpEQ -- */
            case Iop_CmpEQ32:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
               break;
            case Iop_CmpEQ64:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
               break;

            /* -- CmpNE -- */
            case Iop_CmpNE8:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
                        != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
               break;
            case Iop_CmpNE32:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
               break;
            case Iop_CmpNE64:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                        != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
               break;

            /* -- CmpLEU -- */
            case Iop_CmpLE32U:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;

            /* -- CmpLES -- */
            case Iop_CmpLE32S:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;

            /* -- CmpLTS -- */
            case Iop_CmpLT32S:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;

            /* -- CmpLTU -- */
            case Iop_CmpLT32U:
               e2 = IRExpr_Const(IRConst_U1(toBool(
                       ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                        < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
               break;

            /* -- CmpORD -- */
            case Iop_CmpORD32S: {
               /* very paranoid */
               UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
               UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
               Int  s32a = (Int)u32a;
               Int  s32b = (Int)u32b;
               Int  r = 0x2; /* EQ */
               if (s32a < s32b) {
                  r = 0x8; /* LT */
               }
               else if (s32a > s32b) {
                  r = 0x4; /* GT */
               }
               e2 = IRExpr_Const(IRConst_U32(r));
               break;
            }

            /* -- nHLto2n -- */
            case Iop_32HLto64:
               e2 = IRExpr_Const(IRConst_U64(
                       (((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)) << 32)
                       | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
                    ));
               break;
            case Iop_64HLto128:
               /* We can't fold this, because there is no way to
                  express the result in IR, but at least pretend to
                  handle it, so as to stop getting blasted with
                  no-rule-for-this-primop messages. */
               break;

            default:
               goto unhandled;
         }

      } else {

         /* other cases (identities, etc) */

         /* Shl64/Shr64(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Shl64 || e->Iex.Binop.op == Iop_Shr64)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Shl32/Shr32(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Shl32 || e->Iex.Binop.op == Iop_Shr32)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Or8(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Or8)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Or16(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Or16)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U16 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Or32/Add32/Max32U(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Add32
              || e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* Add32(t,t) ==> t << 1.  Memcheck doesn't understand that
            x+x produces a defined least significant bit, and it seems
            simplest just to get rid of the problem by rewriting it
            out, since the opportunity to do so exists. */
         if (e->Iex.Binop.op == Iop_Add32
             && e->Iex.Binop.arg1->tag == Iex_RdTmp
             && e->Iex.Binop.arg2->tag == Iex_RdTmp
             && e->Iex.Binop.arg1->Iex.RdTmp.tmp
                == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
            e2 = IRExpr_Binop(Iop_Shl32,
                              e->Iex.Binop.arg1,
                              IRExpr_Const(IRConst_U8(1)));
         } else

         /* Add64(t,t) ==> t << 1;  rationale as for Add32(t,t) above. */
         if (e->Iex.Binop.op == Iop_Add64
             && e->Iex.Binop.arg1->tag == Iex_RdTmp
             && e->Iex.Binop.arg2->tag == Iex_RdTmp
             && e->Iex.Binop.arg1->Iex.RdTmp.tmp
                == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
            e2 = IRExpr_Binop(Iop_Shl64,
                              e->Iex.Binop.arg1,
                              IRExpr_Const(IRConst_U8(1)));
         } else

         /* Add8(t,t) ==> t << 1;  rationale as for Add32(t,t) above. */
         if (e->Iex.Binop.op == Iop_Add8
             && e->Iex.Binop.arg1->tag == Iex_RdTmp
             && e->Iex.Binop.arg2->tag == Iex_RdTmp
             && e->Iex.Binop.arg1->Iex.RdTmp.tmp
                == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
            e2 = IRExpr_Binop(Iop_Shl8,
                              e->Iex.Binop.arg1,
                              IRExpr_Const(IRConst_U8(1)));
         } else
         /* NB no Add16(t,t) case yet as no known test case exists */

         /* Or64/Add64(x,0) ==> x */
         if ((e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Or64)
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U64 == 0) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* And32(x,0xFFFFFFFF) ==> x */
         if (e->Iex.Binop.op == Iop_And32
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0xFFFFFFFF) {
            e2 = e->Iex.Binop.arg1;
         } else

         /* And32(x,0) ==> 0 */
         if (e->Iex.Binop.op == Iop_And32
             && e->Iex.Binop.arg2->tag == Iex_Const
             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
            e2 = IRExpr_Const(IRConst_U32(0));
         } else

         /* And32/Shl32(0,x) ==> 0 */
         if ((e->Iex.Binop.op == Iop_And32 || e->Iex.Binop.op == Iop_Shl32)
             && e->Iex.Binop.arg1->tag == Iex_Const
             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
            e2 = IRExpr_Const(IRConst_U32(0));
         } else

         /* Or8(0,x) ==> x */
         if (e->Iex.Binop.op == Iop_Or8
             && e->Iex.Binop.arg1->tag == Iex_Const
             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 == 0) {
            e2 = e->Iex.Binop.arg2;
         } else

         /* Or32/Max32U(0,x) ==> x */
         if ((e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U)
             && e->Iex.Binop.arg1->tag == Iex_Const
             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
            e2 = e->Iex.Binop.arg2;
         } else

         /* Or64(0,x) ==> x */
         if (e->Iex.Binop.op == Iop_Or64
             && e->Iex.Binop.arg1->tag == Iex_Const
             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 == 0) {
            e2 = e->Iex.Binop.arg2;
         } else

         /* Or8/16/32/64/V128(t,t) ==> t, for some IRTemp t */
         /* And8/16/32/64(t,t) ==> t, for some IRTemp t */
         /* Max32U(t,t) ==> t, for some IRTemp t */
         switch (e->Iex.Binop.op) {
            case Iop_And64: case Iop_And32:
            case Iop_And16: case Iop_And8:
            case Iop_Or64: case Iop_Or32:
            case Iop_Or16: case Iop_Or8: case Iop_OrV128:
            case Iop_Max32U:
               if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
                  e2 = e->Iex.Binop.arg1;
               break;
            default:
               break;
         }

         /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
         /* Sub32/64(t,t) ==> 0, for some IRTemp t */
         switch (e->Iex.Binop.op) {
            case Iop_Xor64: case Iop_Xor32:
            case Iop_Xor16: case Iop_Xor8:
            case Iop_XorV128:
            case Iop_Sub64: case Iop_Sub32:
               if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
                  e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
               break;
            default:
               break;
         }

         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ64:
            case Iop_CmpEQ8x8:
            case Iop_CmpEQ8x16:
               if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
                  e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
               break;
            default:
               break;
         }

      }
   }

   /* Mux0X */
   if (e->tag == Iex_Mux0X) {
      /* is the discriminant a constant? */
      if (e->Iex.Mux0X.cond->tag == Iex_Const) {
         Bool zero;
         /* assured us by the IR type rules */
         vassert(e->Iex.Mux0X.cond->Iex.Const.con->tag == Ico_U8);
         zero = toBool(0 == (0xFF & e->Iex.Mux0X.cond
                                     ->Iex.Const.con->Ico.U8));
         e2 = zero ? e->Iex.Mux0X.expr0 : e->Iex.Mux0X.exprX;
      }
      else
      /* are the arms identical? (pretty weedy test) */
      if (sameIRTempsOrIcoU32s(e->Iex.Mux0X.expr0,
                               e->Iex.Mux0X.exprX)) {
         e2 = e->Iex.Mux0X.expr0;
      }
   }

   /* Show cases where we've found but not folded 'op(t,t)'. */
   if (0 && e == e2 && e->tag == Iex_Binop
       && sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
      vex_printf("IDENT: ");
      ppIRExpr(e); vex_printf("\n");
   }

   /* Show the overall results of folding. */
   if (DEBUG_IROPT && e2 != e) {
      vex_printf("FOLD: ");
      ppIRExpr(e); vex_printf("  ->  ");
      ppIRExpr(e2); vex_printf("\n");
   }

   return e2;

 unhandled:
#  if 0
   vex_printf("\n\n");
   ppIRExpr(e);
   vpanic("fold_Expr: no rule for the above");
#  else
   if (vex_control.iropt_verbosity > 0) {
      vex_printf("vex iropt: fold_Expr: no rule for: ");
      ppIRExpr(e);
      vex_printf("\n");
   }
   return e2;
#  endif
}
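
/* Examples (illustrative) of what fold_Expr does:

      Add32(0x2:I32,0x3:I32)    -->  0x5:I32
      And32(t7,0xFFFFFFFF:I32)  -->  t7
      Xor32(t7,t7)              -->  0x0:I32
      Mux0X(0x0:I8,t1,t2)       -->  t1

   Anything it has no rule for is returned unchanged, with an optional
   note when vex_control.iropt_verbosity > 0. */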


/* Apply the subst to a simple 1-level expression -- guaranteed to be
   1-level due to previous flattening pass. */

static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
{
   switch (ex->tag) {
      case Iex_RdTmp:
         if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
            return env[(Int)ex->Iex.RdTmp.tmp];
         } else {
            /* not bound in env */
            return ex;
         }

      case Iex_Const:
      case Iex_Get:
         return ex;

      case Iex_GetI:
         vassert(isIRAtom(ex->Iex.GetI.ix));
         return IRExpr_GetI(
            ex->Iex.GetI.descr,
            subst_Expr(env, ex->Iex.GetI.ix),
            ex->Iex.GetI.bias
         );

      case Iex_Qop:
         vassert(isIRAtom(ex->Iex.Qop.arg1));
         vassert(isIRAtom(ex->Iex.Qop.arg2));
         vassert(isIRAtom(ex->Iex.Qop.arg3));
         vassert(isIRAtom(ex->Iex.Qop.arg4));
         return IRExpr_Qop(
                   ex->Iex.Qop.op,
                   subst_Expr(env, ex->Iex.Qop.arg1),
                   subst_Expr(env, ex->Iex.Qop.arg2),
                   subst_Expr(env, ex->Iex.Qop.arg3),
                   subst_Expr(env, ex->Iex.Qop.arg4)
                );

      case Iex_Triop:
         vassert(isIRAtom(ex->Iex.Triop.arg1));
         vassert(isIRAtom(ex->Iex.Triop.arg2));
         vassert(isIRAtom(ex->Iex.Triop.arg3));
         return IRExpr_Triop(
                   ex->Iex.Triop.op,
                   subst_Expr(env, ex->Iex.Triop.arg1),
                   subst_Expr(env, ex->Iex.Triop.arg2),
                   subst_Expr(env, ex->Iex.Triop.arg3)
                );

      case Iex_Binop:
         vassert(isIRAtom(ex->Iex.Binop.arg1));
         vassert(isIRAtom(ex->Iex.Binop.arg2));
         return IRExpr_Binop(
                   ex->Iex.Binop.op,
                   subst_Expr(env, ex->Iex.Binop.arg1),
                   subst_Expr(env, ex->Iex.Binop.arg2)
                );

      case Iex_Unop:
         vassert(isIRAtom(ex->Iex.Unop.arg));
         return IRExpr_Unop(
                   ex->Iex.Unop.op,
                   subst_Expr(env, ex->Iex.Unop.arg)
                );

      case Iex_Load:
         vassert(isIRAtom(ex->Iex.Load.addr));
         return IRExpr_Load(
                   ex->Iex.Load.end,
                   ex->Iex.Load.ty,
                   subst_Expr(env, ex->Iex.Load.addr)
                );

      case Iex_CCall: {
         Int      i;
         IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; args2[i]; i++) {
            vassert(isIRAtom(args2[i]));
            args2[i] = subst_Expr(env, args2[i]);
         }
         return IRExpr_CCall(
                   ex->Iex.CCall.cee,
                   ex->Iex.CCall.retty,
                   args2
                );
      }

      case Iex_Mux0X:
         vassert(isIRAtom(ex->Iex.Mux0X.cond));
         vassert(isIRAtom(ex->Iex.Mux0X.expr0));
         vassert(isIRAtom(ex->Iex.Mux0X.exprX));
         return IRExpr_Mux0X(
                   subst_Expr(env, ex->Iex.Mux0X.cond),
                   subst_Expr(env, ex->Iex.Mux0X.expr0),
                   subst_Expr(env, ex->Iex.Mux0X.exprX)
                );

      default:
         vex_printf("\n\n"); ppIRExpr(ex);
         vpanic("subst_Expr");

   }
}
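
/* Example (illustrative): with env[t1] = 0x10:I32 and t2 unbound,
   subst_Expr rewrites Add32(t1,t2) to Add32(0x10:I32,t2).  The
   arguments are guaranteed atomic by the earlier flattening pass, so
   one level of rewriting suffices. */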


/* Apply the subst to stmt, then fold the result as much as possible.
   Much simplified due to stmt being previously flattened.  As a
   result of this, the stmt may wind up being turned into a no-op.
*/
static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
{
#  if 0
   vex_printf("\nsubst and fold stmt\n");
   ppIRStmt(st);
   vex_printf("\n");
#  endif

   switch (st->tag) {
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         return IRStmt_AbiHint(
                   fold_Expr(subst_Expr(env, st->Ist.AbiHint.base)),
                   st->Ist.AbiHint.len,
                   fold_Expr(subst_Expr(env, st->Ist.AbiHint.nia))
                );
      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   fold_Expr(subst_Expr(env, st->Ist.Put.data))
                );

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.ix));
         vassert(isIRAtom(st->Ist.PutI.data));
         return IRStmt_PutI(
                   st->Ist.PutI.descr,
                   fold_Expr(subst_Expr(env, st->Ist.PutI.ix)),
                   st->Ist.PutI.bias,
                   fold_Expr(subst_Expr(env, st->Ist.PutI.data))
                );

      case Ist_WrTmp:
         /* This is the one place where an expr (st->Ist.WrTmp.data) is
            allowed to be more than just a constant or a tmp. */
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   fold_Expr(subst_Expr(env, st->Ist.WrTmp.data))
                );

      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         return IRStmt_Store(
                   st->Ist.Store.end,
                   fold_Expr(subst_Expr(env, st->Ist.Store.addr)),
                   fold_Expr(subst_Expr(env, st->Ist.Store.data))
                );

      case Ist_CAS: {
         IRCAS *cas, *cas2;
         cas = st->Ist.CAS.details;
         vassert(isIRAtom(cas->addr));
         vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
         vassert(isIRAtom(cas->expdLo));
         vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
         vassert(isIRAtom(cas->dataLo));
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   fold_Expr(subst_Expr(env, cas->addr)),
                   cas->expdHi ? fold_Expr(subst_Expr(env, cas->expdHi)) : NULL,
                   fold_Expr(subst_Expr(env, cas->expdLo)),
                   cas->dataHi ? fold_Expr(subst_Expr(env, cas->dataHi)) : NULL,
                   fold_Expr(subst_Expr(env, cas->dataLo))
                );
         return IRStmt_CAS(cas2);
      }

      case Ist_LLSC:
         vassert(isIRAtom(st->Ist.LLSC.addr));
         if (st->Ist.LLSC.storedata)
            vassert(isIRAtom(st->Ist.LLSC.storedata));
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   fold_Expr(subst_Expr(env, st->Ist.LLSC.addr)),
                   st->Ist.LLSC.storedata
                      ? fold_Expr(subst_Expr(env, st->Ist.LLSC.storedata))
                      : NULL
                );

      case Ist_Dirty: {
         Int     i;
         IRDirty *d, *d2;
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            vassert(isIRAtom(d2->mAddr));
            d2->mAddr = fold_Expr(subst_Expr(env, d2->mAddr));
         }
         vassert(isIRAtom(d2->guard));
         d2->guard = fold_Expr(subst_Expr(env, d2->guard));
         for (i = 0; d2->args[i]; i++) {
            vassert(isIRAtom(d2->args[i]));
            d2->args[i] = fold_Expr(subst_Expr(env, d2->args[i]));
         }
         return IRStmt_Dirty(d2);
      }

      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);

      case Ist_NoOp:
         return IRStmt_NoOp();

      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);

      case Ist_Exit: {
         IRExpr* fcond;
         vassert(isIRAtom(st->Ist.Exit.guard));
         fcond = fold_Expr(subst_Expr(env, st->Ist.Exit.guard));
         if (fcond->tag == Iex_Const) {
            /* Interesting.  The condition on this exit has folded down to
               a constant. */
            vassert(fcond->Iex.Const.con->tag == Ico_U1);
            vassert(fcond->Iex.Const.con->Ico.U1 == False
                    || fcond->Iex.Const.con->Ico.U1 == True);
            if (fcond->Iex.Const.con->Ico.U1 == False) {
               /* exit is never going to happen, so dump the statement. */
               return IRStmt_NoOp();
            } else {
               vassert(fcond->Iex.Const.con->Ico.U1 == True);
               /* Hmmm.  The exit has become unconditional.  Leave it
                  as it is for now, since we'd have to truncate the BB
                  at this point, which is tricky.  Such truncation is
                  done later by the dead-code elimination pass. */
               /* fall out into the reconstruct-the-exit code. */
               if (vex_control.iropt_verbosity > 0)
                  /* really a misuse of vex_control.iropt_verbosity */
                  vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
            }
         }
         return IRStmt_Exit(fcond, st->Ist.Exit.jk, st->Ist.Exit.dst);
      }

      default:
         vex_printf("\n"); ppIRStmt(st);
         vpanic("subst_and_fold_Stmt");
   }
}
1938
1939
1940 IRSB* cprop_BB ( IRSB* in )
1941 {
1942 Int i;
1943 IRSB* out;
1944 IRStmt* st2;
1945 Int n_tmps = in->tyenv->types_used;
1946 IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*));
1947
1948 out = emptyIRSB();
1949 out->tyenv = deepCopyIRTypeEnv( in->tyenv );
1950
1951 /* Set up the env which travels forward. This holds a
1952 substitution, mapping IRTemps to atoms, that is, IRExprs which
1953 are either IRTemps or IRConsts. Thus, copy and constant
1954 propagation is done. The environment is to be applied as we
1955 move along. Keys are IRTemps. Values are IRExpr*s.
1956 */
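   /* For instance (a hypothetical fragment): given

         t1 = 0x10:I32
         t2 = t1
         PUT(0) = t2

      the first two stmts are absorbed into env rather than copied
      to 'out' (both t1 and t2 end up mapped to 0x10:I32), and the
      PUT is emitted as PUT(0) = 0x10:I32. */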
1957 for (i = 0; i < n_tmps; i++)
1958 env[i] = NULL;
1959
1960 /* For each original SSA-form stmt ... */
1961 for (i = 0; i < in->stmts_used; i++) {
1962
1963 /* First apply the substitution to the current stmt. This
1964 propagates in any constants and tmp-tmp assignments
1965 accumulated prior to this point. As part of the subst_Stmt
1966 call, also then fold any constant expressions resulting. */
1967
1968 st2 = in->stmts[i];
1969
1970 /* perhaps st2 is already a no-op? */
1971 if (st2->tag == Ist_NoOp) continue;
1972
1973 st2 = subst_and_fold_Stmt( env, st2 );
1974
1975 /* If the statement has been folded into a no-op, forget it. */
1976 if (st2->tag == Ist_NoOp) continue;
1977
1978 /* Now consider what the stmt looks like. If it's of the form
1979 't = const' or 't1 = t2', add it to the running environment
1980 and not to the output BB. Otherwise, add it to the output
1981 BB. Note, we choose not to propagate const when const is an
1982 F64i, so that F64i literals can be CSE'd later. This helps
1983 x86 floating point code generation. */
1984
1985 if (st2->tag == Ist_WrTmp
1986 && st2->Ist.WrTmp.data->tag == Iex_Const
1987 && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
1988 /* 't = const' -- add to env.
1989 The pair (IRTemp, IRExpr*) is added. */
1990 env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
1991 }
1992 else
1993 if (st2->tag == Ist_WrTmp && st2->Ist.WrTmp.data->tag == Iex_RdTmp) {
1994 /* 't1 = t2' -- add to env.
1995 The pair (IRTemp, IRExpr*) is added. */
1996 env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
1997 }
1998 else {
1999 /* Not interesting, copy st2 into the output block. */
2000 addStmtToIRSB( out, st2 );
2001 }
2002 }
2003
2004 out->next = subst_Expr( env, in->next );
2005 out->jumpkind = in->jumpkind;
2006 return out;
2007 }
2008
2009
2010 /*---------------------------------------------------------------*/
2011 /*--- Dead code (t = E) removal ---*/
2012 /*---------------------------------------------------------------*/
2013
2014 /* As a side effect, also removes all code following an unconditional
2015 side exit. */
2016
2017 /* The type of the HashHW map is: a map from IRTemp to nothing
2018 -- really just operating as a set of IRTemps.
2019 */
2020
2021 inline
2022 static void addUses_Temp ( Bool* set, IRTemp tmp )
2023 {
2024 set[(Int)tmp] = True;
2025 }
2026
2027 static void addUses_Expr ( Bool* set, IRExpr* e )
2028 {
2029 Int i;
2030 switch (e->tag) {
2031 case Iex_GetI:
2032 addUses_Expr(set, e->Iex.GetI.ix);
2033 return;
2034 case Iex_Mux0X:
2035 addUses_Expr(set, e->Iex.Mux0X.cond);
2036 addUses_Expr(set, e->Iex.Mux0X.expr0);
2037 addUses_Expr(set, e->Iex.Mux0X.exprX);
2038 return;
2039 case Iex_CCall:
2040 for (i = 0; e->Iex.CCall.args[i]; i++)
2041 addUses_Expr(set, e->Iex.CCall.args[i]);
2042 return;
2043 case Iex_Load:
2044 addUses_Expr(set, e->Iex.Load.addr);
2045 return;
2046 case Iex_Qop:
2047 addUses_Expr(set, e->Iex.Qop.arg1);
2048 addUses_Expr(set, e->Iex.Qop.arg2);
2049 addUses_Expr(set, e->Iex.Qop.arg3);
2050 addUses_Expr(set, e->Iex.Qop.arg4);
2051 return;
2052 case Iex_Triop:
2053 addUses_Expr(set, e->Iex.Triop.arg1);
2054 addUses_Expr(set, e->Iex.Triop.arg2);
2055 addUses_Expr(set, e->Iex.Triop.arg3);
2056 return;
2057 case Iex_Binop:
2058 addUses_Expr(set, e->Iex.Binop.arg1);
2059 addUses_Expr(set, e->Iex.Binop.arg2);
2060 return;
2061 case Iex_Unop:
2062 addUses_Expr(set, e->Iex.Unop.arg);
2063 return;
2064 case Iex_RdTmp:
2065 addUses_Temp(set, e->Iex.RdTmp.tmp);
2066 return;
2067 case Iex_Const:
2068 case Iex_Get:
2069 return;
2070 default:
2071 vex_printf("\n");
2072 ppIRExpr(e);
2073 vpanic("addUses_Expr");
2074 }
2075 }
2076
2077 static void addUses_Stmt ( Bool* set, IRStmt* st )
2078 {
2079 Int i;
2080 IRDirty* d;
2081 IRCAS* cas;
2082 switch (st->tag) {
2083 case Ist_AbiHint:
2084 addUses_Expr(set, st->Ist.AbiHint.base);
2085 addUses_Expr(set, st->Ist.AbiHint.nia);
2086 return;
2087 case Ist_PutI:
2088 addUses_Expr(set, st->Ist.PutI.ix);
2089 addUses_Expr(set, st->Ist.PutI.data);
2090 return;
2091 case Ist_WrTmp:
2092 addUses_Expr(set, st->Ist.WrTmp.data);
2093 return;
2094 case Ist_Put:
2095 addUses_Expr(set, st->Ist.Put.data);
2096 return;
2097 case Ist_Store:
2098 addUses_Expr(set, st->Ist.Store.addr);
2099 addUses_Expr(set, st->Ist.Store.data);
2100 return;
2101 case Ist_CAS:
2102 cas = st->Ist.CAS.details;
2103 addUses_Expr(set, cas->addr);
2104 if (cas->expdHi)
2105 addUses_Expr(set, cas->expdHi);
2106 addUses_Expr(set, cas->expdLo);
2107 if (cas->dataHi)
2108 addUses_Expr(set, cas->dataHi);
2109 addUses_Expr(set, cas->dataLo);
2110 return;
2111 case Ist_LLSC:
2112 addUses_Expr(set, st->Ist.LLSC.addr);
2113 if (st->Ist.LLSC.storedata)
2114 addUses_Expr(set, st->Ist.LLSC.storedata);
2115 return;
2116 case Ist_Dirty:
2117 d = st->Ist.Dirty.details;
2118 if (d->mFx != Ifx_None)
2119 addUses_Expr(set, d->mAddr);
2120 addUses_Expr(set, d->guard);
2121 for (i = 0; d->args[i] != NULL; i++)
2122 addUses_Expr(set, d->args[i]);
2123 return;
2124 case Ist_NoOp:
2125 case Ist_IMark:
2126 case Ist_MBE:
2127 return;
2128 case Ist_Exit:
2129 addUses_Expr(set, st->Ist.Exit.guard);
2130 return;
2131 default:
2132 vex_printf("\n");
2133 ppIRStmt(st);
2134 vpanic("addUses_Stmt");
2135 }
2136 }
2137
2138
2139 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
2140 static Bool isZeroU1 ( IRExpr* e )
2141 {
2142 return toBool( e->tag == Iex_Const
2143 && e->Iex.Const.con->tag == Ico_U1
2144 && e->Iex.Const.con->Ico.U1 == False );
2145 }
2146
2147 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
2148 static Bool isOneU1 ( IRExpr* e )
2149 {
2150 return toBool( e->tag == Iex_Const
2151 && e->Iex.Const.con->tag == Ico_U1
2152 && e->Iex.Const.con->Ico.U1 == True );
2153 }
2154
2155
2156 /* Note, this destructively modifies the given IRSB. */
2157
2158 /* Scan backwards through statements, carrying a set of IRTemps which
2159 are known to be used after the current point. On encountering 't =
2160 E', delete the binding if it is not used. Otherwise, add any temp
2161 uses to the set and keep on moving backwards.
2162
2163 As an enhancement, the first (backwards) pass searches for IR exits
2164 with always-taken conditions and notes the location of the earliest
2165 one in the block. If any such are found, a second pass copies the
2166 exit destination and jump kind to the bb-end. Then, the exit and
2167 all statements following it are turned into no-ops.
2168 */
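/* A small worked example (hypothetical): in

      t1 = GET:I32(0)
      t2 = GET:I32(4)
      PUT(8) = t2

   the backwards scan first notes t2 as used (by the PUT), so
   't2 = GET:I32(4)' survives, and then deletes 't1 = GET:I32(0)',
   since t1 is not in the use-set at that point. */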
2169
2170 /* notstatic */ void do_deadcode_BB ( IRSB* bb )
2171 {
2172 Int i, i_unconditional_exit;
2173 Int n_tmps = bb->tyenv->types_used;
2174 Bool* set = LibVEX_Alloc(n_tmps * sizeof(Bool));
2175 IRStmt* st;
2176
2177 for (i = 0; i < n_tmps; i++)
2178 set[i] = False;
2179
2180 /* start off by recording IRTemp uses in the next field. */
2181 addUses_Expr(set, bb->next);
2182
2183 /* First pass */
2184
2185 /* Work backwards through the stmts */
2186 i_unconditional_exit = -1;
2187 for (i = bb->stmts_used-1; i >= 0; i--) {
2188 st = bb->stmts[i];
2189 if (st->tag == Ist_NoOp)
2190 continue;
2191 /* take note of any unconditional exits */
2192 if (st->tag == Ist_Exit
2193 && isOneU1(st->Ist.Exit.guard))
2194 i_unconditional_exit = i;
2195 if (st->tag == Ist_WrTmp
2196 && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
2197 /* it's an IRTemp which never got used. Delete it. */
2198 if (DEBUG_IROPT) {
2199 vex_printf("DEAD: ");
2200 ppIRStmt(st);
2201 vex_printf("\n");
2202 }
2203 bb->stmts[i] = IRStmt_NoOp();
2204 }
2205 else
2206 if (st->tag == Ist_Dirty
2207 && st->Ist.Dirty.details->guard
2208 && isZeroU1(st->Ist.Dirty.details->guard)) {
2209 /* This is a dirty helper which will never get called.
2210 Delete it. */
2211 bb->stmts[i] = IRStmt_NoOp();
2212 }
2213 else {
2214 /* Note any IRTemp uses made by the current statement. */
2215 addUses_Stmt(set, st);
2216 }
2217 }
2218
2219 /* Optional second pass: if any unconditional exits were found,
2220 delete them and all following statements. */
2221
2222 if (i_unconditional_exit != -1) {
2223 if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
2224 i_unconditional_exit);
2225 vassert(i_unconditional_exit >= 0
2226 && i_unconditional_exit < bb->stmts_used);
2227 bb->next
2228 = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
2229 bb->jumpkind
2230 = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
2231 for (i = i_unconditional_exit; i < bb->stmts_used; i++)
2232 bb->stmts[i] = IRStmt_NoOp();
2233 }
2234 }
2235
2236
2237 /*---------------------------------------------------------------*/
2238 /*--- Specialisation of helper function calls, in ---*/
2239 /*--- collaboration with the front end ---*/
2240 /*---------------------------------------------------------------*/
2241
2242 static
2243 IRSB* spec_helpers_BB(
2244 IRSB* bb,
2245 IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int)
2246 )
2247 {
2248 Int i;
2249 IRStmt* st;
2250 IRExpr* ex;
2251 Bool any = False;
2252
2253 for (i = bb->stmts_used-1; i >= 0; i--) {
2254 st = bb->stmts[i];
2255
2256 if (st->tag != Ist_WrTmp
2257 || st->Ist.WrTmp.data->tag != Iex_CCall)
2258 continue;
2259
2260 ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
2261 st->Ist.WrTmp.data->Iex.CCall.args,
2262 &bb->stmts[0], i );
2263 if (!ex)
2264 /* the front end can't think of a suitable replacement */
2265 continue;
2266
2267 /* We got something better. Install it in the bb. */
2268 any = True;
2269 bb->stmts[i]
2270 = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);
2271
2272 if (0) {
2273 vex_printf("SPEC: ");
2274 ppIRExpr(st->Ist.WrTmp.data);
2275 vex_printf(" --> ");
2276 ppIRExpr(ex);
2277 vex_printf("\n");
2278 }
2279 }
2280
2281 if (any)
2282 bb = flatten_BB(bb);
2283 return bb;
2284 }
2285
2286
2287 /*---------------------------------------------------------------*/
2288 /*--- Determination of guest state aliasing relationships ---*/
2289 /*---------------------------------------------------------------*/
2290
2291 /* These are helper functions for CSE and GetI/PutI transformations.
2292
2293 Determine, to the extent possible, the relationship between two
2294 guest state accesses. The possible outcomes are:
2295
2296 * Exact alias. These two accesses denote precisely the same
2297 piece of the guest state.
2298
2299 * Definitely no alias. These two accesses are guaranteed not to
2300 overlap any part of the guest state.
2301
2302 * Unknown -- if neither of the above can be established.
2303
2304 If in doubt, return Unknown. */
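/* Schematically (assuming the stated offset ranges): a PutI whose
   descriptor covers guest offsets 128..191 and a Put of an I64 at
   offset 0 (covering 0..7) cannot overlap, so the relation is
   NoAlias.  Two indexed accesses with equal descriptors, the same
   index tmp and equal normalised biases denote the very same slice,
   hence ExactAlias.  Anything murkier is UnknownAlias. */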
2305
2306 typedef
2307 enum { ExactAlias, NoAlias, UnknownAlias }
2308 GSAliasing;
2309
2310
2311 /* Produces the alias relation between an indexed guest
2312 state access and a non-indexed access. */
2313
2314 static
2315 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
2316 Int offset2, IRType ty2 )
2317 {
2318 UInt minoff1, maxoff1, minoff2, maxoff2;
2319
2320 getArrayBounds( descr1, &minoff1, &maxoff1 );
2321 minoff2 = offset2;
2322 maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
2323
2324 if (maxoff1 < minoff2 || maxoff2 < minoff1)
2325 return NoAlias;
2326
2327 /* Could probably do better here if required. For the moment
2328 however just claim not to know anything more. */
2329 return UnknownAlias;
2330 }
2331
2332
2333 /* Produces the alias relation between two indexed guest state
2334 accesses. */
2335
2336 static
2337 GSAliasing getAliasingRelation_II (
2338 IRRegArray* descr1, IRExpr* ix1, Int bias1,
2339 IRRegArray* descr2, IRExpr* ix2, Int bias2
2340 )
2341 {
2342 UInt minoff1, maxoff1, minoff2, maxoff2;
2343 Int iters;
2344
2345 /* First try hard to show they don't alias. */
2346 getArrayBounds( descr1, &minoff1, &maxoff1 );
2347 getArrayBounds( descr2, &minoff2, &maxoff2 );
2348 if (maxoff1 < minoff2 || maxoff2 < minoff1)
2349 return NoAlias;
2350
2351 /* So the two arrays at least partially overlap. To get any
2352 further we'll have to be sure that the descriptors are
2353 identical. */
2354 if (!eqIRRegArray(descr1, descr2))
2355 return UnknownAlias;
2356
2357 /* The descriptors are identical. Now the only difference can be
2358 in the index expressions. If they cannot be shown to be
2359 identical, we have to say we don't know what the aliasing
2360 relation will be. Now, since the IR is flattened, the index
2361 expressions should be atoms -- either consts or tmps. So that
2362 makes the comparison simple. */
2363 vassert(isIRAtom(ix1));
2364 vassert(isIRAtom(ix2));
2365 if (!eqIRAtom(ix1,ix2))
2366 return UnknownAlias;
2367
2368 /* Ok, the index expressions are identical. So now the only way
2369 they can be different is in the bias. Normalise this
2370 paranoidly, to reliably establish equality/non-equality. */
2371
2372 /* So now we know that the GetI and PutI index the same array
2373 with the same base. Are the offsets the same, modulo the
2374 array size? Do this paranoidly. */
2375 vassert(descr1->nElems == descr2->nElems);
2376 vassert(descr1->elemTy == descr2->elemTy);
2377 vassert(descr1->base == descr2->base);
2378 iters = 0;
2379 while (bias1 < 0 || bias2 < 0) {
2380 bias1 += descr1->nElems;
2381 bias2 += descr1->nElems;
2382 iters++;
2383 if (iters > 10)
2384 vpanic("getAliasingRelation: iters");
2385 }
2386 vassert(bias1 >= 0 && bias2 >= 0);
2387 bias1 %= descr1->nElems;
2388 bias2 %= descr1->nElems;
2389 vassert(bias1 >= 0 && bias1 < descr1->nElems);
2390 vassert(bias2 >= 0 && bias2 < descr1->nElems);
2391
2392 /* Finally, bias1 and bias2 are normalised into the range
2393 0 .. descr1->nElems - 1. And so we can establish
2394 equality/non-equality. */
2395
2396 return bias1==bias2 ? ExactAlias : NoAlias;
2397 }
2398
2399
2400 /*---------------------------------------------------------------*/
2401 /*--- Common Subexpression Elimination ---*/
2402 /*---------------------------------------------------------------*/
2403
2404 /* Expensive in time and space. */
2405
2406 /* Uses two environments:
2407 an IRTemp -> IRTemp mapping
2408 a mapping from AvailExpr* to IRTemp
2409 */
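/* Illustrative effect (hypothetical fragment): in

      t3 = Add32(t1,t2)
      ...
      t5 = Add32(t1,t2)

   the second computation is already available, so the last stmt is
   rewritten to t5 = t3, and t5 -> t3 is recorded in the
   IRTemp -> IRTemp environment so that later uses of t5 are treated
   as uses of t3. */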
2410
2411 typedef
2412 struct {
2413 enum { Ut, Btt, Btc, Bct, Cf64i, Mttt, GetIt } tag;
2414 union {
2415 /* unop(tmp) */
2416 struct {
2417 IROp op;
2418 IRTemp arg;
2419 } Ut;
2420 /* binop(tmp,tmp) */
2421 struct {
2422 IROp op;
2423 IRTemp arg1;
2424 IRTemp arg2;
2425 } Btt;
2426 /* binop(tmp,const) */
2427 struct {
2428 IROp op;
2429 IRTemp arg1;
2430 IRConst con2;
2431 } Btc;
2432 /* binop(const,tmp) */
2433 struct {
2434 IROp op;
2435 IRConst con1;
2436 IRTemp arg2;
2437 } Bct;
2438 /* F64i-style const */
2439 struct {
2440 ULong f64i;
2441 } Cf64i;
2442 /* Mux0X(tmp,tmp,tmp) */
2443 struct {
2444 IRTemp co;
2445 IRTemp e0;
2446 IRTemp eX;
2447 } Mttt;
2448 /* GetI(descr,tmp,bias)*/
2449 struct {
2450 IRRegArray* descr;
2451 IRTemp ix;
2452 Int bias;
2453 } GetIt;
2454 } u;
2455 }
2456 AvailExpr;
2457
2458 static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
2459 {
2460 if (a1->tag != a2->tag)
2461 return False;
2462 switch (a1->tag) {
2463 case Ut:
2464 return toBool(
2465 a1->u.Ut.op == a2->u.Ut.op
2466 && a1->u.Ut.arg == a2->u.Ut.arg);
2467 case Btt:
2468 return toBool(
2469 a1->u.Btt.op == a2->u.Btt.op
2470 && a1->u.Btt.arg1 == a2->u.Btt.arg1
2471 && a1->u.Btt.arg2 == a2->u.Btt.arg2);
2472 case Btc:
2473 return toBool(
2474 a1->u.Btc.op == a2->u.Btc.op
2475 && a1->u.Btc.arg1 == a2->u.Btc.arg1
2476 && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
2477 case Bct:
2478 return toBool(
2479 a1->u.Bct.op == a2->u.Bct.op
2480 && a1->u.Bct.arg2 == a2->u.Bct.arg2
2481 && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
2482 case Cf64i:
2483 return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
2484 case Mttt:
2485 return toBool(a1->u.Mttt.co == a2->u.Mttt.co
2486 && a1->u.Mttt.e0 == a2->u.Mttt.e0
2487 && a1->u.Mttt.eX == a2->u.Mttt.eX);
2488 case GetIt:
2489 return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
2490 && a1->u.GetIt.ix == a2->u.GetIt.ix
2491 && a1->u.GetIt.bias == a2->u.GetIt.bias);
2492 default: vpanic("eq_AvailExpr");
2493 }
2494 }
2495
2496 static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
2497 {
2498 IRConst* con;
2499 switch (ae->tag) {
2500 case Ut:
2501 return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
2502 case Btt:
2503 return IRExpr_Binop( ae->u.Btt.op,
2504 IRExpr_RdTmp(ae->u.Btt.arg1),
2505 IRExpr_RdTmp(ae->u.Btt.arg2) );
2506 case Btc:
2507 con = LibVEX_Alloc(sizeof(IRConst));
2508 *con = ae->u.Btc.con2;
2509 return IRExpr_Binop( ae->u.Btc.op,
2510 IRExpr_RdTmp(ae->u.Btc.arg1),
2511 IRExpr_Const(con) );
2512 case Bct:
2513 con = LibVEX_Alloc(sizeof(IRConst));
2514 *con = ae->u.Bct.con1;
2515 return IRExpr_Binop( ae->u.Bct.op,
2516 IRExpr_Const(con),
2517 IRExpr_RdTmp(ae->u.Bct.arg2) );
2518 case Cf64i:
2519 return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
2520 case Mttt:
2521 return IRExpr_Mux0X(IRExpr_RdTmp(ae->u.Mttt.co),
2522 IRExpr_RdTmp(ae->u.Mttt.e0),
2523 IRExpr_RdTmp(ae->u.Mttt.eX));
2524 case GetIt:
2525 return IRExpr_GetI(ae->u.GetIt.descr,
2526 IRExpr_RdTmp(ae->u.GetIt.ix),
2527 ae->u.GetIt.bias);
2528 default:
2529 vpanic("availExpr_to_IRExpr");
2530 }
2531 }
2532
2533 inline
2534 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
2535 {
2536 HWord res;
2537 /* env :: IRTemp -> IRTemp */
2538 if (lookupHHW( env, &res, (HWord)tmp ))
2539 return (IRTemp)res;
2540 else
2541 return tmp;
2542 }
2543
2544 static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
2545 {
2546 /* env :: IRTemp -> IRTemp */
2547 switch (ae->tag) {
2548 case Ut:
2549 ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
2550 break;
2551 case Btt:
2552 ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
2553 ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
2554 break;
2555 case Btc:
2556 ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
2557 break;
2558 case Bct:
2559 ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
2560 break;
2561 case Cf64i:
2562 break;
2563 case Mttt:
2564 ae->u.Mttt.co = subst_AvailExpr_Temp( env, ae->u.Mttt.co );
2565 ae->u.Mttt.e0 = subst_AvailExpr_Temp( env, ae->u.Mttt.e0 );
2566 ae->u.Mttt.eX = subst_AvailExpr_Temp( env, ae->u.Mttt.eX );
2567 break;
2568 case GetIt:
2569 ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
2570 break;
2571 default:
2572 vpanic("subst_AvailExpr");
2573 }
2574 }
2575
2576 static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e )
2577 {
2578 AvailExpr* ae;
2579
2580 if (e->tag == Iex_Unop
2581 && e->Iex.Unop.arg->tag == Iex_RdTmp) {
2582 ae = LibVEX_Alloc(sizeof(AvailExpr));
2583 ae->tag = Ut;
2584 ae->u.Ut.op = e->Iex.Unop.op;
2585 ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
2586 return ae;
2587 }
2588
2589 if (e->tag == Iex_Binop
2590 && e->Iex.Binop.arg1->tag == Iex_RdTmp
2591 && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
2592 ae = LibVEX_Alloc(sizeof(AvailExpr));
2593 ae->tag = Btt;
2594 ae->u.Btt.op = e->Iex.Binop.op;
2595 ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
2596 ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
2597 return ae;
2598 }
2599
2600 if (e->tag == Iex_Binop
2601 && e->Iex.Binop.arg1->tag == Iex_RdTmp
2602 && e->Iex.Binop.arg2->tag == Iex_Const) {
2603 ae = LibVEX_Alloc(sizeof(AvailExpr));
2604 ae->tag = Btc;
2605 ae->u.Btc.op = e->Iex.Binop.op;
2606 ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
2607 ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
2608 return ae;
2609 }
2610
2611 if (e->tag == Iex_Binop
2612 && e->Iex.Binop.arg1->tag == Iex_Const
2613 && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
2614 ae = LibVEX_Alloc(sizeof(AvailExpr));
2615 ae->tag = Bct;
2616 ae->u.Bct.op = e->Iex.Binop.op;
2617 ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
2618 ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
2619 return ae;
2620 }
2621
2622 if (e->tag == Iex_Const
2623 && e->Iex.Const.con->tag == Ico_F64i) {
2624 ae = LibVEX_Alloc(sizeof(AvailExpr));
2625 ae->tag = Cf64i;
2626 ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
2627 return ae;
2628 }
2629
2630 if (e->tag == Iex_Mux0X
2631 && e->Iex.Mux0X.cond->tag == Iex_RdTmp
2632 && e->Iex.Mux0X.expr0->tag == Iex_RdTmp
2633 && e->Iex.Mux0X.exprX->tag == Iex_RdTmp) {
2634 ae = LibVEX_Alloc(sizeof(AvailExpr));
2635 ae->tag = Mttt;
2636 ae->u.Mttt.co = e->Iex.Mux0X.cond->Iex.RdTmp.tmp;
2637 ae->u.Mttt.e0 = e->Iex.Mux0X.expr0->Iex.RdTmp.tmp;
2638 ae->u.Mttt.eX = e->Iex.Mux0X.exprX->Iex.RdTmp.tmp;
2639 return ae;
2640 }
2641
2642 if (e->tag == Iex_GetI
2643 && e->Iex.GetI.ix->tag == Iex_RdTmp) {
2644 ae = LibVEX_Alloc(sizeof(AvailExpr));
2645 ae->tag = GetIt;
2646 ae->u.GetIt.descr = e->Iex.GetI.descr;
2647 ae->u.GetIt.ix = e->Iex.GetI.ix->Iex.RdTmp.tmp;
2648 ae->u.GetIt.bias = e->Iex.GetI.bias;
2649 return ae;
2650 }
2651
2652 return NULL;
2653 }
2654
2655
2656 /* The BB is modified in-place. Returns True if any changes were
2657 made. */
2658
2659 static Bool do_cse_BB ( IRSB* bb )
2660 {
2661 Int i, j, paranoia;
2662 IRTemp t, q;
2663 IRStmt* st;
2664 AvailExpr* eprime;
2665 AvailExpr* ae;
2666 Bool invalidate;
2667 Bool anyDone = False;
2668
2669 HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
2670 HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */
2671
2672 vassert(sizeof(IRTemp) <= sizeof(HWord));
2673
2674 if (0) { ppIRSB(bb); vex_printf("\n\n"); }
2675
2676 /* Iterate forwards over the stmts.
2677 On seeing "t = E", where E is one of the 5 AvailExpr forms:
2678 let E' = apply tenv substitution to E
2679 search aenv for E'
2680 if a mapping E' -> q is found,
2681 replace this stmt by "t = q"
2682 and add binding t -> q to tenv
2683 else
2684 add binding E' -> t to aenv
2685 replace this stmt by "t = E'"
2686
2687 Other statements are only interesting to the extent that they
2688 might invalidate some of the expressions in aenv. So there is
2689 an invalidate-bindings check for each statement seen.
2690 */
2691 for (i = 0; i < bb->stmts_used; i++) {
2692 st = bb->stmts[i];
2693
2694 /* ------ BEGIN invalidate aenv bindings ------ */
2695 /* This is critical: remove from aenv any E' -> .. bindings
2696 which might be invalidated by this statement. The only
2697 vulnerable kind of bindings are the GetI kind.
2698 Dirty call - dump (paranoia level -> 2)
2699 Store - dump (ditto)
2700 Put, PutI - dump unless no-overlap is proven (.. -> 1)
2701 Uses getAliasingRelation_IC and getAliasingRelation_II
2702 to do the no-overlap assessments needed for Put/PutI.
2703 */
2704 switch (st->tag) {
2705 case Ist_Dirty: case Ist_Store: case Ist_MBE:
2706 case Ist_CAS: case Ist_LLSC:
2707 paranoia = 2; break;
2708 case Ist_Put: case Ist_PutI:
2709 paranoia = 1; break;
2710 case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
2711 case Ist_WrTmp: case Ist_Exit:
2712 paranoia = 0; break;
2713 default:
2714 vpanic("do_cse_BB(1)");
2715 }
2716
2717 if (paranoia > 0) {
2718 for (j = 0; j < aenv->used; j++) {
2719 if (!aenv->inuse[j])
2720 continue;
2721 ae = (AvailExpr*)aenv->key[j];
2722 if (ae->tag != GetIt)
2723 continue;
2724 invalidate = False;
2725 if (paranoia >= 2) {
2726 invalidate = True;
2727 } else {
2728 vassert(paranoia == 1);
2729 if (st->tag == Ist_Put) {
2730 if (getAliasingRelation_IC(
2731 ae->u.GetIt.descr,
2732 IRExpr_RdTmp(ae->u.GetIt.ix),
2733 st->Ist.Put.offset,
2734 typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
2735 ) != NoAlias)
2736 invalidate = True;
2737 }
2738 else
2739 if (st->tag == Ist_PutI) {
2740 if (getAliasingRelation_II(
2741 ae->u.GetIt.descr,
2742 IRExpr_RdTmp(ae->u.GetIt.ix),
2743 ae->u.GetIt.bias,
2744 st->Ist.PutI.descr,
2745 st->Ist.PutI.ix,
2746 st->Ist.PutI.bias
2747 ) != NoAlias)
2748 invalidate = True;
2749 }
2750 else
2751 vpanic("do_cse_BB(2)");
2752 }
2753
2754 if (invalidate) {
2755 aenv->inuse[j] = False;
2756 aenv->key[j] = (HWord)NULL; /* be sure */
2757 }
2758 } /* for j */
2759 } /* paranoia > 0 */
2760
2761 /* ------ END invalidate aenv bindings ------ */
2762
2763 /* ignore not-interestings */
2764 if (st->tag != Ist_WrTmp)
2765 continue;
2766
2767 t = st->Ist.WrTmp.tmp;
2768 eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data);
2769 /* ignore if not of AvailExpr form */
2770 if (!eprime)
2771 continue;
2772
2773 /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
2774
2775 /* apply tenv */
2776 subst_AvailExpr( tenv, eprime );
2777
2778 /* search aenv for eprime, unfortunately the hard way */
2779 for (j = 0; j < aenv->used; j++)
2780 if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
2781 break;
2782
2783 if (j < aenv->used) {
2784 /* A binding E' -> q was found. Replace stmt by "t = q" and
2785 note the t->q binding in tenv. */
2786 /* (this is the core of the CSE action) */
2787 q = (IRTemp)aenv->val[j];
2788 bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
2789 addToHHW( tenv, (HWord)t, (HWord)q );
2790 anyDone = True;
2791 } else {
2792 /* No binding was found, so instead we add E' -> t to our
2793 collection of available expressions, replace this stmt
2794 with "t = E'", and move on. */
2795 bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
2796 addToHHW( aenv, (HWord)eprime, (HWord)t );
2797 }
2798 }
2799
2800 /*
2801 ppIRSB(bb);
2802 sanityCheckIRSB(bb, Ity_I32);
2803 vex_printf("\n\n");
2804 */
2805 return anyDone;
2806 }
2807
2808
2809 /*---------------------------------------------------------------*/
2810 /*--- Add32/Sub32 chain collapsing ---*/
2811 /*---------------------------------------------------------------*/
2812
2813 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
2814
2815 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ? If
2816 yes, set *tmp and *i32 appropriately. *i32 is set as if the
2817 root node is Add32, not Sub32. */
2818
2819 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
2820 {
2821 if (e->tag != Iex_Binop)
2822 return False;
2823 if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
2824 return False;
2825 if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
2826 return False;
2827 if (e->Iex.Binop.arg2->tag != Iex_Const)
2828 return False;
2829 *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
2830 *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
2831 if (e->Iex.Binop.op == Iop_Sub32)
2832 *i32 = -*i32;
2833 return True;
2834 }
2835
2836
2837 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
2838 other tmp2. Scan backwards from the specified start point -- an
2839 optimisation. */
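/* Example (hypothetical chain): given

      t1 = GET:I32(0)
      t2 = Add32(t1,0x4:I32)
      t3 = Sub32(t2,0x1:I32)

   collapseChain(bb, .., t3, &tmp2, &i32) walks back through both
   bindings, stops at t1 (not an Add32/Sub32), and reports
   t3 == t1 + 3, that is, *tmp2 = t1 and *i32 = 3. */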
2840
2841 static Bool collapseChain ( IRSB* bb, Int startHere,
2842 IRTemp tmp,
2843 IRTemp* tmp2, Int* i32 )
2844 {
2845 Int j, ii;
2846 IRTemp vv;
2847 IRStmt* st;
2848 IRExpr* e;
2849
2850 /* the (var, con) pair contains the current 'representation' for
2851 'tmp'. We start with 'tmp + 0'. */
2852 IRTemp var = tmp;
2853 Int con = 0;
2854
2855 /* Scan backwards to see if tmp can be replaced by some other tmp
2856 +/- a constant. */
2857 for (j = startHere; j >= 0; j--) {
2858 st = bb->stmts[j];
2859 if (st->tag != Ist_WrTmp)
2860 continue;
2861 if (st->Ist.WrTmp.tmp != var)
2862 continue;
2863 e = st->Ist.WrTmp.data;
2864 if (!isAdd32OrSub32(e, &vv, &ii))
2865 break;
2866 var = vv;
2867 con += ii;
2868 }
2869 if (j == -1)
2870 /* no earlier binding for var .. ill-formed IR */
2871 vpanic("collapseChain");
2872
2873 /* so, did we find anything interesting? */
2874 if (var == tmp)
2875 return False; /* no .. */
2876
2877 *tmp2 = var;
2878 *i32 = con;
2879 return True;
2880 }
2881
2882
2883 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
2884
2885 static void collapse_AddSub_chains_BB ( IRSB* bb )
2886 {
2887 IRStmt *st;
2888 IRTemp var, var2;
2889 Int i, con, con2;
2890
2891 for (i = bb->stmts_used-1; i >= 0; i--) {
2892 st = bb->stmts[i];
2893 if (st->tag == Ist_NoOp)
2894 continue;
2895
2896 /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
2897
2898 if (st->tag == Ist_WrTmp
2899 && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
2900
2901 /* So the RHS is of the form Add32(var,con) or Sub32(var,-con).
2902 Find out if var can be expressed as var2 + con2. */
2903 if (collapseChain(bb, i-1, var, &var2, &con2)) {
2904 if (DEBUG_IROPT) {
2905 vex_printf("replacing1 ");
2906 ppIRStmt(st);
2907 vex_printf(" with ");
2908 }
2909 con2 += con;
2910 bb->stmts[i]
2911 = IRStmt_WrTmp(
2912 st->Ist.WrTmp.tmp,
2913 (con2 >= 0)
2914 ? IRExpr_Binop(Iop_Add32,
2915 IRExpr_RdTmp(var2),
2916 IRExpr_Const(IRConst_U32(con2)))
2917 : IRExpr_Binop(Iop_Sub32,
2918 IRExpr_RdTmp(var2),
2919 IRExpr_Const(IRConst_U32(-con2)))
2920 );
2921 if (DEBUG_IROPT) {
2922 ppIRStmt(bb->stmts[i]);
2923 vex_printf("\n");
2924 }
2925 }
2926
2927 continue;
2928 }
2929
2930 /* Try to collapse 't1 = GetI[t2, con]'. */
2931
2932 if (st->tag == Ist_WrTmp
2933 && st->Ist.WrTmp.data->tag == Iex_GetI
2934 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
2935 && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
2936 ->Iex.RdTmp.tmp, &var2, &con2)) {
2937 if (DEBUG_IROPT) {
2938 vex_printf("replacing3 ");
2939 ppIRStmt(st);
2940 vex_printf(" with ");
2941 }
2942 con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
2943 bb->stmts[i]
2944 = IRStmt_WrTmp(
2945 st->Ist.WrTmp.tmp,
2946 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
2947 IRExpr_RdTmp(var2),
2948 con2));
2949 if (DEBUG_IROPT) {
2950 ppIRStmt(bb->stmts[i]);
2951 vex_printf("\n");
2952 }
2953 continue;
2954 }
2955
2956 /* Perhaps st is PutI[t, con] ? */
2957
2958 if (st->tag == Ist_PutI
2959 && st->Ist.PutI.ix->tag == Iex_RdTmp
2960 && collapseChain(bb, i-1, st->Ist.PutI.ix->Iex.RdTmp.tmp,
2961 &var2, &con2)) {
2962 if (DEBUG_IROPT) {
2963 vex_printf("replacing2 ");
2964 ppIRStmt(st);
2965 vex_printf(" with ");
2966 }
2967 con2 += st->Ist.PutI.bias;
2968 bb->stmts[i]
2969 = IRStmt_PutI(st->Ist.PutI.descr,
2970 IRExpr_RdTmp(var2),
2971 con2,
2972 st->Ist.PutI.data);
2973 if (DEBUG_IROPT) {
2974 ppIRStmt(bb->stmts[i]);
2975 vex_printf("\n");
2976 }
2977 continue;
2978 }
2979
2980 } /* for */
2981 }
2982
2983
2984 /*---------------------------------------------------------------*/
2985 /*--- PutI/GetI transformations ---*/
2986 /*---------------------------------------------------------------*/
2987
2988 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
2989 the given starting point to find, if any, a PutI which writes
2990 exactly the same piece of guest state, and so return the expression
2991 that the PutI writes. This is the core of PutI-GetI forwarding. */
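/* Schematically (hypothetical fragment):

      PUTI(descr)[t1,7] = t9
      ... no intervening aliasing Put/PutI or guest-touching dirty call ...
      t12 = GETI(descr)[t1,7]

   findPutI, scanning backwards from the GetI, returns t9, allowing
   the GetI to be replaced by t12 = t9 (this is done by
   do_redundant_GetI_elimination below). */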
2992
2993 static
2994 IRExpr* findPutI ( IRSB* bb, Int startHere,
2995 IRRegArray* descrG, IRExpr* ixG, Int biasG )
2996 {
2997 Int j;
2998 IRStmt* st;
2999 GSAliasing relation;
3000
3001 if (0) {
3002 vex_printf("\nfindPutI ");
3003 ppIRRegArray(descrG);
3004 vex_printf(" ");
3005 ppIRExpr(ixG);
3006 vex_printf(" %d\n", biasG);
3007 }
3008
3009 /* Scan backwards in bb from startHere to find a suitable PutI
3010 binding for (descrG, ixG, biasG), if any. */
3011
3012 for (j = startHere; j >= 0; j--) {
3013 st = bb->stmts[j];
3014 if (st->tag == Ist_NoOp)
3015 continue;
3016
3017 if (st->tag == Ist_Put) {
3018 /* Non-indexed Put. This can't give a binding, but we do
3019 need to check it doesn't invalidate the search by
3020 overlapping any part of the indexed guest state. */
3021
3022 relation
3023 = getAliasingRelation_IC(
3024 descrG, ixG,
3025 st->Ist.Put.offset,
3026 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
3027
3028 if (relation == NoAlias) {
3029 /* we're OK; keep going */
3030 continue;
3031 } else {
3032 /* relation == UnknownAlias || relation == ExactAlias */
3033 /* If this assertion fails, we've found a Put which writes
3034 an area of guest state which is read by a GetI. Which
3035 is unlikely (although not per se wrong). */
3036 vassert(relation != ExactAlias);
3037 /* This Put potentially writes guest state that the GetI
3038 reads; we must fail. */
3039 return NULL;
3040 }
3041 }
3042
3043 if (st->tag == Ist_PutI) {
3044
3045 relation = getAliasingRelation_II(
3046 descrG, ixG, biasG,
3047 st->Ist.PutI.descr,
3048 st->Ist.PutI.ix,
3049 st->Ist.PutI.bias
3050 );
3051
3052 if (relation == NoAlias) {
3053 /* This PutI definitely doesn't overlap. Ignore it and
3054 keep going. */
3055 continue; /* the for j loop */
3056 }
3057
3058 if (relation == UnknownAlias) {
3059 /* We don't know if this PutI writes to the same guest
3060 state that the GetI reads, or not. So we have to give up. */
3061 return NULL;
3062 }
3063
3064 /* Otherwise, we've found what we're looking for. */
3065 vassert(relation == ExactAlias);
3066 return st->Ist.PutI.data;
3067
3068 } /* if (st->tag == Ist_PutI) */
3069
3070 if (st->tag == Ist_Dirty) {
3071 /* Be conservative. If the dirty call has any guest effects at
3072 all, give up. We could do better -- only give up if there
3073 are any guest writes/modifies. */
3074 if (st->Ist.Dirty.details->nFxState > 0)
3075 return NULL;
3076 }
3077
3078 } /* for */
3079
3080 /* No valid replacement was found. */
3081 return NULL;
3082 }
3083
3084
3085
3086 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
3087 that it writes exactly the same piece of guest state) ? Safe
3088 answer: False. */
3089
3090 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
3091 {
3092 vassert(pi->tag == Ist_PutI);
3093 if (s2->tag != Ist_PutI)
3094 return False;
3095
3096 return toBool(
3097 getAliasingRelation_II(
3098 pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
3099 s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
3100 )
3101 == ExactAlias
3102 );
3103 }
3104
3105
3106 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
3107 overlap it? Safe answer: True. Note, we could do a lot better
3108 than this if needed. */
3109
3110 static
3111 Bool guestAccessWhichMightOverlapPutI (
3112 IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
3113 )
3114 {
3115 GSAliasing relation;
3116 UInt minoffP, maxoffP;
3117
3118 vassert(pi->tag == Ist_PutI);
3119 getArrayBounds(pi->Ist.PutI.descr, &minoffP, &maxoffP);
3120 switch (s2->tag) {
3121
3122 case Ist_NoOp:
3123 case Ist_IMark:
3124 return False;
3125
3126 case Ist_MBE:
3127 case Ist_AbiHint:
3128 /* just be paranoid ... these should be rare. */
3129 return True;
3130
3131 case Ist_CAS:
3132 /* This is unbelievably lame, but it's probably not
3133 significant from a performance point of view. Really, a
3134 CAS is a load-store op, so it should be safe to say False.
3135 However .. */
3136 return True;
3137
3138 case Ist_Dirty:
3139 /* If the dirty call has any guest effects at all, give up.
3140 Probably could do better. */
3141 if (s2->Ist.Dirty.details->nFxState > 0)
3142 return True;
3143 return False;
3144
3145 case Ist_Put:
3146 vassert(isIRAtom(s2->Ist.Put.data));
3147 relation
3148 = getAliasingRelation_IC(
3149 pi->Ist.PutI.descr, pi->Ist.PutI.ix,
3150 s2->Ist.Put.offset,
3151 typeOfIRExpr(tyenv,s2->Ist.Put.data)
3152 );
3153 goto have_relation;
3154
3155 case Ist_PutI:
3156 vassert(isIRAtom(s2->Ist.PutI.ix));
3157 vassert(isIRAtom(s2->Ist.PutI.data));
3158 relation
3159 = getAliasingRelation_II(
3160 pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
3161 s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
3162 );
3163 goto have_relation;
3164
3165 case Ist_WrTmp:
3166 if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
3167 relation
3168 = getAliasingRelation_II(
3169 pi->Ist.PutI.descr, pi->Ist.PutI.ix,
3170 pi->Ist.PutI.bias,
3171 s2->Ist.WrTmp.data->Iex.GetI.descr,
3172 s2->Ist.WrTmp.data->Iex.GetI.ix,
3173 s2->Ist.WrTmp.data->Iex.GetI.bias
3174 );
3175 goto have_relation;
3176 }
3177 if (s2->Ist.WrTmp.data->tag == Iex_Get) {
3178 relation
3179 = getAliasingRelation_IC(
3180 pi->Ist.PutI.descr, pi->Ist.PutI.ix,
3181 s2->Ist.WrTmp.data->Iex.Get.offset,
3182 s2->Ist.WrTmp.data->Iex.Get.ty
3183 );
3184 goto have_relation;
3185 }
3186 return False;
3187
3188 case Ist_Store:
3189 vassert(isIRAtom(s2->Ist.Store.addr));
3190 vassert(isIRAtom(s2->Ist.Store.data));
3191 return False;
3192
3193 default:
3194 vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
3195 vpanic("guestAccessWhichMightOverlapPutI");
3196 }
3197
3198 have_relation:
3199 if (relation == NoAlias)
3200 return False;
3201 else
3202 return True; /* ExactAlias or UnknownAlias */
3203 }
3204
3205
3206
3207 /* ---------- PutI/GetI transformations main functions --------- */
3208
3209 /* Remove redundant GetIs, to the extent that they can be detected.
3210 bb is modified in-place. */
3211
3212 static
3213 void do_redundant_GetI_elimination ( IRSB* bb )
3214 {
3215 Int i;
3216 IRStmt* st;
3217
3218 for (i = bb->stmts_used-1; i >= 0; i--) {
3219 st = bb->stmts[i];
3220 if (st->tag == Ist_NoOp)
3221 continue;
3222
3223 if (st->tag == Ist_WrTmp
3224 && st->Ist.WrTmp.data->tag == Iex_GetI
3225 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
3226 IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
3227 IRExpr* ix = st->Ist.WrTmp.data->Iex.GetI.ix;
3228 Int bias = st->Ist.WrTmp.data->Iex.GetI.bias;
3229 IRExpr* replacement = findPutI(bb, i-1, descr, ix, bias);
3230 if (replacement
3231 && isIRAtom(replacement)
3232 /* Make sure we're doing a type-safe transformation! */
3233 && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
3234 if (DEBUG_IROPT) {
3235 vex_printf("rGI: ");
3236 ppIRExpr(st->Ist.WrTmp.data);
3237 vex_printf(" -> ");
3238 ppIRExpr(replacement);
3239 vex_printf("\n");
3240 }
3241 bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
3242 }
3243 }
3244 }
3245
3246 }
3247
3248
3249 /* Remove redundant PutIs, to the extent that they can be detected.
3250 bb is modified in-place. */
3251
3252 static
3253 void do_redundant_PutI_elimination ( IRSB* bb )
3254 {
3255 Int i, j;
3256 Bool delete;
3257 IRStmt *st, *stj;
3258
3259 for (i = 0; i < bb->stmts_used; i++) {
3260 st = bb->stmts[i];
3261 if (st->tag != Ist_PutI)
3262 continue;
3263 /* Ok, search forwards from here to see if we can find another
3264 PutI which makes this one redundant, and dodging various
3265 hazards. Search forwards:
3266 * If conditional exit, give up (because anything after that
3267 does not postdominate this put).
3268 * If a Get which might overlap, give up (because this PutI
3269 is not necessarily dead).
3270 * If a Put which is identical, stop with success.
3271 * If a Put which might overlap, but is not identical, give up.
3272 * If a dirty helper call which might write guest state, give up.
3273 * If a Put which definitely doesn't overlap, or any other
3274 kind of stmt, continue.
3275 */
3276 delete = False;
3277 for (j = i+1; j < bb->stmts_used; j++) {
3278 stj = bb->stmts[j];
3279 if (stj->tag == Ist_NoOp)
3280 continue;
3281 if (identicalPutIs(st, stj)) {
3282 /* success! */
3283 delete = True;
3284 break;
3285 }
3286 if (stj->tag == Ist_Exit)
3287 /* give up */
3288 break;
3289 if (stj->tag == Ist_Dirty)
3290 /* give up; could do better here */
3291 break;
3292 if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
3293 /* give up */
3294 break;
3295 }
3296
3297 if (delete) {
3298 if (DEBUG_IROPT) {
3299 vex_printf("rPI: ");
3300 ppIRStmt(st);
3301 vex_printf("\n");
3302 }
3303 bb->stmts[i] = IRStmt_NoOp();
3304 }
3305
3306 }
3307 }
3308
3309
3310 /*---------------------------------------------------------------*/
3311 /*--- Loop unrolling ---*/
3312 /*---------------------------------------------------------------*/
3313
3314 /* Adjust all tmp values (names) in e by delta. e is destructively
3315 modified. */
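/* E.g. with delta == 3 (hypothetical), Add32(t2,t5) is rewritten
   in place to Add32(t5,t8). */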
3316
3317 static void deltaIRExpr ( IRExpr* e, Int delta )
3318 {
3319 Int i;
3320 switch (e->tag) {
3321 case Iex_RdTmp:
3322 e->Iex.RdTmp.tmp += delta;
3323 break;
3324 case Iex_Get:
3325 case Iex_Const:
3326 break;
3327 case Iex_GetI:
3328 deltaIRExpr(e->Iex.GetI.ix, delta);
3329 break;
3330 case Iex_Qop:
3331 deltaIRExpr(e->Iex.Qop.arg1, delta);
3332 deltaIRExpr(e->Iex.Qop.arg2, delta);
3333 deltaIRExpr(e->Iex.Qop.arg3, delta);
3334 deltaIRExpr(e->Iex.Qop.arg4, delta);
3335 break;
3336 case Iex_Triop:
3337 deltaIRExpr(e->Iex.Triop.arg1, delta);
3338 deltaIRExpr(e->Iex.Triop.arg2, delta);
3339 deltaIRExpr(e->Iex.Triop.arg3, delta);
3340 break;
3341 case Iex_Binop:
3342 deltaIRExpr(e->Iex.Binop.arg1, delta);
3343 deltaIRExpr(e->Iex.Binop.arg2, delta);
3344 break;
3345 case Iex_Unop:
3346 deltaIRExpr(e->Iex.Unop.arg, delta);
3347 break;
3348 case Iex_Load:
3349 deltaIRExpr(e->Iex.Load.addr, delta);
3350 break;
3351 case Iex_CCall:
3352 for (i = 0; e->Iex.CCall.args[i]; i++)
3353 deltaIRExpr(e->Iex.CCall.args[i], delta);
3354 break;
3355 case Iex_Mux0X:
3356 deltaIRExpr(e->Iex.Mux0X.cond, delta);
3357 deltaIRExpr(e->Iex.Mux0X.expr0, delta);
3358 deltaIRExpr(e->Iex.Mux0X.exprX, delta);
3359 break;
3360 default:
3361 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3362 vpanic("deltaIRExpr");
3363 }
3364 }
3365
3366 /* Adjust all tmp values (names) in st by delta. st is destructively
3367 modified. */
3368
3369 static void deltaIRStmt ( IRStmt* st, Int delta )
3370 {
3371 Int i;
3372 IRDirty* d;
3373 switch (st->tag) {
3374 case Ist_NoOp:
3375 case Ist_IMark:
3376 case Ist_MBE:
3377 break;
3378 case Ist_AbiHint:
3379 deltaIRExpr(st->Ist.AbiHint.base, delta);
3380 deltaIRExpr(st->Ist.AbiHint.nia, delta);
3381 break;
3382 case Ist_Put:
3383 deltaIRExpr(st->Ist.Put.data, delta);
3384 break;
3385 case Ist_PutI:
3386 deltaIRExpr(st->Ist.PutI.ix, delta);
3387 deltaIRExpr(st->Ist.PutI.data, delta);
3388 break;
3389 case Ist_WrTmp:
3390 st->Ist.WrTmp.tmp += delta;
3391 deltaIRExpr(st->Ist.WrTmp.data, delta);
3392 break;
3393 case Ist_Exit:
3394 deltaIRExpr(st->Ist.Exit.guard, delta);
3395 break;
3396 case Ist_Store:
3397 deltaIRExpr(st->Ist.Store.addr, delta);
3398 deltaIRExpr(st->Ist.Store.data, delta);
3399 break;
3400 case Ist_CAS:
3401 if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
3402 st->Ist.CAS.details->oldHi += delta;
3403 st->Ist.CAS.details->oldLo += delta;
3404 deltaIRExpr(st->Ist.CAS.details->addr, delta);
3405 if (st->Ist.CAS.details->expdHi)
3406 deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
3407 deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
3408 if (st->Ist.CAS.details->dataHi)
3409 deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
3410 deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
3411 break;
3412 case Ist_LLSC:
3413 st->Ist.LLSC.result += delta;
3414 deltaIRExpr(st->Ist.LLSC.addr, delta);
3415 if (st->Ist.LLSC.storedata)
3416 deltaIRExpr(st->Ist.LLSC.storedata, delta);
3417 break;
3418 case Ist_Dirty:
3419 d = st->Ist.Dirty.details;
3420 deltaIRExpr(d->guard, delta);
3421 for (i = 0; d->args[i]; i++)
3422 deltaIRExpr(d->args[i], delta);
3423 if (d->tmp != IRTemp_INVALID)
3424 d->tmp += delta;
3425 if (d->mAddr)
3426 deltaIRExpr(d->mAddr, delta);
3427 break;
3428 default:
3429 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
3430 vpanic("deltaIRStmt");
3431 }
3432 }
3433
3434
3435 /* If possible, return a loop-unrolled version of bb0. The original
3436 is changed. If not possible, return NULL. */
3437
3438 /* The two schemas considered are:
3439
3440 X: BODY; goto X
3441
3442 which unrolls to (eg) X: BODY;BODY; goto X
3443
3444 and
3445
3446 X: BODY; if (c) goto X; goto Y
3447 which trivially transforms to
3448 X: BODY; if (!c) goto Y; goto X;
3449 so it falls in the scope of the first case.
3450
3451 X and Y must be literal (guest) addresses.
3452 */
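/* The factor chosen is the largest of 8, 4, 2 for which the unrolled
   (non-no-op) stmt count stays within iropt_unroll_thresh.  E.g.,
   assuming iropt_unroll_thresh == 120: a 14-stmt body is unrolled
   8x (112 stmts), a 25-stmt body 4x, a 55-stmt body 2x, and bodies
   of more than 60 stmts are left alone. */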
3453
3454 static Int calc_unroll_factor( IRSB* bb )
3455 {
3456 Int n_stmts, i;
3457
3458 n_stmts = 0;
3459 for (i = 0; i < bb->stmts_used; i++) {
3460 if (bb->stmts[i]->tag != Ist_NoOp)
3461 n_stmts++;
3462 }
3463
3464 if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
3465 if (vex_control.iropt_verbosity > 0)
3466 vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
3467 n_stmts, 8* n_stmts);
3468 return 8;
3469 }
3470 if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
3471 if (vex_control.iropt_verbosity > 0)
3472 vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
3473 n_stmts, 4* n_stmts);
3474 return 4;
3475 }
3476
3477 if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
3478 if (vex_control.iropt_verbosity > 0)
3479 vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
3480 n_stmts, 2* n_stmts);
3481 return 2;
3482 }
3483
3484 if (vex_control.iropt_verbosity > 0)
3485 vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
3486
3487 return 1;
3488 }
3489
3490
3491 static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr64 my_addr )
3492 {
3493 Int i, j, jmax, n_vars;
3494 Bool xxx_known;
3495 Addr64 xxx_value, yyy_value;
3496 IRExpr* udst;
3497 IRStmt* st;
3498 IRConst* con;
3499 IRSB *bb1, *bb2;
3500 Int unroll_factor;
3501
3502 if (vex_control.iropt_unroll_thresh <= 0)
3503 return NULL;
3504
3505 /* First off, figure out if we can unroll this loop. Do this
3506 without modifying bb0. */
3507
3508 if (bb0->jumpkind != Ijk_Boring)
3509 return NULL;
3510
3511 xxx_known = False;
3512 xxx_value = 0;
3513
3514 /* Extract the next-guest address. If it isn't a literal, we
3515 have to give up. */
3516
3517 udst = bb0->next;
3518 if (udst->tag == Iex_Const
3519 && (udst->Iex.Const.con->tag == Ico_U32
3520 || udst->Iex.Const.con->tag == Ico_U64)) {
3521 /* The BB ends in a jump to a literal location. */
3522 xxx_known = True;
3523 xxx_value = udst->Iex.Const.con->tag == Ico_U64
3524 ? udst->Iex.Const.con->Ico.U64
3525 : (Addr64)(udst->Iex.Const.con->Ico.U32);
3526 }
3527
3528 if (!xxx_known)
3529 return NULL;
3530
3531 /* Now we know the BB ends in a jump to a literal location. If
3532 it's a jump to itself (viz, idiom #1), move directly to the
3533 unrolling stage, first cloning the bb so the original isn't
3534 modified. */
3535 if (xxx_value == my_addr) {
3536 unroll_factor = calc_unroll_factor( bb0 );
3537 if (unroll_factor < 2)
3538 return NULL;
3539 bb1 = deepCopyIRSB( bb0 );
3540 bb0 = NULL;
3541 udst = NULL; /* is now invalid */
3542 goto do_unroll;
3543 }
3544
3545 /* Search for the second idiomatic form:
3546 X: BODY; if (c) goto X; goto Y
3547 We know Y, but need to establish that the last stmt
3548 is 'if (c) goto X'.
3549 */
3550 yyy_value = xxx_value;
3551 for (i = bb0->stmts_used-1; i >= 0; i--)
3552 if (bb0->stmts[i])
3553 break;
3554
3555 if (i < 0)
3556 return NULL; /* block with no stmts. Strange. */
3557
3558 st = bb0->stmts[i];
3559 if (st->tag != Ist_Exit)
3560 return NULL;
3561 if (st->Ist.Exit.jk != Ijk_Boring)
3562 return NULL;
3563
3564 con = st->Ist.Exit.dst;
3565 vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
3566
3567 xxx_value = con->tag == Ico_U64
3568 ? st->Ist.Exit.dst->Ico.U64
3569 : (Addr64)(st->Ist.Exit.dst->Ico.U32);
3570
3571 /* If this assertion fails, we have some kind of type error. */
3572 vassert(con->tag == udst->Iex.Const.con->tag);
3573
3574 if (xxx_value != my_addr)
3575 /* We didn't find either idiom. Give up. */
3576 return NULL;
3577
3578 /* Ok, we found idiom #2. Copy the BB, switch around the xxx and
3579 yyy values (which makes it look like idiom #1), and go into
3580 unrolling proper. This means finding (again) the last stmt, in
3581 the copied BB. */
3582
3583 unroll_factor = calc_unroll_factor( bb0 );
3584 if (unroll_factor < 2)
3585 return NULL;
3586
3587 bb1 = deepCopyIRSB( bb0 );
3588 bb0 = NULL;
3589 udst = NULL; /* is now invalid */
3590 for (i = bb1->stmts_used-1; i >= 0; i--)
3591 if (bb1->stmts[i])
3592 break;
3593
3594 /* The next bunch of assertions should be true since we already
3595 found and checked the last stmt in the original bb. */
3596
3597 vassert(i >= 0);
3598
3599 st = bb1->stmts[i];
3600 vassert(st->tag == Ist_Exit);
3601
3602 con = st->Ist.Exit.dst;
3603 vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
3604
3605 udst = bb1->next;
3606 vassert(udst->tag == Iex_Const);
3607 vassert(udst->Iex.Const.con->tag == Ico_U32
3608 || udst->Iex.Const.con->tag == Ico_U64);
3609 vassert(con->tag == udst->Iex.Const.con->tag);
3610
3611 /* switch the xxx and yyy fields around */
3612 if (con->tag == Ico_U64) {
3613 udst->Iex.Const.con->Ico.U64 = xxx_value;
3614 con->Ico.U64 = yyy_value;
3615 } else {
3616 udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
3617 con->Ico.U32 = (UInt)yyy_value;
3618 }
3619
3620 /* negate the test condition */
3621 st->Ist.Exit.guard
3622 = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));
3623
3624 /* --- The unroller proper. Both idioms have by now --- */
3625 /* --- been converted to idiom 1. --- */
3626
3627 do_unroll:
3628
3629 vassert(unroll_factor == 2
3630 || unroll_factor == 4
3631 || unroll_factor == 8);
3632
3633 jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
3634 for (j = 1; j <= jmax; j++) {
3635
3636 n_vars = bb1->tyenv->types_used;
3637
3638 bb2 = deepCopyIRSB(bb1);
3639 for (i = 0; i < n_vars; i++)
3640 (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);
3641
3642 for (i = 0; i < bb2->stmts_used; i++) {
3643 /* deltaIRStmt destructively modifies the stmt, but
3644 that's OK since bb2 is a complete fresh copy of bb1. */
3645 deltaIRStmt(bb2->stmts[i], n_vars);
3646 addStmtToIRSB(bb1, bb2->stmts[i]);
3647 }
3648 }
3649
3650 if (DEBUG_IROPT) {
3651 vex_printf("\nUNROLLED (%llx)\n", my_addr);
3652 ppIRSB(bb1);
3653 vex_printf("\n");
3654 }
3655
3656 /* Flattening; sigh. The unroller succeeds in breaking flatness
3657 by negating the test condition. This should be fixed properly.
3658 For the moment use this shotgun approach. */
3659 return flatten_BB(bb1);
3660 }
3661
3662
3663 /*---------------------------------------------------------------*/
3664 /*--- The tree builder ---*/
3665 /*---------------------------------------------------------------*/
3666
3667 /* This isn't part of IR optimisation. Really it's a pass done prior
3668 to instruction selection, which improves the code that the
3669 instruction selector can produce. */
3670
3671 /* --- The 'tmp' environment is the central data structure here --- */
3672
3673 /* The number of outstanding bindings we're prepared to track.
3674 The number of times the env becomes full and we have to dump
3675 the oldest binding (hence reducing code quality) falls very
3676 rapidly as the env size increases. 8 gives reasonable performance
3677 under most circumstances. */
3678 #define A_NENV 10
3679
3680 /* bindee == NULL === slot is not in use
3681 bindee != NULL === slot is in use
3682 */
3683 typedef
3684 struct {
3685 IRTemp binder;
3686 IRExpr* bindee;
3687 Bool doesLoad;
3688 Bool doesGet;
3689 }
3690 ATmpInfo;
3691
3692 __attribute__((unused))
3693 static void ppAEnv ( ATmpInfo* env )
3694 {
3695 Int i;
3696 for (i = 0; i < A_NENV; i++) {
3697 vex_printf("%d tmp %d val ", i, (Int)env[i].binder);
3698 if (env[i].bindee)
3699 ppIRExpr(env[i].bindee);
3700 else
3701 vex_printf("(null)");
3702 vex_printf("\n");
3703 }
3704 }
3705
3706 /* --- Tree-traversal fns --- */
3707
3708 /* Traverse an expr, and detect if any part of it reads memory or does
3709 a Get. Be careful ... this really controls how much the
3710 tree-builder can reorder the code, so getting it right is critical.
3711 */
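/* E.g. for the (hypothetical) expression Add32(LDle:I32(t1),GET:I32(8)),
   this sets both *doesLoad and *doesGet. */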
3712 static void setHints_Expr (Bool* doesLoad, Bool* doesGet, IRExpr* e )
3713 {
3714 Int i;
3715 switch (e->tag) {
3716 case Iex_CCall:
3717 for (i = 0; e->Iex.CCall.args[i]; i++)
3718 setHints_Expr(doesLoad, doesGet, e->Iex.CCall.args[i]);
3719 return;
3720 case Iex_Mux0X:
3721 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.cond);
3722 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.expr0);
3723 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.exprX);
3724 return;
3725 case Iex_Qop:
3726 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg1);
3727 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg2);
3728 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg3);
3729 setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg4);
3730 return;
3731 case Iex_Triop:
3732 setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg1);
3733 setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg2);
3734 setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg3);
3735 return;
3736 case Iex_Binop:
3737 setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg1);
3738 setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg2);
3739 return;
3740 case Iex_Unop:
3741 setHints_Expr(doesLoad, doesGet, e->Iex.Unop.arg);
3742 return;
3743 case Iex_Load:
3744 *doesLoad = True;
3745 setHints_Expr(doesLoad, doesGet, e->Iex.Load.addr);
3746 return;
3747 case Iex_Get:
3748 *doesGet = True;
3749 return;
3750 case Iex_GetI:
3751 *doesGet = True;
3752 setHints_Expr(doesLoad, doesGet, e->Iex.GetI.ix);
3753 return;
3754 case Iex_RdTmp:
3755 case Iex_Const:
3756 return;
3757 default:
3758 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3759 vpanic("setHints_Expr");
3760 }
3761 }
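
/* For example (illustrative): running setHints_Expr over
       Add32( LDle:I32(t1), GET:I32(16) )
   sets both *doesLoad and *doesGet to True, which later forces the
   binding holding this tree to be dumped before any store, Put or
   other invalidating statement, rather than floated past it. */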
3762
3763
3764 /* Add a binding to the front of the env and slide all the rest
3765 backwards. It should be the case that the last slot is free. */
3766 static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
3767 {
3768 Int i;
3769 vassert(env[A_NENV-1].bindee == NULL);
3770 for (i = A_NENV-1; i >= 1; i--)
3771 env[i] = env[i-1];
3772 env[0].binder = binder;
3773 env[0].bindee = bindee;
3774 env[0].doesLoad = False; /* filled in later */
3775 env[0].doesGet = False; /* filled in later */
3776 }
3777
3778 /* Given uses :: array of UShort, indexed by IRTemp,
3779    add the use-occurrences of temps in this expression
3780    to 'uses'.
3781 */
3782 static void aoccCount_Expr ( UShort* uses, IRExpr* e )
3783 {
3784 Int i;
3785
3786 switch (e->tag) {
3787
3788 case Iex_RdTmp: /* the only interesting case */
3789 uses[e->Iex.RdTmp.tmp]++;
3790 return;
3791
3792 case Iex_Mux0X:
3793 aoccCount_Expr(uses, e->Iex.Mux0X.cond);
3794 aoccCount_Expr(uses, e->Iex.Mux0X.expr0);
3795 aoccCount_Expr(uses, e->Iex.Mux0X.exprX);
3796 return;
3797
3798 case Iex_Qop:
3799 aoccCount_Expr(uses, e->Iex.Qop.arg1);
3800 aoccCount_Expr(uses, e->Iex.Qop.arg2);
3801 aoccCount_Expr(uses, e->Iex.Qop.arg3);
3802 aoccCount_Expr(uses, e->Iex.Qop.arg4);
3803 return;
3804
3805 case Iex_Triop:
3806 aoccCount_Expr(uses, e->Iex.Triop.arg1);
3807 aoccCount_Expr(uses, e->Iex.Triop.arg2);
3808 aoccCount_Expr(uses, e->Iex.Triop.arg3);
3809 return;
3810
3811 case Iex_Binop:
3812 aoccCount_Expr(uses, e->Iex.Binop.arg1);
3813 aoccCount_Expr(uses, e->Iex.Binop.arg2);
3814 return;
3815
3816 case Iex_Unop:
3817 aoccCount_Expr(uses, e->Iex.Unop.arg);
3818 return;
3819
3820 case Iex_Load:
3821 aoccCount_Expr(uses, e->Iex.Load.addr);
3822 return;
3823
3824 case Iex_CCall:
3825 for (i = 0; e->Iex.CCall.args[i]; i++)
3826 aoccCount_Expr(uses, e->Iex.CCall.args[i]);
3827 return;
3828
3829 case Iex_GetI:
3830 aoccCount_Expr(uses, e->Iex.GetI.ix);
3831 return;
3832
3833 case Iex_Const:
3834 case Iex_Get:
3835 return;
3836
3837 default:
3838 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3839 vpanic("aoccCount_Expr");
3840 }
3841 }
3842
3843
3844 /* Given uses :: array of UShort, indexed by IRTemp,
3845    add the use-occurrences of temps in this statement
3846    to 'uses'.
3847 */
3848 static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
3849 {
3850 Int i;
3851 IRDirty* d;
3852 IRCAS* cas;
3853 switch (st->tag) {
3854 case Ist_AbiHint:
3855 aoccCount_Expr(uses, st->Ist.AbiHint.base);
3856 aoccCount_Expr(uses, st->Ist.AbiHint.nia);
3857 return;
3858 case Ist_WrTmp:
3859 aoccCount_Expr(uses, st->Ist.WrTmp.data);
3860 return;
3861 case Ist_Put:
3862 aoccCount_Expr(uses, st->Ist.Put.data);
3863 return;
3864 case Ist_PutI:
3865 aoccCount_Expr(uses, st->Ist.PutI.ix);
3866 aoccCount_Expr(uses, st->Ist.PutI.data);
3867 return;
3868 case Ist_Store:
3869 aoccCount_Expr(uses, st->Ist.Store.addr);
3870 aoccCount_Expr(uses, st->Ist.Store.data);
3871 return;
3872 case Ist_CAS:
3873 cas = st->Ist.CAS.details;
3874 aoccCount_Expr(uses, cas->addr);
3875 if (cas->expdHi)
3876 aoccCount_Expr(uses, cas->expdHi);
3877 aoccCount_Expr(uses, cas->expdLo);
3878 if (cas->dataHi)
3879 aoccCount_Expr(uses, cas->dataHi);
3880 aoccCount_Expr(uses, cas->dataLo);
3881 return;
3882 case Ist_LLSC:
3883 aoccCount_Expr(uses, st->Ist.LLSC.addr);
3884 if (st->Ist.LLSC.storedata)
3885 aoccCount_Expr(uses, st->Ist.LLSC.storedata);
3886 return;
3887 case Ist_Dirty:
3888 d = st->Ist.Dirty.details;
3889 if (d->mFx != Ifx_None)
3890 aoccCount_Expr(uses, d->mAddr);
3891 aoccCount_Expr(uses, d->guard);
3892 for (i = 0; d->args[i]; i++)
3893 aoccCount_Expr(uses, d->args[i]);
3894 return;
3895 case Ist_NoOp:
3896 case Ist_IMark:
3897 case Ist_MBE:
3898 return;
3899 case Ist_Exit:
3900 aoccCount_Expr(uses, st->Ist.Exit.guard);
3901 return;
3902 default:
3903 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
3904 vpanic("aoccCount_Stmt");
3905 }
3906 }
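
/* For example (illustrative): scanning "t2 = Add32(t1,t1)" increments
   uses[t1] twice and leaves uses[t2] unchanged -- only reads of a
   temp count as occurrences, not the write that defines it. */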
3907
3908 /* Look up a binding for tmp in the env. If found, return the bound
3909 expression, and set the env's binding to NULL so it is marked as
3910 used. If not found, return NULL. */
3911
3912 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
3913 {
3914 Int i;
3915 for (i = 0; i < A_NENV; i++) {
3916 if (env[i].binder == tmp && env[i].bindee != NULL) {
3917 IRExpr* bindee = env[i].bindee;
3918 env[i].bindee = NULL;
3919 return bindee;
3920 }
3921 }
3922 return NULL;
3923 }
3924
3925 /* Traverse e, looking for temps. For each observed temp, see if env
3926 contains a binding for the temp, and if so return the bound value.
3927 The env has the property that any binding it holds is
3928 'single-shot', so once a binding is used, it is marked as no longer
3929 available, by setting its .bindee field to NULL. */
3930
3931 static inline Bool is_Unop ( IRExpr* e, IROp op ) {
3932 return e->tag == Iex_Unop && e->Iex.Unop.op == op;
3933 }
3934 static inline Bool is_Binop ( IRExpr* e, IROp op ) {
3935 return e->tag == Iex_Binop && e->Iex.Binop.op == op;
3936 }
3937
3938 static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
3939 {
3940 switch (op) {
3941 case Iop_Or32:
3942 /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) ) */
3943 if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
3944 return IRExpr_Unop( Iop_CmpwNEZ32,
3945 IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
3946 a2->Iex.Unop.arg ) );
3947 break;
3948 default:
3949 break;
3950 }
3951 /* no reduction rule applies */
3952 return IRExpr_Binop( op, a1, a2 );
3953 }
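
/* For example (illustrative): given the flat input

      t3 = CmpwNEZ32(t1)
      t4 = CmpwNEZ32(t2)
      t5 = Or32(t3,t4)

   and assuming t3 and t4 are each used once, tree-building produces
   not Or32(CmpwNEZ32(t1),CmpwNEZ32(t2)) but the cheaper equivalent
   CmpwNEZ32(Or32(t1,t2)), courtesy of the rule above. */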
3954
3955 static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
3956 {
3957 switch (op) {
3958 case Iop_CmpwNEZ64:
3959 /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
3960 if (is_Binop(aa, Iop_Or64)
3961 && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
3962 return fold_IRExpr_Unop(
3963 Iop_CmpwNEZ64,
3964 IRExpr_Binop(Iop_Or64,
3965 aa->Iex.Binop.arg1->Iex.Unop.arg,
3966 aa->Iex.Binop.arg2));
3967 /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
3968 if (is_Binop(aa, Iop_Or64)
3969 && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
3970 return fold_IRExpr_Unop(
3971 Iop_CmpwNEZ64,
3972 IRExpr_Binop(Iop_Or64,
3973 aa->Iex.Binop.arg1,
3974 aa->Iex.Binop.arg2->Iex.Unop.arg));
3975 break;
3976 case Iop_CmpNEZ64:
3977 /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
3978 if (is_Unop(aa, Iop_Left64))
3979 return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
3980 break;
3981 case Iop_CmpwNEZ32:
3982 /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
3983 if (is_Unop(aa, Iop_CmpwNEZ32))
3984 return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
3985 break;
3986 case Iop_CmpNEZ32:
3987 /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
3988 if (is_Unop(aa, Iop_Left32))
3989 return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
3990 break;
3991 case Iop_Left32:
3992 /* Left32( Left32(x) ) --> Left32(x) */
3993 if (is_Unop(aa, Iop_Left32))
3994 return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
3995 break;
3996 case Iop_32to1:
3997 /* 32to1( 1Uto32 ( x ) ) --> x */
3998 if (is_Unop(aa, Iop_1Uto32))
3999 return aa->Iex.Unop.arg;
4000 /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
4001 if (is_Unop(aa, Iop_CmpwNEZ32))
4002 return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
4003 break;
4004 case Iop_64to1:
4005 /* 64to1( 1Uto64 ( x ) ) --> x */
4006 if (is_Unop(aa, Iop_1Uto64))
4007 return aa->Iex.Unop.arg;
4008 /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
4009 if (is_Unop(aa, Iop_CmpwNEZ64))
4010 return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
4011 break;
4012 case Iop_64to32:
4013 /* 64to32( 32Uto64 ( x )) --> x */
4014 if (is_Unop(aa, Iop_32Uto64))
4015 return aa->Iex.Unop.arg;
4016 /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
4017 if (is_Unop(aa, Iop_8Uto64))
4018 return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
4019 break;
4020
4021 case Iop_32Uto64:
4022 /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
4023 if (is_Unop(aa, Iop_8Uto32))
4024 return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
4025 /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
4026 if (is_Unop(aa, Iop_16Uto32))
4027 return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
4028 break;
4029
4030 case Iop_1Sto32:
4031 /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
4032 if (is_Unop(aa, Iop_CmpNEZ8)
4033 && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
4034 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
4035 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
4036 Iop_CmpNEZ32)) {
4037 return IRExpr_Unop( Iop_CmpwNEZ32,
4038 aa->Iex.Unop.arg->Iex.Unop.arg
4039 ->Iex.Unop.arg->Iex.Unop.arg);
4040 }
4041 break;
4042
4043
4044 default:
4045 break;
4046 }
4047 /* no reduction rule applies */
4048 return IRExpr_Unop( op, aa );
4049 }
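
/* Because atbSubst_Expr rebuilds trees bottom-up through this
   function, the rules above compose.  For example (illustrative),
   64to1( 1Uto64( 32to1( 1Uto32(x) ) ) ) first collapses the inner
   pair to x, leaving 64to1( 1Uto64(x) ), which then also collapses
   to plain x. */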
4050
4051 static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
4052 {
4053 IRExpr* e2;
4054 IRExpr** args2;
4055 Int i;
4056
4057 switch (e->tag) {
4058
4059 case Iex_CCall:
4060 args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
4061 for (i = 0; args2[i]; i++)
4062 args2[i] = atbSubst_Expr(env,args2[i]);
4063 return IRExpr_CCall(
4064 e->Iex.CCall.cee,
4065 e->Iex.CCall.retty,
4066 args2
4067 );
4068 case Iex_RdTmp:
4069 e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
4070 return e2 ? e2 : e;
4071 case Iex_Mux0X:
4072 return IRExpr_Mux0X(
4073 atbSubst_Expr(env, e->Iex.Mux0X.cond),
4074 atbSubst_Expr(env, e->Iex.Mux0X.expr0),
4075 atbSubst_Expr(env, e->Iex.Mux0X.exprX)
4076 );
4077 case Iex_Qop:
4078 return IRExpr_Qop(
4079 e->Iex.Qop.op,
4080 atbSubst_Expr(env, e->Iex.Qop.arg1),
4081 atbSubst_Expr(env, e->Iex.Qop.arg2),
4082 atbSubst_Expr(env, e->Iex.Qop.arg3),
4083 atbSubst_Expr(env, e->Iex.Qop.arg4)
4084 );
4085 case Iex_Triop:
4086 return IRExpr_Triop(
4087 e->Iex.Triop.op,
4088 atbSubst_Expr(env, e->Iex.Triop.arg1),
4089 atbSubst_Expr(env, e->Iex.Triop.arg2),
4090 atbSubst_Expr(env, e->Iex.Triop.arg3)
4091 );
4092 case Iex_Binop:
4093 return fold_IRExpr_Binop(
4094 e->Iex.Binop.op,
4095 atbSubst_Expr(env, e->Iex.Binop.arg1),
4096 atbSubst_Expr(env, e->Iex.Binop.arg2)
4097 );
4098 case Iex_Unop:
4099 return fold_IRExpr_Unop(
4100 e->Iex.Unop.op,
4101 atbSubst_Expr(env, e->Iex.Unop.arg)
4102 );
4103 case Iex_Load:
4104 return IRExpr_Load(
4105 e->Iex.Load.end,
4106 e->Iex.Load.ty,
4107 atbSubst_Expr(env, e->Iex.Load.addr)
4108 );
4109 case Iex_GetI:
4110 return IRExpr_GetI(
4111 e->Iex.GetI.descr,
4112 atbSubst_Expr(env, e->Iex.GetI.ix),
4113 e->Iex.GetI.bias
4114 );
4115 case Iex_Const:
4116 case Iex_Get:
4117 return e;
4118 default:
4119 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4120 vpanic("atbSubst_Expr");
4121 }
4122 }
4123
4124 /* Same deal as atbSubst_Expr, except for stmts. */
4125
4126 static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
4127 {
4128 Int i;
4129 IRDirty *d, *d2;
4130 IRCAS *cas, *cas2;
4131 switch (st->tag) {
4132 case Ist_AbiHint:
4133 return IRStmt_AbiHint(
4134 atbSubst_Expr(env, st->Ist.AbiHint.base),
4135 st->Ist.AbiHint.len,
4136 atbSubst_Expr(env, st->Ist.AbiHint.nia)
4137 );
4138 case Ist_Store:
4139 return IRStmt_Store(
4140 st->Ist.Store.end,
4141 atbSubst_Expr(env, st->Ist.Store.addr),
4142 atbSubst_Expr(env, st->Ist.Store.data)
4143 );
4144 case Ist_WrTmp:
4145 return IRStmt_WrTmp(
4146 st->Ist.WrTmp.tmp,
4147 atbSubst_Expr(env, st->Ist.WrTmp.data)
4148 );
4149 case Ist_Put:
4150 return IRStmt_Put(
4151 st->Ist.Put.offset,
4152 atbSubst_Expr(env, st->Ist.Put.data)
4153 );
4154 case Ist_PutI:
4155 return IRStmt_PutI(
4156 st->Ist.PutI.descr,
4157 atbSubst_Expr(env, st->Ist.PutI.ix),
4158 st->Ist.PutI.bias,
4159 atbSubst_Expr(env, st->Ist.PutI.data)
4160 );
4161
4162 case Ist_Exit:
4163 return IRStmt_Exit(
4164 atbSubst_Expr(env, st->Ist.Exit.guard),
4165 st->Ist.Exit.jk,
4166 st->Ist.Exit.dst
4167 );
4168 case Ist_IMark:
4169 return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);
4170 case Ist_NoOp:
4171 return IRStmt_NoOp();
4172 case Ist_MBE:
4173 return IRStmt_MBE(st->Ist.MBE.event);
4174 case Ist_CAS:
4175 cas = st->Ist.CAS.details;
4176 cas2 = mkIRCAS(
4177 cas->oldHi, cas->oldLo, cas->end,
4178 atbSubst_Expr(env, cas->addr),
4179 cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
4180 atbSubst_Expr(env, cas->expdLo),
4181 cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
4182 atbSubst_Expr(env, cas->dataLo)
4183 );
4184 return IRStmt_CAS(cas2);
4185 case Ist_LLSC:
4186 return IRStmt_LLSC(
4187 st->Ist.LLSC.end,
4188 st->Ist.LLSC.result,
4189 atbSubst_Expr(env, st->Ist.LLSC.addr),
4190 st->Ist.LLSC.storedata
4191 ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
4192 );
4193 case Ist_Dirty:
4194 d = st->Ist.Dirty.details;
4195 d2 = emptyIRDirty();
4196 *d2 = *d;
4197 if (d2->mFx != Ifx_None)
4198 d2->mAddr = atbSubst_Expr(env, d2->mAddr);
4199 d2->guard = atbSubst_Expr(env, d2->guard);
4200 for (i = 0; d2->args[i]; i++)
4201 d2->args[i] = atbSubst_Expr(env, d2->args[i]);
4202 return IRStmt_Dirty(d2);
4203 default:
4204 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
4205 vpanic("atbSubst_Stmt");
4206 }
4207 }
4208
4209 /* notstatic */ void ado_treebuild_BB ( IRSB* bb )
4210 {
4211 Int i, j, k, m;
4212 Bool stmtPuts, stmtStores, invalidateMe;
4213 IRStmt* st;
4214 IRStmt* st2;
4215 ATmpInfo env[A_NENV];
4216
4217 Int n_tmps = bb->tyenv->types_used;
4218 UShort* uses = LibVEX_Alloc(n_tmps * sizeof(UShort));
4219
4220 /* Phase 1. Scan forwards in bb, counting use occurrences of each
4221 temp. Also count occurrences in the bb->next field. */
4222
4223 for (i = 0; i < n_tmps; i++)
4224 uses[i] = 0;
4225
4226 for (i = 0; i < bb->stmts_used; i++) {
4227 st = bb->stmts[i];
4228 if (st->tag == Ist_NoOp)
4229 continue;
4230 aoccCount_Stmt( uses, st );
4231 }
4232 aoccCount_Expr(uses, bb->next );
4233
4234 # if 0
4235 for (i = 0; i < n_tmps; i++) {
4236 if (uses[i] == 0)
4237 continue;
4238 ppIRTemp( (IRTemp)i );
4239 vex_printf(" used %d\n", (Int)uses[i] );
4240 }
4241 # endif
4242
4243 /* Phase 2. Scan forwards in bb. For each statement in turn:
4244
4245       If the env is full, emit the oldest binding (the end element).
4246       This guarantees there is at least one free slot in what follows.
4247
4248 On seeing 't = E', occ(t)==1,
4249 let E'=env(E)
4250 delete this stmt
4251 add t -> E' to the front of the env
4252 Examine E' and set the hints for E' appropriately
4253 (doesLoad? doesGet?)
4254
4255 On seeing any other stmt,
4256 let stmt' = env(stmt)
4257 remove from env any 't=E' binds invalidated by stmt
4258 emit the invalidated stmts
4259 emit stmt'
4260 compact any holes in env
4261 by sliding entries towards the front
4262
4263 Finally, apply env to bb->next.
4264 */
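
   /* A worked sketch of the above (illustrative, assuming each temp
      is used exactly once):

          t1 = GET:I32(0)
          t2 = Add32(t1,0x4:I32)
          PUT(0) = t2

      Both WrTmps are deleted and parked in the env; the PUT then
      consumes the t2 binding (which already absorbed t1), so the
      only statement emitted is

          PUT(0) = Add32(GET:I32(0),0x4:I32)

      Note the PUT cannot dump the GET-carrying bindings first:
      bindings are substituted into the statement before the
      surviving (unused) bindings are checked for invalidation. */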
4265
4266 for (i = 0; i < A_NENV; i++) {
4267 env[i].bindee = NULL;
4268 env[i].binder = IRTemp_INVALID;
4269 }
4270
4271 /* The stmts in bb are being reordered, and we are guaranteed to
4272       end up with no more than the number we started with.  Let i be
4273       the cursor over the stmts being examined, and j <= i the cursor
4274       at which the next surviving stmt is written.
4275 */
4276 j = 0;
4277 for (i = 0; i < bb->stmts_used; i++) {
4278
4279 st = bb->stmts[i];
4280 if (st->tag == Ist_NoOp)
4281 continue;
4282
4283 /* Ensure there's at least one space in the env, by emitting
4284 the oldest binding if necessary. */
4285 if (env[A_NENV-1].bindee != NULL) {
4286 bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
4287 env[A_NENV-1].bindee );
4288 j++;
4289 vassert(j <= i);
4290 env[A_NENV-1].bindee = NULL;
4291 }
4292
4293 /* Consider current stmt. */
4294 if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
4295 IRExpr *e, *e2;
4296
4297 /* optional extra: dump dead bindings as we find them.
4298 Removes the need for a prior dead-code removal pass. */
4299 if (uses[st->Ist.WrTmp.tmp] == 0) {
4300 if (0) vex_printf("DEAD binding\n");
4301 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
4302 }
4303 vassert(uses[st->Ist.WrTmp.tmp] == 1);
4304
4305 /* ok, we have 't = E', occ(t)==1. Do the abovementioned
4306 actions. */
4307 e = st->Ist.WrTmp.data;
4308 e2 = atbSubst_Expr(env, e);
4309 addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
4310 setHints_Expr(&env[0].doesLoad, &env[0].doesGet, e2);
4311 /* don't advance j, as we are deleting this stmt and instead
4312 holding it temporarily in the env. */
4313 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
4314 }
4315
4316 /* we get here for any other kind of statement. */
4317 /* 'use up' any bindings required by the current statement. */
4318 st2 = atbSubst_Stmt(env, st);
4319
4320 /* Now, before this stmt, dump any bindings in env that it
4321 invalidates. These need to be dumped in the order in which
4322 they originally entered env -- that means from oldest to
4323 youngest. */
4324
4325 /* stmtPuts/stmtStores characterise what the stmt under
4326          consideration does, or might do (True is the safe side). */
4327 stmtPuts
4328 = toBool( st->tag == Ist_Put
4329 || st->tag == Ist_PutI
4330 || st->tag == Ist_Dirty );
4331
4332       /* stmtStores is True if this stmt writes memory or might do
4333          so (==> we don't want to reorder other loads or stores
4334          relative to it). Both LL and SC fall under this heading,
4335          since we really ought to be conservative and not reorder
4336          any other memory transactions relative to them. */
4337 stmtStores
4338 = toBool( st->tag == Ist_Store
4339 || st->tag == Ist_Dirty
4340 || st->tag == Ist_LLSC );
4341
4342 for (k = A_NENV-1; k >= 0; k--) {
4343 if (env[k].bindee == NULL)
4344 continue;
4345 /* Compare the actions of this stmt with the actions of
4346 binding 'k', to see if they invalidate the binding. */
4347 invalidateMe
4348 = toBool(
4349 /* a store invalidates loaded data */
4350 (env[k].doesLoad && stmtStores)
4351 /* a put invalidates get'd data */
4352 || (env[k].doesGet && stmtPuts)
4353 /* a put invalidates loaded data. Note, we could do
4354 much better here in the sense that we only need to
4355 invalidate trees containing loads if the Put in
4356 question is marked as requiring precise
4357 exceptions. */
4358 || (env[k].doesLoad && stmtPuts)
4359 /* probably overly conservative: a memory bus event
4360 invalidates absolutely everything, so that all
4361 computation prior to it is forced to complete before
4362 proceeding with the event (fence,lock,unlock). */
4363 || st->tag == Ist_MBE
4364 /* also be (probably overly) paranoid re AbiHints */
4365 || st->tag == Ist_AbiHint
4366 );
4367 if (invalidateMe) {
4368 bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
4369 j++;
4370 vassert(j <= i);
4371 env[k].bindee = NULL;
4372 }
4373 }
4374
4375 /* Slide in-use entries in env up to the front */
4376 m = 0;
4377 for (k = 0; k < A_NENV; k++) {
4378 if (env[k].bindee != NULL) {
4379 env[m] = env[k];
4380 m++;
4381 }
4382 }
4383       for (; m < A_NENV; m++) {
4384 env[m].bindee = NULL;
4385 }
4386
4387 /* finally, emit the substituted statement */
4388 bb->stmts[j] = st2;
4389 /* vex_printf("**2 "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
4390 j++;
4391
4392 vassert(j <= i+1);
4393 } /* for each stmt in the original bb ... */
4394
4395 /* Finally ... substitute the ->next field as much as possible, and
4396 dump any left-over bindings. Hmm. Perhaps there should be no
4397 left over bindings? Or any left-over bindings are
4398 by definition dead? */
4399 bb->next = atbSubst_Expr(env, bb->next);
4400 bb->stmts_used = j;
4401 }
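
/* Usage note (a sketch, not prescriptive): ado_treebuild_BB is meant
   to run on flat IR, typically the output of do_iropt_BB below, and
   it rewrites the block in place:

      IRSB* sb = do_iropt_BB(sb0, specHelper, preciseMemExnsFn,
                             guest_addr, guest_arch);
      ado_treebuild_BB(sb);    -- sb now contains trees, not flat IR

   after which sb should go straight to instruction selection, since
   the optimisation passes above all assume flatness. */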
4402
4403
4404 /*---------------------------------------------------------------*/
4405 /*--- iropt main ---*/
4406 /*---------------------------------------------------------------*/
4407
4408 static Bool iropt_verbose = False; /* True; */
4409
4410
4411 /* Do a simple cleanup pass on bb. This is: redundant Get removal,
4412 redundant Put removal, constant propagation, dead code removal,
4413 clean helper specialisation, and dead code removal (again).
4414 */
4415
4416
4417 static
4418 IRSB* cheap_transformations (
4419 IRSB* bb,
4420 IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
4421 Bool (*preciseMemExnsFn)(Int,Int)
4422 )
4423 {
4424 redundant_get_removal_BB ( bb );
4425 if (iropt_verbose) {
4426 vex_printf("\n========= REDUNDANT GET\n\n" );
4427 ppIRSB(bb);
4428 }
4429
4430 redundant_put_removal_BB ( bb, preciseMemExnsFn );
4431 if (iropt_verbose) {
4432 vex_printf("\n========= REDUNDANT PUT\n\n" );
4433 ppIRSB(bb);
4434 }
4435
4436 bb = cprop_BB ( bb );
4437 if (iropt_verbose) {
4438 vex_printf("\n========= CPROPD\n\n" );
4439 ppIRSB(bb);
4440 }
4441
4442 do_deadcode_BB ( bb );
4443 if (iropt_verbose) {
4444 vex_printf("\n========= DEAD\n\n" );
4445 ppIRSB(bb);
4446 }
4447
4448 bb = spec_helpers_BB ( bb, specHelper );
4449 do_deadcode_BB ( bb );
4450 if (iropt_verbose) {
4451 vex_printf("\n========= SPECd \n\n" );
4452 ppIRSB(bb);
4453 }
4454
4455 return bb;
4456 }
4457
4458
4459 /* Do some more expensive transformations on bb, which are aimed at
4460 optimising as much as possible in the presence of GetI and PutI. */
4461
4462 static
4463 IRSB* expensive_transformations( IRSB* bb )
4464 {
4465 (void)do_cse_BB( bb );
4466 collapse_AddSub_chains_BB( bb );
4467 do_redundant_GetI_elimination( bb );
4468 do_redundant_PutI_elimination( bb );
4469 do_deadcode_BB( bb );
4470 return bb;
4471 }
4472
4473
4474 /* Scan a flattened BB to look for signs that more expensive
4475 optimisations might be useful:
4476 - find out if there are any GetIs and PutIs
4477 - find out if there are any floating or vector-typed temporaries
4478 */
4479
4480 static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
4481 /*OUT*/Bool* hasVorFtemps,
4482 IRSB* bb )
4483 {
4484 Int i, j;
4485 IRStmt* st;
4486 IRDirty* d;
4487 IRCAS* cas;
4488
4489 *hasGetIorPutI = False;
4490 *hasVorFtemps = False;
4491
4492 for (i = 0; i < bb->stmts_used; i++) {
4493 st = bb->stmts[i];
4494 switch (st->tag) {
4495 case Ist_AbiHint:
4496 vassert(isIRAtom(st->Ist.AbiHint.base));
4497 vassert(isIRAtom(st->Ist.AbiHint.nia));
4498 break;
4499 case Ist_PutI:
4500 *hasGetIorPutI = True;
4501 break;
4502 case Ist_WrTmp:
4503 if (st->Ist.WrTmp.data->tag == Iex_GetI)
4504 *hasGetIorPutI = True;
4505 switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
4506 case Ity_I1: case Ity_I8: case Ity_I16:
4507 case Ity_I32: case Ity_I64: case Ity_I128:
4508 break;
4509 case Ity_F32: case Ity_F64: case Ity_V128:
4510 *hasVorFtemps = True;
4511 break;
4512 default:
4513 goto bad;
4514 }
4515 break;
4516 case Ist_Put:
4517 vassert(isIRAtom(st->Ist.Put.data));
4518 break;
4519 case Ist_Store:
4520 vassert(isIRAtom(st->Ist.Store.addr));
4521 vassert(isIRAtom(st->Ist.Store.data));
4522 break;
4523 case Ist_CAS:
4524 cas = st->Ist.CAS.details;
4525 vassert(isIRAtom(cas->addr));
4526 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
4527 vassert(isIRAtom(cas->expdLo));
4528 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
4529 vassert(isIRAtom(cas->dataLo));
4530 break;
4531 case Ist_LLSC:
4532 vassert(isIRAtom(st->Ist.LLSC.addr));
4533 if (st->Ist.LLSC.storedata)
4534 vassert(isIRAtom(st->Ist.LLSC.storedata));
4535 break;
4536 case Ist_Dirty:
4537 d = st->Ist.Dirty.details;
4538 vassert(isIRAtom(d->guard));
4539 for (j = 0; d->args[j]; j++)
4540 vassert(isIRAtom(d->args[j]));
4541 if (d->mFx != Ifx_None)
4542 vassert(isIRAtom(d->mAddr));
4543 break;
4544 case Ist_NoOp:
4545 case Ist_IMark:
4546 case Ist_MBE:
4547 break;
4548 case Ist_Exit:
4549 vassert(isIRAtom(st->Ist.Exit.guard));
4550 break;
4551 default:
4552 bad:
4553 ppIRStmt(st);
4554 vpanic("considerExpensives");
4555 }
4556 }
4557 }
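
/* For example (illustrative): a block containing a PutI, or a WrTmp
   whose rhs is a GetI, comes back with *hasGetIorPutI == True; a
   block defining an Ity_F64 or Ity_V128 temp comes back with
   *hasVorFtemps == True.  do_iropt_BB below uses these two flags to
   choose between a CSE-only cleanup and the full expensive pass. */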
4558
4559
4560 /* ---------------- The main iropt entry point. ---------------- */
4561
4562 /* exported from this file */
4563 /* Rules of the game:
4564
4565 - IRExpr/IRStmt trees should be treated as immutable, as they
4566 may get shared. So never change a field of such a tree node;
4567 instead construct and return a new one if needed.
4568 */
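
/* For instance (a sketch): to "modify" the argument of a Unop node,
   build a fresh node rather than writing through the old one:

      IRExpr* e2 = IRExpr_Unop(e->Iex.Unop.op, newArg);    -- ok
      e->Iex.Unop.arg = newArg;                            -- never this

   since e may be shared with other trees in the same block. */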
4569
4570
4571 IRSB* do_iropt_BB(
4572 IRSB* bb0,
4573 IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
4574 Bool (*preciseMemExnsFn)(Int,Int),
4575 Addr64 guest_addr,
4576 VexArch guest_arch
4577 )
4578 {
4579 static Int n_total = 0;
4580 static Int n_expensive = 0;
4581
4582 Bool hasGetIorPutI, hasVorFtemps;
4583 IRSB *bb, *bb2;
4584
4585 n_total++;
4586
4587 /* First flatten the block out, since all other
4588 phases assume flat code. */
4589
4590 bb = flatten_BB ( bb0 );
4591
4592 if (iropt_verbose) {
4593 vex_printf("\n========= FLAT\n\n" );
4594 ppIRSB(bb);
4595 }
4596
4597 /* If at level 0, stop now. */
4598 if (vex_control.iropt_level <= 0) return bb;
4599
4600 /* Now do a preliminary cleanup pass, and figure out if we also
4601 need to do 'expensive' optimisations. Expensive optimisations
4602 are deemed necessary if the block contains any GetIs or PutIs.
4603 If needed, do expensive transformations and then another cheap
4604 cleanup pass. */
4605
4606 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
4607
4608 if (guest_arch == VexArchARM) {
4609 /* Translating Thumb2 code produces a lot of chaff. We have to
4610 work extra hard to get rid of it. */
4611 bb = cprop_BB(bb);
4612 bb = spec_helpers_BB ( bb, specHelper );
4613 redundant_put_removal_BB ( bb, preciseMemExnsFn );
4614 do_deadcode_BB( bb );
4615 }
4616
4617 if (vex_control.iropt_level > 1) {
4618
4619 /* Peer at what we have, to decide how much more effort to throw
4620 at it. */
4621 considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );
4622
4623 if (hasVorFtemps && !hasGetIorPutI) {
4624 /* If any evidence of FP or Vector activity, CSE, as that
4625 tends to mop up all manner of lardy code to do with
4626 rounding modes. Don't bother if hasGetIorPutI since that
4627 case leads into the expensive transformations, which do
4628 CSE anyway. */
4629 (void)do_cse_BB( bb );
4630 do_deadcode_BB( bb );
4631 }
4632
4633 if (hasGetIorPutI) {
4634 Bool cses;
4635 n_expensive++;
4636 if (DEBUG_IROPT)
4637 vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
4638 bb = expensive_transformations( bb );
4639 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
4640 /* Potentially common up GetIs */
4641 cses = do_cse_BB( bb );
4642 if (cses)
4643 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
4644 }
4645
4646 /* Now have a go at unrolling simple (single-BB) loops. If
4647 successful, clean up the results as much as possible. */
4648
4649 bb2 = maybe_loop_unroll_BB( bb, guest_addr );
4650 if (bb2) {
4651 bb = cheap_transformations( bb2, specHelper, preciseMemExnsFn );
4652 if (hasGetIorPutI) {
4653 bb = expensive_transformations( bb );
4654 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
4655 } else {
4656 /* at least do CSE and dead code removal */
4657 do_cse_BB( bb );
4658 do_deadcode_BB( bb );
4659 }
4660 if (0) vex_printf("vex iropt: unrolled a loop\n");
4661 }
4662
4663 }
4664
4665 return bb;
4666 }
4667
4668
4669 /*---------------------------------------------------------------*/
4670 /*--- end ir_opt.c ---*/
4671 /*---------------------------------------------------------------*/
4672