1 
2 /*---------------------------------------------------------------*/
3 /*--- begin                                 host_reg_alloc2.c ---*/
4 /*---------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2011 OpenWorks LLP
11       info@open-works.net
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26    02110-1301, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 
30    Neither the names of the U.S. Department of Energy nor the
31    University of California nor the names of its contributors may be
32    used to endorse or promote products derived from this software
33    without prior written permission.
34 */
35 
36 #include "libvex_basictypes.h"
37 #include "libvex.h"
38 
39 #include "main_util.h"
40 #include "host_generic_regs.h"
41 
42 /* Set to 1 for lots of debugging output. */
43 #define DEBUG_REGALLOC 0
44 
45 
46 /* TODO 27 Oct 04:
47 
48    Better consistency checking from what isMove tells us.
49 
50    We can possibly do V-V coalescing even when the src is spilled,
51    providing we can arrange for the dst to have the same spill slot.
52 
53    Note that state[].hreg is the same as the available real regs.
54 
55    Generally rationalise data structures.  */
56 
57 
58 /* Records information on virtual register live ranges.  Computed once
59    and remains unchanged after that. */
60 typedef
61    struct {
62       /* Becomes live for the first time after this insn ... */
63       Short live_after;
64       /* Becomes dead for the last time before this insn ... */
65       Short dead_before;
66       /* The "home" spill slot, if needed.  Never changes. */
67       Short spill_offset;
68       Short spill_size;
69       /* What kind of register this is. */
70       HRegClass reg_class;
71    }
72    VRegLR;
73 
74 
75 /* Records information on real-register live ranges.  Computed once
76    and remains unchanged after that. */
77 typedef
78    struct {
79       HReg rreg;
80       /* Becomes live after this insn ... */
81       Short live_after;
82       /* Becomes dead before this insn ... */
83       Short dead_before;
84    }
85    RRegLR;
86 
87 
88 /* An array of the following structs (rreg_state) comprises the
89    running state of the allocator.  It indicates what the current
90    disposition of each allocatable real register is.  The array gets
91    updated as the allocator processes instructions. */
92 typedef
93    struct {
94       /* ------ FIELDS WHICH DO NOT CHANGE ------ */
95       /* Which rreg is this for? */
96       HReg rreg;
97       /* Is this involved in any HLRs?  (only an optimisation hint) */
98       Bool has_hlrs;
99       /* ------ FIELDS WHICH DO CHANGE ------ */
100       /* 6 May 07: rearranged fields below so the whole struct fits
101          into 16 bytes on both x86 and amd64. */
102       /* Used when .disp == Bound and we are looking for vregs to
103          spill. */
104       Bool is_spill_cand;
105       /* Optimisation: used when .disp == Bound.  Indicates when the
106          rreg has the same value as the spill slot for the associated
107          vreg.  Is safely left at False, and becomes True after a
108          spill store or reload for this rreg. */
109       Bool eq_spill_slot;
110       /* What's its current disposition? */
111       enum { Free,     /* available for use */
112              Unavail,  /* in a real-reg live range */
113              Bound     /* in use (holding value of some vreg) */
114            }
115            disp;
116       /* If .disp == Bound, what vreg is it bound to? */
117       HReg vreg;
118    }
119    RRegState;
120 
121 
122 /* The allocator also maintains a redundant array of indexes
123    (vreg_state) from vreg numbers back to entries in rreg_state.  It
124    is redundant because vreg_state[i] == j if and only if
125    hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
126    point at each other.  The purpose of this is to speed up activities
127    which involve looking for a particular vreg: there is no need to
128    scan the rreg_state looking for it, just index directly into
129    vreg_state.  The FAQ "does this vreg already have an associated
130    rreg" is the main beneficiary.
131 
132    To indicate, in vreg_state[i], that a given vreg is not currently
133    associated with any rreg, that entry can be set to INVALID_RREG_NO.
134 
135    Because the vreg_state entries are signed Shorts, the max number
136    of vregs that can be handled by regalloc is 32767.
137 */
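/* For example (illustrative only): if vreg 7 currently lives in the rreg
   described by rreg_state[3], then vreg_state[7] == 3 and
   hregNumber(rreg_state[3].vreg) == 7.  The periodic sanity checks in
   Stage 5 below verify exactly this two-way relationship. */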
138 
139 #define INVALID_RREG_NO ((Short)(-1))
140 
141 #define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
142 #define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
143 
144 
145 /* Does this instruction mention a particular reg? */
146 static Bool instrMentionsReg (
147    void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
148    HInstr* instr,
149    HReg r,
150    Bool mode64
151 )
152 {
153    Int       i;
154    HRegUsage reg_usage;
155    (*getRegUsage)(&reg_usage, instr, mode64);
156    for (i = 0; i < reg_usage.n_used; i++)
157       if (reg_usage.hreg[i] == r)
158          return True;
159    return False;
160 }
161 
162 
163 /* Search forward from some given point in the incoming instruction
164    sequence.  Point is to select a virtual register to spill, by
165    finding the vreg which is mentioned as far ahead as possible, in
166    the hope that this will minimise the number of consequent reloads.
167 
168    Only do the search for vregs which are Bound in the running state,
169    and for which the .is_spill_cand field is set.  This allows the
170    caller to arbitrarily restrict the set of spill candidates to be
171    considered.
172 
173    Returns an index into the state array indicating the (v,r) pair to
174    spill, or -1 if none was found.  */
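/* Illustrative example: if the spill candidates are v3, next mentioned at
   insn 12, and v7, next mentioned at insn 30, the entry returned is the
   one holding v7, since postponing its reload as long as possible is the
   cheaper choice. */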
175 static
176 Int findMostDistantlyMentionedVReg (
177    void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
178    HInstrArray* instrs_in,
179    Int          search_from_instr,
180    RRegState*   state,
181    Int          n_state,
182    Bool         mode64
183 )
184 {
185    Int k, m;
186    Int furthest_k = -1;
187    Int furthest   = -1;
188    vassert(search_from_instr >= 0);
189    for (k = 0; k < n_state; k++) {
190       if (!state[k].is_spill_cand)
191          continue;
192       vassert(state[k].disp == Bound);
193       for (m = search_from_instr; m < instrs_in->arr_used; m++) {
194          if (instrMentionsReg(getRegUsage,
195                               instrs_in->arr[m], state[k].vreg, mode64))
196             break;
197       }
198       if (m > furthest) {
199          furthest   = m;
200          furthest_k = k;
201       }
202    }
203    return furthest_k;
204 }
205 
206 
207 /* Check that this vreg has been assigned a sane spill offset. */
208 static inline void sanity_check_spill_offset ( VRegLR* vreg )
209 {
210    if (vreg->reg_class == HRcVec128 || vreg->reg_class == HRcFlt64) {
211       vassert(0 == ((UShort)vreg->spill_offset % 16));
212    } else {
213       vassert(0 == ((UShort)vreg->spill_offset % 8));
214    }
215 }
216 
217 
218 /* Double the size of the real-reg live-range array, if needed. */
219 static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
220 {
221    Int     k;
222    RRegLR* arr2;
223    if (used < *size) return;
224    if (0)
225       vex_printf("ensureRRLRspace: %d -> %d\n", *size, 2 * *size);
226    vassert(used == *size);
227    arr2 = LibVEX_Alloc(2 * *size * sizeof(RRegLR));
228    for (k = 0; k < *size; k++)
229       arr2[k] = (*info)[k];
230    *size *= 2;
231    *info = arr2;
232 }
233 
234 
235 /* Sort an array of RRegLR entries by either the .live_after or
236    .dead_before fields.  This is performance-critical. */
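/* (What follows is a Shell sort; incs[] is the h_{n+1} = 3*h_n + 1 gap
   sequence, whose largest entry comfortably covers the few thousand
   live ranges we ever see per block.) */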
237 static void sortRRLRarray ( RRegLR* arr,
238                             Int size, Bool by_live_after )
239 {
240    Int    incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
241                        9841, 29524, 88573, 265720,
242                        797161, 2391484 };
243    Int    lo = 0;
244    Int    hi = size-1;
245    Int    i, j, h, bigN, hp;
246    RRegLR v;
247 
248    vassert(size >= 0);
249    if (size == 0)
250       return;
251 
252    bigN = hi - lo + 1; if (bigN < 2) return;
253    hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
254 
255    if (by_live_after) {
256 
257       for ( ; hp >= 0; hp--) {
258          h = incs[hp];
259          for (i = lo + h; i <= hi; i++) {
260             v = arr[i];
261             j = i;
262             while (arr[j-h].live_after > v.live_after) {
263                arr[j] = arr[j-h];
264                j = j - h;
265                if (j <= (lo + h - 1)) break;
266             }
267             arr[j] = v;
268          }
269       }
270 
271    } else {
272 
273       for ( ; hp >= 0; hp--) {
274          h = incs[hp];
275          for (i = lo + h; i <= hi; i++) {
276             v = arr[i];
277             j = i;
278             while (arr[j-h].dead_before > v.dead_before) {
279                arr[j] = arr[j-h];
280                j = j - h;
281                if (j <= (lo + h - 1)) break;
282             }
283             arr[j] = v;
284          }
285       }
286 
287    }
288 }
289 
290 
291 /* A target-independent register allocator.  Requires various
292    functions which it uses to deal abstractly with instructions and
293    registers, since it cannot have any target-specific knowledge.
294 
295    Returns a new list of instructions, which, as a result of the
296    behaviour of mapRegs, will be in-place modifications of the
297    original instructions.
298 
299    Requires that the incoming code has been generated using
300    vreg numbers 0, 1 .. n_vregs-1.  Appearance of a vreg outside
301    that range is a checked run-time error.
302 
303    Takes an expandable array of pointers to unallocated insns.
304    Returns an expandable array of pointers to allocated insns.
305 */
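/* Illustrative call sketch (not from this file; the my_* hook names are
   hypothetical placeholders for a backend's real functions):

      HInstrArray* out
         = doRegisterAllocation(
              vcode, rregs, n_rregs,
              my_isMove, my_getRegUsage, my_mapRegs,
              my_genSpill, my_genReload,
              NULL,            // no directReload optimisation offered
              guest_sizeB,
              my_ppInstr, my_ppReg,
              False            // 32-bit mode
           );
*/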
306 HInstrArray* doRegisterAllocation (
307 
308    /* Incoming virtual-registerised code. */
309    HInstrArray* instrs_in,
310 
311    /* An array listing all the real registers the allocator may use,
312       in no particular order. */
313    HReg* available_real_regs,
314    Int   n_available_real_regs,
315 
316    /* Return True iff the given insn is a reg-reg move, in which
317       case also return the src and dst regs. */
318    Bool (*isMove) ( HInstr*, HReg*, HReg* ),
319 
320    /* Get info about register usage in this insn. */
321    void (*getRegUsage) ( HRegUsage*, HInstr*, Bool ),
322 
323    /* Apply a reg-reg mapping to an insn. */
324    void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),
325 
326    /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
327       real reg to a spill slot byte offset.  The two leading HInstr**
328       args are out parameters, through which the generated insns are
329       returned.  Also (optionally) a 'directReload' function, which
330       attempts to replace a given instruction by one which reads
331       directly from a specified spill slot.  May be NULL, in which
332       case the optimisation is not attempted. */
333    void    (*genSpill)  ( HInstr**, HInstr**, HReg, Int, Bool ),
334    void    (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
335    HInstr* (*directReload) ( HInstr*, HReg, Short ),
336    Int     guest_sizeB,
337 
338    /* For debug printing only. */
339    void (*ppInstr) ( HInstr*, Bool ),
340    void (*ppReg) ( HReg ),
341 
342    /* 32/64bit mode */
343    Bool mode64
344 )
345 {
346 #  define N_SPILL64S  (LibVEX_N_SPILL_BYTES / 8)
347 
348    const Bool eq_spill_opt = True;
349 
350    /* Iterators and temporaries. */
351    Int       ii, j, k, m, spillee, k_suboptimal;
352    HReg      rreg, vreg, vregS, vregD;
353    HRegUsage reg_usage;
354 
355    /* Info on vregs and rregs.  Computed once and remains
356       unchanged. */
357    Int     n_vregs;
358    VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
359 
360    /* We keep two copies of the real-reg live range info, one sorted
361       by .live_after and the other by .dead_before.  First the
362       unsorted info is created in the _la variant and then copied into
363       the _db variant.  Once that's done, both of them are sorted.
364       We also need two integer cursors which record the next
365       location in the two arrays to consider. */
366    RRegLR* rreg_lrs_la;
367    RRegLR* rreg_lrs_db;
368    Int     rreg_lrs_size;
369    Int     rreg_lrs_used;
370    Int     rreg_lrs_la_next;
371    Int     rreg_lrs_db_next;
372 
373    /* Used when constructing vreg_lrs (for allocating stack
374       slots). */
375    Int ss_busy_until_before[N_SPILL64S];
376 
377    /* Used when constructing rreg_lrs. */
378    Int* rreg_live_after;
379    Int* rreg_dead_before;
380 
381    /* Running state of the core allocation algorithm. */
382    RRegState* rreg_state;  /* [0 .. n_rregs-1] */
383    Int        n_rregs;
384 
385    /* .. and the redundant backward map */
386    /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
387       This implies n_rregs must be <= 32768. */
388    Short*     vreg_state;  /* [0 .. n_vregs-1] */
389 
390    /* The vreg -> rreg map constructed and then applied to each
391       instr. */
392    HRegRemap remap;
393 
394    /* The output array of instructions. */
395    HInstrArray* instrs_out;
396 
397    /* Sanity checks are expensive.  They are only done periodically,
398       not at each insn processed. */
399    Bool do_sanity_check;
400 
401    vassert(0 == (guest_sizeB % 16));
402    vassert(0 == (LibVEX_N_SPILL_BYTES % 16));
403    vassert(0 == (N_SPILL64S % 2));
404 
405    /* The live range numbers are signed shorts, and so limiting the
406       number of insns to 10000 comfortably guards against them
407       overflowing 32k. */
408    vassert(instrs_in->arr_used <= 10000);
409 
410 #  define INVALID_INSTRNO (-2)
411 
412 #  define EMIT_INSTR(_instr)                  \
413       do {                                    \
414         HInstr* _tmp = (_instr);              \
415         if (DEBUG_REGALLOC) {                 \
416            vex_printf("**  ");                \
417            (*ppInstr)(_tmp, mode64);          \
418            vex_printf("\n\n");                \
419         }                                     \
420         addHInstr ( instrs_out, _tmp );       \
421       } while (0)
422 
423 #   define PRINT_STATE						   \
424       do {							   \
425          Int z, q;						   \
426          for (z = 0; z < n_rregs; z++) {			   \
427             vex_printf("  rreg_state[%2d] = ", z);		   \
428             (*ppReg)(rreg_state[z].rreg);	       		   \
429             vex_printf("  \t");					   \
430             switch (rreg_state[z].disp) {			   \
431                case Free:    vex_printf("Free\n"); break;	   \
432                case Unavail: vex_printf("Unavail\n"); break;	   \
433                case Bound:   vex_printf("BoundTo "); 		   \
434                              (*ppReg)(rreg_state[z].vreg);	   \
435                              vex_printf("\n"); break;		   \
436             }							   \
437          }							   \
438          vex_printf("\n  vreg_state[0 .. %d]:\n    ", n_vregs-1);  \
439          q = 0;                                                    \
440          for (z = 0; z < n_vregs; z++) {                           \
441             if (vreg_state[z] == INVALID_RREG_NO)                  \
442                continue;                                           \
443             vex_printf("[%d] -> %d   ", z, vreg_state[z]);         \
444             q++;                                                   \
445             if (q > 0 && (q % 6) == 0)                             \
446                vex_printf("\n    ");                               \
447          }                                                         \
448          vex_printf("\n");                                         \
449       } while (0)
450 
451 
452    /* --------- Stage 0: set up output array --------- */
453    /* --------- and allocate/initialise running state. --------- */
454 
455    instrs_out = newHInstrArray();
456 
457    /* ... and initialise running state. */
458    /* n_rregs is no more than a short name for n_available_real_regs. */
459    n_rregs = n_available_real_regs;
460    n_vregs = instrs_in->n_vregs;
461 
462    /* If this is not so, vreg_state entries will overflow. */
463    vassert(n_vregs < 32767);
464 
465    rreg_state = LibVEX_Alloc(n_rregs * sizeof(RRegState));
466    vreg_state = LibVEX_Alloc(n_vregs * sizeof(Short));
467 
468    for (j = 0; j < n_rregs; j++) {
469       rreg_state[j].rreg          = available_real_regs[j];
470       rreg_state[j].has_hlrs      = False;
471       rreg_state[j].disp          = Free;
472       rreg_state[j].vreg          = INVALID_HREG;
473       rreg_state[j].is_spill_cand = False;
474       rreg_state[j].eq_spill_slot = False;
475    }
476 
477    for (j = 0; j < n_vregs; j++)
478       vreg_state[j] = INVALID_RREG_NO;
479 
480 
481    /* --------- Stage 1: compute vreg live ranges. --------- */
482    /* --------- Stage 2: compute rreg live ranges. --------- */
483 
484    /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */
485 
486    /* This is relatively simple, because (1) we only seek the complete
487       end-to-end live range of each vreg, and are not interested in
488       any holes in it, and (2) the vregs are conveniently numbered 0
489       .. n_vregs-1, so we can just dump the results in a
490       pre-allocated array. */
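   /* Worked example (illustrative): if vreg 5 is first written by insn 3
      and last read by insn 7, the scan below leaves
      vreg_lrs[5].live_after == 3 and vreg_lrs[5].dead_before == 8,
      since dead_before is always recorded as ii+1. */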
491 
492    vreg_lrs = NULL;
493    if (n_vregs > 0)
494       vreg_lrs = LibVEX_Alloc(sizeof(VRegLR) * n_vregs);
495 
496    for (j = 0; j < n_vregs; j++) {
497       vreg_lrs[j].live_after     = INVALID_INSTRNO;
498       vreg_lrs[j].dead_before    = INVALID_INSTRNO;
499       vreg_lrs[j].spill_offset   = 0;
500       vreg_lrs[j].spill_size     = 0;
501       vreg_lrs[j].reg_class      = HRcINVALID;
502    }
503 
504    /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
505 
506    /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */
507 
508    /* This is more complex than Stage 1, because we need to compute
509       exactly all the live ranges of all the allocatable real regs,
510       and we don't know in advance how many there will be. */
511 
512    rreg_lrs_used = 0;
513    rreg_lrs_size = 4;
514    rreg_lrs_la = LibVEX_Alloc(rreg_lrs_size * sizeof(RRegLR));
515    rreg_lrs_db = NULL; /* we'll create this later */
516 
517    /* We'll need to track live range start/end points separately for
518       each rreg.  Sigh. */
519    vassert(n_available_real_regs > 0);
520    rreg_live_after  = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
521    rreg_dead_before = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
522 
523    for (j = 0; j < n_available_real_regs; j++) {
524       rreg_live_after[j] =
525       rreg_dead_before[j] = INVALID_INSTRNO;
526    }
527 
528    /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
529 
530    /* ------ start of ITERATE OVER INSNS ------ */
531 
532    for (ii = 0; ii < instrs_in->arr_used; ii++) {
533 
534       (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
535 
536 #     if 0
537       vex_printf("\n%d  stage1: ", ii);
538       (*ppInstr)(instrs_in->arr[ii], mode64);
539       vex_printf("\n");
540       ppHRegUsage(&reg_usage);
541 #     endif
542 
543       /* ------ start of DEAL WITH VREG LIVE RANGES ------ */
544 
545       /* for each reg mentioned in the insn ... */
546       for (j = 0; j < reg_usage.n_used; j++) {
547 
548          vreg = reg_usage.hreg[j];
549          /* only interested in virtual registers right now. */
550          if (!hregIsVirtual(vreg))
551             continue;
552          k = hregNumber(vreg);
553          if (k < 0 || k >= n_vregs) {
554             vex_printf("\n");
555             (*ppInstr)(instrs_in->arr[ii], mode64);
556             vex_printf("\n");
557             vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
558             vpanic("doRegisterAllocation: out-of-range vreg");
559          }
560 
561          /* Take the opportunity to note its regclass.  We'll need
562             that when allocating spill slots. */
563          if (vreg_lrs[k].reg_class == HRcINVALID) {
564             /* First mention of this vreg. */
565             vreg_lrs[k].reg_class = hregClass(vreg);
566          } else {
567             /* Seen it before, so check for consistency. */
568             vassert(vreg_lrs[k].reg_class == hregClass(vreg));
569          }
570 
571          /* Now consider live ranges. */
572          switch (reg_usage.mode[j]) {
573             case HRmRead:
574                if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
575                   vex_printf("\n\nOFFENDING VREG = %d\n", k);
576                   vpanic("doRegisterAllocation: "
577                          "first event for vreg is Read");
578                }
579                vreg_lrs[k].dead_before = toShort(ii + 1);
580                break;
581             case HRmWrite:
582                if (vreg_lrs[k].live_after == INVALID_INSTRNO)
583                   vreg_lrs[k].live_after = toShort(ii);
584                vreg_lrs[k].dead_before = toShort(ii + 1);
585                break;
586             case HRmModify:
587                if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
588                   vex_printf("\n\nOFFENDING VREG = %d\n", k);
589                   vpanic("doRegisterAllocation: "
590                          "first event for vreg is Modify");
591                }
592                vreg_lrs[k].dead_before = toShort(ii + 1);
593                break;
594             default:
595                vpanic("doRegisterAllocation(1)");
596          } /* switch */
597 
598       } /* iterate over registers */
599 
600       /* ------ end of DEAL WITH VREG LIVE RANGES ------ */
601 
602       /* ------ start of DEAL WITH RREG LIVE RANGES ------ */
603 
604       /* for each reg mentioned in the insn ... */
605       for (j = 0; j < reg_usage.n_used; j++) {
606 
607          /* Dummy initialisations of flush_la and flush_db to avoid
608             possible bogus uninit-var warnings from gcc. */
609          Int  flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
610          Bool flush;
611 
612          rreg = reg_usage.hreg[j];
613 
614          /* only interested in real registers right now. */
615          if (hregIsVirtual(rreg))
616             continue;
617 
618          /* Furthermore, we're not interested in this rreg unless it's
619             one of the allocatable ones.  For example, it could be a
620             stack pointer register, or some other register beyond our
621             control, in which case we should just ignore it. */
622          for (k = 0; k < n_available_real_regs; k++)
623             if (available_real_regs[k] == rreg)
624                break;
625          if (k == n_available_real_regs)
626             continue; /* not found -- ignore. */
627          flush = False;
628          switch (reg_usage.mode[j]) {
629             case HRmWrite:
630                flush_la = rreg_live_after[k];
631                flush_db = rreg_dead_before[k];
632                if (flush_la != INVALID_INSTRNO
633                    && flush_db != INVALID_INSTRNO)
634                   flush = True;
635                rreg_live_after[k]  = ii;
636                rreg_dead_before[k] = ii+1;
637                break;
638             case HRmRead:
639                if (rreg_live_after[k] == INVALID_INSTRNO) {
640                   vex_printf("\nOFFENDING RREG = ");
641                   (*ppReg)(available_real_regs[k]);
642                   vex_printf("\n");
643                   vex_printf("\nOFFENDING instr = ");
644                   (*ppInstr)(instrs_in->arr[ii], mode64);
645                   vex_printf("\n");
646                   vpanic("doRegisterAllocation: "
647                          "first event for rreg is Read");
648                }
649                rreg_dead_before[k] = ii+1;
650                break;
651             case HRmModify:
652                if (rreg_live_after[k] == INVALID_INSTRNO) {
653                   vex_printf("\nOFFENDING RREG = ");
654                   (*ppReg)(available_real_regs[k]);
655                   vex_printf("\n");
656                   vex_printf("\nOFFENDING instr = ");
657                   (*ppInstr)(instrs_in->arr[ii], mode64);
658                   vex_printf("\n");
659                   vpanic("doRegisterAllocation: "
660                          "first event for rreg is Modify");
661                }
662                rreg_dead_before[k] = ii+1;
663                break;
664             default:
665                vpanic("doRegisterAllocation(2)");
666          }
667 
668          if (flush) {
669             vassert(flush_la != INVALID_INSTRNO);
670             vassert(flush_db != INVALID_INSTRNO);
671             ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
672             if (0)
673                vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
674             rreg_lrs_la[rreg_lrs_used].rreg        = rreg;
675             rreg_lrs_la[rreg_lrs_used].live_after  = toShort(flush_la);
676             rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
677             rreg_lrs_used++;
678          }
679 
680       } /* iterate over regs in the instr */
681 
682       /* ------ end of DEAL WITH RREG LIVE RANGES ------ */
683 
684    } /* iterate over insns */
685 
686    /* ------ end of ITERATE OVER INSNS ------ */
687 
688    /* ------ start of FINALISE RREG LIVE RANGES ------ */
689 
690    /* Now finish up any live ranges left over. */
691    for (j = 0; j < n_available_real_regs; j++) {
692 
693 #     if 0
694       vex_printf("residual %d:  %d %d\n", j, rreg_live_after[j],
695                                              rreg_dead_before[j]);
696 #     endif
697       vassert( (rreg_live_after[j] == INVALID_INSTRNO
698                && rreg_dead_before[j] == INVALID_INSTRNO)
699               ||
700               (rreg_live_after[j] != INVALID_INSTRNO
701                && rreg_dead_before[j] != INVALID_INSTRNO)
702             );
703 
704       if (rreg_live_after[j] == INVALID_INSTRNO)
705          continue;
706 
707       ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
708       if (0)
709          vex_printf("FLUSH 2 (%d,%d)\n",
710                     rreg_live_after[j], rreg_dead_before[j]);
711       rreg_lrs_la[rreg_lrs_used].rreg        = available_real_regs[j];
712       rreg_lrs_la[rreg_lrs_used].live_after  = toShort(rreg_live_after[j]);
713       rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
714       rreg_lrs_used++;
715    }
716 
717    /* Compute summary hints for choosing real regs.  If a real reg is
718       involved in a hard live range, record that fact in the fixed
719       part of the running rreg_state.  Later, when offered a choice between
720       rregs, it's better to choose one which is not marked as having
721       any HLRs, since ones with HLRs may need to be spilled around
722       their HLRs.  Correctness of final assignment is unaffected by
723       this mechanism -- it is only an optimisation. */
724 
725    for (j = 0; j < rreg_lrs_used; j++) {
726       rreg = rreg_lrs_la[j].rreg;
727       vassert(!hregIsVirtual(rreg));
728       /* rreg is involved in a HLR.  Record this info in the array, if
729          there is space. */
730       for (k = 0; k < n_rregs; k++)
731          if (rreg_state[k].rreg == rreg)
732             break;
733       vassert(k < n_rregs); /* else rreg was not found in rreg_state?! */
734       rreg_state[k].has_hlrs = True;
735    }
736    if (0) {
737       for (j = 0; j < n_rregs; j++) {
738          if (!rreg_state[j].has_hlrs)
739             continue;
740          ppReg(rreg_state[j].rreg);
741          vex_printf(" hinted\n");
742       }
743    }
744 
745    /* Finally, copy the _la variant into the _db variant and
746       sort both by their respective fields. */
747    rreg_lrs_db = LibVEX_Alloc(rreg_lrs_used * sizeof(RRegLR));
748    for (j = 0; j < rreg_lrs_used; j++)
749       rreg_lrs_db[j] = rreg_lrs_la[j];
750 
751    sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/  );
752    sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );
753 
754    /* And set up the cursors. */
755    rreg_lrs_la_next = 0;
756    rreg_lrs_db_next = 0;
757 
758    for (j = 1; j < rreg_lrs_used; j++) {
759       vassert(rreg_lrs_la[j-1].live_after  <= rreg_lrs_la[j].live_after);
760       vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
761    }
762 
763    /* ------ end of FINALISE RREG LIVE RANGES ------ */
764 
765 #  if DEBUG_REGALLOC
766    for (j = 0; j < n_vregs; j++) {
767       vex_printf("vreg %d:  la = %d,  db = %d\n",
768                  j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
769    }
770 #  endif
771 
772 #  if DEBUG_REGALLOC
773    vex_printf("RRegLRs by LA:\n");
774    for (j = 0; j < rreg_lrs_used; j++) {
775       vex_printf("  ");
776       (*ppReg)(rreg_lrs_la[j].rreg);
777       vex_printf("      la = %d,  db = %d\n",
778                  rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
779    }
780    vex_printf("RRegLRs by DB:\n");
781    for (j = 0; j < rreg_lrs_used; j++) {
782       vex_printf("  ");
783       (*ppReg)(rreg_lrs_db[j].rreg);
784       vex_printf("      la = %d,  db = %d\n",
785                  rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
786    }
787 #  endif
788 
789    /* --------- Stage 3: allocate spill slots. --------- */
790 
791    /* Each spill slot is 8 bytes long.  For vregs which take more than
792       64 bits to spill (classes Flt64 and Vec128), we have to allocate
793       two spill slots.
794 
795       For Vec128-class on PowerPC, the spill slot's actual address
796       must be 16-byte aligned.  Since the spill slot's address is
797       computed as an offset from the guest state pointer, and since
798       the user of the generated code must set that pointer to a
799       16-aligned value, we have the residual obligation here of
800       choosing a 16-aligned spill slot offset for Vec128-class values.
801       Since each spill slot is 8 bytes long, that means for
802       Vec128-class values we must allocate a spill slot number which
803       is zero mod 2.
804 
805       Do a rank-based allocation of vregs to spill slot numbers.  We
806       put as few values as possible in spill slots, but nevertheless
807       need to have a spill slot available for all vregs, just in case.
808    */
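   /* Worked example (illustrative): with guest_sizeB == 1024, a vreg given
      spill slot k == 3 ends up with spill_offset == 1024*3 + 3*8 == 3096.
      A Flt64/Vec128 vreg must get an even k, making its offset a multiple
      of 16, which sanity_check_spill_offset then confirms. */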
809    /* max_ss_no = -1; */
810 
811    for (j = 0; j < N_SPILL64S; j++)
812       ss_busy_until_before[j] = 0;
813 
814    for (j = 0; j < n_vregs; j++) {
815 
816       /* True iff this vreg is unused.  In which case we also expect
817          that the reg_class field for it has not been set.  */
818       if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
819          vassert(vreg_lrs[j].reg_class == HRcINVALID);
820          continue;
821       }
822 
823       /* The spill slots are 64 bits in size.  As per the comment on
824          definition of HRegClass in host_generic_regs.h, that means, to
825          spill a vreg of class Flt64 or Vec128, we'll need to find two
826          adjacent spill slots to use.  Note, this logic needs to be kept
827          in sync with the size info on the definition of HRegClass. */
828 
829       if (vreg_lrs[j].reg_class == HRcVec128
830           || vreg_lrs[j].reg_class == HRcFlt64) {
831 
832          /* Find two adjacent free slots which between them provide
833             up to 128 bits in which to spill the vreg.  Since we are
834             trying to find an even:odd pair, move along in steps of 2
835             (slots). */
836 
837          for (k = 0; k < N_SPILL64S-1; k += 2)
838             if (ss_busy_until_before[k] <= vreg_lrs[j].live_after
839                 && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after)
840                break;
841          if (k >= N_SPILL64S-1) {
842             vpanic("LibVEX_N_SPILL_BYTES is too low.  "
843                    "Increase and recompile.");
844          }
845          if (0) vex_printf("16-byte spill offset in spill slot %d\n", (Int)k);
846          ss_busy_until_before[k+0] = vreg_lrs[j].dead_before;
847          ss_busy_until_before[k+1] = vreg_lrs[j].dead_before;
848 
849       } else {
850 
851          /* The ordinary case -- just find a single spill slot. */
852 
853          /* Find the lowest-numbered spill slot which is available at
854             the start point of this interval, and assign the interval
855             to it. */
856          for (k = 0; k < N_SPILL64S; k++)
857             if (ss_busy_until_before[k] <= vreg_lrs[j].live_after)
858                break;
859          if (k == N_SPILL64S) {
860             vpanic("LibVEX_N_SPILL_BYTES is too low.  "
861                    "Increase and recompile.");
862          }
863          ss_busy_until_before[k] = vreg_lrs[j].dead_before;
864 
865       }
866 
867       /* This reflects LibVEX's hard-wired knowledge of the baseBlock
868          layout: the guest state, then two equal sized areas following
869          it for two sets of shadow state, and then the spill area. */
870       vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + k * 8);
871 
872       /* Independent check that we've made a sane choice of slot */
873       sanity_check_spill_offset( &vreg_lrs[j] );
874       /* if (j > max_ss_no) */
875       /*    max_ss_no = j; */
876    }
877 
878 #  if 0
879    vex_printf("\n\n");
880    for (j = 0; j < n_vregs; j++)
881       vex_printf("vreg %d    --> spill offset %d\n",
882                  j, vreg_lrs[j].spill_offset);
883 #  endif
884 
885    /* --------- Stage 4: establish rreg preferences --------- */
886 
887    /* It may be advantageous to allocate certain vregs to specific
888       rregs, as a way of avoiding reg-reg moves later.  Here we
889       establish which, if any, rreg each vreg would prefer to be in.
890       Note that this constrains the allocator -- ideally we end up
891       with as few vregs as possible expressing a preference.
892 
893       This is an optimisation: if the .preferred_rreg field is never
894       set to anything different from INVALID_HREG, the allocator still
895       works. */
896 
897    /* 30 Dec 04: removed this mechanism as it does not seem to
898       help. */
899 
900    /* --------- Stage 5: process instructions --------- */
901 
902    /* This is the main loop of the allocator.  First, we need to
903       correctly set up our running state, which tracks the status of
904       each real register. */
905 
906    /* ------ BEGIN: Process each insn in turn. ------ */
907 
908    for (ii = 0; ii < instrs_in->arr_used; ii++) {
909 
910 #     if DEBUG_REGALLOC
911       vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
912       vex_printf("---- ");
913       (*ppInstr)(instrs_in->arr[ii], mode64);
914       vex_printf("\n\nInitial state:\n");
915       PRINT_STATE;
916       vex_printf("\n");
917 #     endif
918 
919       /* ------------ Sanity checks ------------ */
920 
921       /* Sanity checks are expensive.  So they are done only once
922          every 7 instructions, and just before the last
923          instruction. */
924       do_sanity_check
925          = toBool(
926               False  /* Set to True for sanity checking of all insns. */
927               || ii == instrs_in->arr_used-1
928               || (ii > 0 && (ii % 7) == 0)
929            );
930 
931       if (do_sanity_check) {
932 
933          /* Sanity check 1: all rregs with a hard live range crossing
934             this insn must be marked as unavailable in the running
935             state. */
936          for (j = 0; j < rreg_lrs_used; j++) {
937             if (rreg_lrs_la[j].live_after < ii
938                 && ii < rreg_lrs_la[j].dead_before) {
939                /* ii is the middle of a hard live range for some real
940                   reg.  Check it's marked as such in the running
941                   state. */
942 
943 #              if 0
944                vex_printf("considering la %d .. db %d   reg = ",
945                           rreg_lrs_la[j].live_after,
946                           rreg_lrs_la[j].dead_before);
947                (*ppReg)(rreg_lrs_la[j].rreg);
948                vex_printf("\n");
949 #              endif
950 
951                /* find the state entry for this rreg */
952                for (k = 0; k < n_rregs; k++)
953                   if (rreg_state[k].rreg == rreg_lrs_la[j].rreg)
954                      break;
955 
956                /* and assert that this rreg is marked as unavailable */
957                vassert(rreg_state[k].disp == Unavail);
958             }
959          }
960 
961          /* Sanity check 2: conversely, all rregs marked as
962             unavailable in the running rreg_state must have a
963             corresponding hard live range entry in the rreg_lrs
964             array. */
965          for (j = 0; j < n_available_real_regs; j++) {
966             vassert(rreg_state[j].disp == Bound
967                     || rreg_state[j].disp == Free
968                     || rreg_state[j].disp == Unavail);
969             if (rreg_state[j].disp != Unavail)
970                continue;
971             for (k = 0; k < rreg_lrs_used; k++)
972                if (rreg_lrs_la[k].rreg == rreg_state[j].rreg
973                    && rreg_lrs_la[k].live_after < ii
974                    && ii < rreg_lrs_la[k].dead_before)
975                   break;
976             /* If this vassertion fails, we couldn't find a
977                corresponding HLR. */
978             vassert(k < rreg_lrs_used);
979          }
980 
981          /* Sanity check 3: all vreg-rreg bindings must bind registers
982             of the same class. */
983          for (j = 0; j < n_rregs; j++) {
984             if (rreg_state[j].disp != Bound) {
985                vassert(rreg_state[j].eq_spill_slot == False);
986                continue;
987             }
988             vassert(hregClass(rreg_state[j].rreg)
989                     == hregClass(rreg_state[j].vreg));
990             vassert( hregIsVirtual(rreg_state[j].vreg));
991             vassert(!hregIsVirtual(rreg_state[j].rreg));
992          }
993 
994          /* Sanity check 4: the vreg_state and rreg_state
995             mutually-redundant mappings are consistent.  If
996             rreg_state[j].vreg points at some vreg_state entry then
997             that vreg_state entry should point back at
998             rreg_state[j]. */
999          for (j = 0; j < n_rregs; j++) {
1000             if (rreg_state[j].disp != Bound)
1001                continue;
1002             k = hregNumber(rreg_state[j].vreg);
1003             vassert(IS_VALID_VREGNO(k));
1004             vassert(vreg_state[k] == j);
1005          }
1006          for (j = 0; j < n_vregs; j++) {
1007             k = vreg_state[j];
1008             if (k == INVALID_RREG_NO)
1009                continue;
1010             vassert(IS_VALID_RREGNO(k));
1011             vassert(rreg_state[k].disp == Bound);
1012             vassert(hregNumber(rreg_state[k].vreg) == j);
1013          }
1014 
1015       } /* if (do_sanity_check) */
1016 
1017       /* ------------ end of Sanity checks ------------ */
1018 
1019       /* Do various optimisations pertaining to register coalescing
1020          and preferencing:
1021             MOV  v <-> v   coalescing (done here).
1022             MOV  v <-> r   coalescing (not yet, if ever)
1023       */
1024       /* If doing a reg-reg move between two vregs, and the src's live
1025          range ends here and the dst's live range starts here, bind
1026          the dst to the src's rreg, and that's all. */
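      /* Illustrative case: for a move v5 -> v9 at insn ii, with v5's
         dead_before == ii+1 and v9's live_after == ii, we simply rebind
         v5's rreg to v9 and emit no code for the move at all. */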
1027       if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
1028          if (!hregIsVirtual(vregS)) goto cannot_coalesce;
1029          if (!hregIsVirtual(vregD)) goto cannot_coalesce;
1030          /* Check that *isMove is not telling us a bunch of lies ... */
1031          vassert(hregClass(vregS) == hregClass(vregD));
1032          k = hregNumber(vregS);
1033          m = hregNumber(vregD);
1034          vassert(IS_VALID_VREGNO(k));
1035          vassert(IS_VALID_VREGNO(m));
1036          if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
1037          if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
1038 #        if DEBUG_REGALLOC
1039          vex_printf("COALESCE ");
1040          (*ppReg)(vregS);
1041          vex_printf(" -> ");
1042          (*ppReg)(vregD);
1043          vex_printf("\n\n");
1044 #        endif
1045          /* Find the state entry for vregS. */
1046          for (m = 0; m < n_rregs; m++)
1047             if (rreg_state[m].disp == Bound && rreg_state[m].vreg == vregS)
1048                break;
1049          if (m == n_rregs)
1050             /* We failed to find a binding for vregS, which means it's
1051                currently not in a register.  So we can't do the
1052                coalescing.  Give up. */
1053             goto cannot_coalesce;
1054 
1055          /* Finally, we can do the coalescing.  It's trivial -- merely
1056             claim vregS's register for vregD. */
1057          rreg_state[m].vreg = vregD;
1058          vassert(IS_VALID_VREGNO(hregNumber(vregD)));
1059          vassert(IS_VALID_VREGNO(hregNumber(vregS)));
1060          vreg_state[hregNumber(vregD)] = toShort(m);
1061          vreg_state[hregNumber(vregS)] = INVALID_RREG_NO;
1062 
1063          /* This rreg has become associated with a different vreg and
1064             hence with a different spill slot.  Play safe. */
1065          rreg_state[m].eq_spill_slot = False;
1066 
1067          /* Move on to the next insn.  We skip the post-insn stuff for
1068             fixed registers, since this move should not interact with
1069             them in any way. */
1070          continue;
1071       }
1072      cannot_coalesce:
1073 
1074       /* ------ Free up rregs bound to dead vregs ------ */
1075 
1076       /* Look for vregs whose live range has just ended, and
1077          mark the associated rreg as free. */
1078 
1079       for (j = 0; j < n_rregs; j++) {
1080          if (rreg_state[j].disp != Bound)
1081             continue;
1082          vreg = hregNumber(rreg_state[j].vreg);
1083          vassert(IS_VALID_VREGNO(vreg));
1084          if (vreg_lrs[vreg].dead_before <= ii) {
1085             rreg_state[j].disp = Free;
1086             rreg_state[j].eq_spill_slot = False;
1087             m = hregNumber(rreg_state[j].vreg);
1088             vassert(IS_VALID_VREGNO(m));
1089             vreg_state[m] = INVALID_RREG_NO;
1090             if (DEBUG_REGALLOC) {
1091                vex_printf("free up ");
1092                (*ppReg)(rreg_state[j].rreg);
1093                vex_printf("\n");
1094             }
1095          }
1096       }
1097 
1098       /* ------ Pre-instruction actions for fixed rreg uses ------ */
1099 
1100       /* Now we have to deal with rregs which are about to be made
1101          live by this instruction -- in other words, are entering into
1102          one of their live ranges.  If any such rreg holds a vreg, we
1103          will have to free up the rreg.  The simplest solution which
1104          is correct is to spill the rreg.
1105 
1106          Note we could do better:
1107          * Could move it into some other free rreg, if one is available
1108 
1109          Do this efficiently, by incrementally stepping along an array
1110          of rreg HLRs that are known to be sorted by start point
1111          (their .live_after field).
1112       */
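      /* Illustrative case: if this insn starts a hard live range for,
         say, a fixed argument-passing rreg which is currently Bound to
         a still-live vreg, the loop below spills that vreg to its slot
         and marks the rreg Unavail until the range ends. */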
1113       while (True) {
1114          vassert(rreg_lrs_la_next >= 0);
1115          vassert(rreg_lrs_la_next <= rreg_lrs_used);
1116          if (rreg_lrs_la_next == rreg_lrs_used)
1117             break; /* no more real reg live ranges to consider */
1118          if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
1119             break; /* next live range does not yet start */
1120          vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
1121          /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
1122             Find the associated rreg_state entry. */
1123          /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
1124             Real register live ranges are guaranteed to be well-formed
1125             in that they start with a write to the register -- Stage 2
1126             rejects any code not satisfying this.  So the correct
1127             question to ask is whether
1128             rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
1129             whether the reg becomes live after this insn -- rather
1130             than before it. */
1131 #        if DEBUG_REGALLOC
1132          vex_printf("need to free up rreg: ");
1133          (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
1134          vex_printf("\n\n");
1135 #        endif
1136          for (k = 0; k < n_rregs; k++)
1137             if (rreg_state[k].rreg == rreg_lrs_la[rreg_lrs_la_next].rreg)
1138                break;
1139          /* If this fails, we don't have an entry for this rreg,
1140             which we should have. */
1141          vassert(IS_VALID_RREGNO(k));
1142          m = hregNumber(rreg_state[k].vreg);
1143          if (rreg_state[k].disp == Bound) {
1144             /* Yes, there is an associated vreg.  Spill it if it's
1145                still live. */
1146             vassert(IS_VALID_VREGNO(m));
1147             vreg_state[m] = INVALID_RREG_NO;
1148             if (vreg_lrs[m].dead_before > ii) {
1149                vassert(vreg_lrs[m].reg_class != HRcINVALID);
1150                if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
1151                   HInstr* spill1 = NULL;
1152                   HInstr* spill2 = NULL;
1153                   (*genSpill)( &spill1, &spill2, rreg_state[k].rreg,
1154                                vreg_lrs[m].spill_offset, mode64 );
1155                   vassert(spill1 || spill2); /* can't both be NULL */
1156                   if (spill1)
1157                      EMIT_INSTR(spill1);
1158                   if (spill2)
1159                      EMIT_INSTR(spill2);
1160                }
1161                rreg_state[k].eq_spill_slot = True;
1162             }
1163          }
1164          rreg_state[k].disp = Unavail;
1165          rreg_state[k].vreg = INVALID_HREG;
1166          rreg_state[k].eq_spill_slot = False;
1167 
1168          /* check for further rregs entering HLRs at this point */
1169          rreg_lrs_la_next++;
1170       }
1171 
1172 
1173 #     if DEBUG_REGALLOC
1174       vex_printf("After pre-insn actions for fixed regs:\n");
1175       PRINT_STATE;
1176       vex_printf("\n");
1177 #     endif
1178 
1179 
1180       /* ------ Deal with the current instruction. ------ */
1181 
1182       /* Finally we can begin the processing of this instruction
1183          itself.  The aim is to free up enough rregs for this insn.
1184          This may generate spill stores since we may have to evict
1185          some vregs currently in rregs.  Also generates spill loads.
1186          We also build up the final vreg->rreg mapping to be applied
1187          to the insn. */
1188 
1189       (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
1190 
1191       initHRegRemap(&remap);
1192 
1193       /* ------------ BEGIN directReload optimisation ----------- */
1194 
1195       /* If the instruction reads exactly one vreg which is currently
1196          in a spill slot, and this is last use of that vreg, see if we
1197          can convert the instruction into one which reads directly from
1198          the spill slot.  This is clearly only possible for x86 and amd64
1199          targets, since ppc and arm are load-store architectures.  If
1200          successful, replace instrs_in->arr[ii] with this new
1201          instruction, and recompute its reg usage, so that the change
1202          is invisible to the standard-case handling that follows. */
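      /* Illustrative (x86-flavoured, hypothetical syntax): if v7 is
         spilled at offset O and this is its last use, an insn like
         "add %v7, %r" may be rewritten by the directReload hook as
         "add O(<guest-state-ptr>), %r", avoiding both a reload and the
         need for a register to hold v7. */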
1203 
1204       if (directReload && reg_usage.n_used <= 2) {
1205          Bool  debug_direct_reload = True && False;
1206          HReg  cand     = INVALID_HREG;
1207          Int   nreads   = 0;
1208          Short spilloff = 0;
1209 
1210          for (j = 0; j < reg_usage.n_used; j++) {
1211 
1212             vreg = reg_usage.hreg[j];
1213 
1214             if (!hregIsVirtual(vreg))
1215                continue;
1216 
1217             if (reg_usage.mode[j] == HRmRead) {
1218                nreads++;
1219                m = hregNumber(vreg);
1220                vassert(IS_VALID_VREGNO(m));
1221                k = vreg_state[m];
1222                if (!IS_VALID_RREGNO(k)) {
1223                   /* ok, it is spilled.  Now, is this its last use? */
1224                   vassert(vreg_lrs[m].dead_before >= ii+1);
1225                   if (vreg_lrs[m].dead_before == ii+1
1226                       && cand == INVALID_HREG) {
1227                      spilloff = vreg_lrs[m].spill_offset;
1228                      cand = vreg;
1229                   }
1230                }
1231             }
1232          }
1233 
1234          if (nreads == 1 && cand != INVALID_HREG) {
1235             HInstr* reloaded;
1236             if (reg_usage.n_used == 2)
1237                vassert(reg_usage.hreg[0] != reg_usage.hreg[1]);
1238 
1239             reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
1240             if (debug_direct_reload && !reloaded) {
1241                vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
1242                ppInstr(instrs_in->arr[ii], mode64);
1243             }
1244             if (reloaded) {
1245                /* Update info about the insn, so it looks as if it had
1246                   been in this form all along. */
1247                instrs_in->arr[ii] = reloaded;
1248                (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
1249                if (debug_direct_reload) {
1250                   vex_printf("  -->  ");
1251                   ppInstr(reloaded, mode64);
1252                }
1253             }
1254 
1255             if (debug_direct_reload && !reloaded)
1256                vex_printf("\n");
1257          }
1258 
1259       }
1260 
1261       /* ------------ END directReload optimisation ------------ */
1262 
1263       /* for each reg mentioned in the insn ... */
1264       for (j = 0; j < reg_usage.n_used; j++) {
1265 
1266          vreg = reg_usage.hreg[j];
1267 
1268          /* only interested in virtual registers right now. */
1269          if (!hregIsVirtual(vreg))
1270             continue;
1271 
1272 #        if 0
1273          vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
1274 #        endif
1275 
1276          /* Now we're trying to find a rreg for "vreg".  First of all,
1277             if it already has an rreg assigned, we don't need to do
1278             anything more.  Search the current state to find out. */
1279          m = hregNumber(vreg);
1280          vassert(IS_VALID_VREGNO(m));
1281          k = vreg_state[m];
1282          if (IS_VALID_RREGNO(k)) {
1283             vassert(rreg_state[k].disp == Bound);
1284             addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
1285             /* If this rreg is written or modified, mark it as different
1286                from any spill slot value. */
1287             if (reg_usage.mode[j] != HRmRead)
1288                rreg_state[k].eq_spill_slot = False;
1289             continue;
1290          } else {
1291             vassert(k == INVALID_RREG_NO);
1292          }
1293 
1294          /* No luck.  The next thing to do is see if there is a
1295             currently free rreg available, of the correct class.  If
1296             so, bag it.  NOTE, we could improve this by selecting an
1297             rreg for which the next live-range event is as far ahead
1298             as possible. */
1299          k_suboptimal = -1;
1300          for (k = 0; k < n_rregs; k++) {
1301             if (rreg_state[k].disp != Free
1302                 || hregClass(rreg_state[k].rreg) != hregClass(vreg))
1303                continue;
1304             if (rreg_state[k].has_hlrs) {
1305                /* Well, at least we can use k_suboptimal if we really
1306                   have to.  Keep on looking for a better candidate. */
1307                k_suboptimal = k;
1308             } else {
1309                /* Found a preferable reg.  Use it. */
1310                k_suboptimal = -1;
1311                break;
1312             }
1313          }
1314          if (k_suboptimal >= 0)
1315             k = k_suboptimal;
1316 
1317          if (k < n_rregs) {
1318             rreg_state[k].disp = Bound;
1319             rreg_state[k].vreg = vreg;
1320             m = hregNumber(vreg);
1321             vassert(IS_VALID_VREGNO(m));
1322             vreg_state[m] = toShort(k);
1323             addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
1324             /* Generate a reload if needed.  This only creates needed
1325                reloads because the live range builder for vregs will
1326                guarantee that the first event for a vreg is a write.
1327                Hence, if this reference is not a write, it cannot be
1328                the first reference for this vreg, and so a reload is
1329                indeed needed. */
1330             if (reg_usage.mode[j] != HRmWrite) {
1331                vassert(vreg_lrs[m].reg_class != HRcINVALID);
1332                HInstr* reload1 = NULL;
1333                HInstr* reload2 = NULL;
1334                (*genReload)( &reload1, &reload2, rreg_state[k].rreg,
1335                              vreg_lrs[m].spill_offset, mode64 );
1336                vassert(reload1 || reload2); /* can't both be NULL */
1337                if (reload1)
1338                   EMIT_INSTR(reload1);
1339                if (reload2)
1340                   EMIT_INSTR(reload2);
1341                /* This rreg is read or modified by the instruction.
1342                   If it's merely read we can claim it now equals the
1343                   spill slot, but not so if it is modified. */
1344                if (reg_usage.mode[j] == HRmRead) {
1345                   rreg_state[k].eq_spill_slot = True;
1346                } else {
1347                   vassert(reg_usage.mode[j] == HRmModify);
1348                   rreg_state[k].eq_spill_slot = False;
1349                }
1350             } else {
1351                rreg_state[k].eq_spill_slot = False;
1352             }
1353 
1354             continue;
1355          }
1356 
1357          /* Well, now we have no option but to spill a vreg.  It's
1358             important to make a good choice of vreg to spill, and of
1359             course we need to be careful not to spill a vreg which is
1360             needed by this insn. */
1361 
1362          /* First, mark in the rreg_state those rregs which are not spill
1363             candidates, either because they hold a vreg mentioned by this
1364             instruction or because they are of the wrong class. */
1365          for (k = 0; k < n_rregs; k++) {
1366             rreg_state[k].is_spill_cand = False;
1367             if (rreg_state[k].disp != Bound)
1368                continue;
1369             if (hregClass(rreg_state[k].rreg) != hregClass(vreg))
1370                continue;
1371             rreg_state[k].is_spill_cand = True;
1372             for (m = 0; m < reg_usage.n_used; m++) {
1373                if (rreg_state[k].vreg == reg_usage.hreg[m]) {
1374                   rreg_state[k].is_spill_cand = False;
1375                   break;
1376                }
1377             }
1378          }
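         /* Hypothetical example: if this instruction reads v5 and
            writes v7, any rreg currently bound to v5 or v7 is excluded
            above, since those rregs are needed by this very
            instruction and so cannot be vacated now. */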
1379 
1380          /* We can choose to spill any rreg satisfying
1381             rreg_state[r].is_spill_cand (so to speak).  Choose r so that
1382             the next use of its associated vreg is as far ahead as
1383             possible, in the hope that this will minimise the number
1384             of consequent reloads required. */
1385          spillee
1386             = findMostDistantlyMentionedVReg (
1387                  getRegUsage, instrs_in, ii+1, rreg_state, n_rregs, mode64 );
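         /* findMostDistantlyMentionedVReg scans forward from the next
            instruction (ii+1), using getRegUsage, and picks the spill
            candidate whose bound vreg is next mentioned furthest in
            the future; it returns -1 if there are no candidates at
            all. */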
1388 
1389          if (spillee == -1) {
1390             /* Hmmmmm.  There don't appear to be any spill candidates.
1391                We're hosed. */
1392             vex_printf("reg_alloc: can't find a register in class: ");
1393             ppHRegClass(hregClass(vreg));
1394             vex_printf("\n");
1395             vpanic("reg_alloc: can't create a free register.");
1396          }
1397 
1398          /* Right.  So we're going to spill rreg_state[spillee]. */
1399          vassert(IS_VALID_RREGNO(spillee));
1400          vassert(rreg_state[spillee].disp == Bound);
1401          /* check it's the right class */
1402          vassert(hregClass(rreg_state[spillee].rreg) == hregClass(vreg));
1403          /* check we're not ejecting the vreg for which we are trying
1404             to free up a register. */
1405          vassert(rreg_state[spillee].vreg != vreg);
1406 
1407          m = hregNumber(rreg_state[spillee].vreg);
1408          vassert(IS_VALID_VREGNO(m));
1409 
1410          /* So here's the spill store.  Assert that we're spilling a
1411             live vreg. */
1412          vassert(vreg_lrs[m].dead_before > ii);
1413          vassert(vreg_lrs[m].reg_class != HRcINVALID);
1414          if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
1415             HInstr* spill1 = NULL;
1416             HInstr* spill2 = NULL;
1417             (*genSpill)( &spill1, &spill2, rreg_state[spillee].rreg,
1418                          vreg_lrs[m].spill_offset, mode64 );
1419             vassert(spill1 || spill2); /* can't both be NULL */
1420             if (spill1)
1421                EMIT_INSTR(spill1);
1422             if (spill2)
1423                EMIT_INSTR(spill2);
1424          }
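         /* The store above is omitted when the eq_spill_opt
            optimisation is enabled and this rreg's value is already
            known to equal the contents of its vreg's spill slot, since
            writing it out again would be redundant. */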
1425 
1426          /* Update the rreg_state to reflect the new assignment for this
1427             rreg. */
1428          rreg_state[spillee].vreg = vreg;
1429          vreg_state[m] = INVALID_RREG_NO;
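         /* Careful: m was computed above from the old binding, so it
            still indexes the vreg just ejected; invalidating its
            vreg_state entry records that that vreg now lives only in
            its spill slot. */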
1430 
1431          rreg_state[spillee].eq_spill_slot = False; /* be safe */
1432 
1433          m = hregNumber(vreg);
1434          vassert(IS_VALID_VREGNO(m));
1435          vreg_state[m] = toShort(spillee);
1436 
1437          /* Now, if this vreg is being read or modified (as opposed to
1438             written), we have to generate a reload for it. */
1439          if (reg_usage.mode[j] != HRmWrite) {
1440             vassert(vreg_lrs[m].reg_class != HRcINVALID);
1441             HInstr* reload1 = NULL;
1442             HInstr* reload2 = NULL;
1443             (*genReload)( &reload1, &reload2, rreg_state[spillee].rreg,
1444                           vreg_lrs[m].spill_offset, mode64 );
1445             vassert(reload1 || reload2); /* can't both be NULL */
1446             if (reload1)
1447                EMIT_INSTR(reload1);
1448             if (reload2)
1449                EMIT_INSTR(reload2);
1450             /* This rreg is read or modified by the instruction.
1451                If it's merely read we can claim it now equals the
1452                spill slot, but not so if it is modified. */
1453             if (reg_usage.mode[j] == HRmRead) {
1454                rreg_state[spillee].eq_spill_slot = True;
1455             } else {
1456                vassert(reg_usage.mode[j] == HRmModify);
1457                rreg_state[spillee].eq_spill_slot = False;
1458             }
1459          }
1460 
1461          /* So after much twisting and turning, we have vreg mapped to
1462             rreg_state[spillee].rreg.  Note that in the map. */
1463          addToHRegRemap(&remap, vreg, rreg_state[spillee].rreg);
1464 
1465       } /* iterate over registers in this instruction. */
1466 
1467       /* We've finished clowning around with registers in this instruction.
1468          Three results:
1469          - the running rreg_state[] has been updated
1470          - a suitable vreg->rreg mapping for this instruction has been
1471            constructed
1472          - spill and reload instructions may have been emitted.
1473 
1474         The final step is to apply the mapping to the instruction,
1475         and emit that.
1476       */
1477 
1478       /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
1479       (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
1480       EMIT_INSTR( instrs_in->arr[ii] );
1481 
1482 #     if DEBUG_REGALLOC
1483       vex_printf("After dealing with current insn:\n");
1484       PRINT_STATE;
1485       vex_printf("\n");
1486 #     endif
1487 
1488       /* ------ Post-instruction actions for fixed rreg uses ------ */
1489 
1490       /* Now we need to check for rregs exiting fixed live ranges
1491          after this instruction, and if so mark them as free. */
1492       while (True) {
1493          vassert(rreg_lrs_db_next >= 0);
1494          vassert(rreg_lrs_db_next <= rreg_lrs_used);
1495          if (rreg_lrs_db_next == rreg_lrs_used)
1496             break; /* no more real reg live ranges to consider */
1497          if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
1498             break; /* next live range has not yet ended */
1499          vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
1500          /* rreg_lrs_db[rreg_lrs_db_next].rreg is exiting a hard live
1501             range.  Mark it as such in the main rreg_state array. */
1502          for (k = 0; k < n_rregs; k++)
1503             if (rreg_state[k].rreg == rreg_lrs_db[rreg_lrs_db_next].rreg)
1504                break;
1505          /* If this vassertion fails, we don't have an entry for
1506             this rreg, which we should have. */
1507          vassert(k < n_rregs);
1508          vassert(rreg_state[k].disp == Unavail);
1509          rreg_state[k].disp = Free;
1510          rreg_state[k].vreg = INVALID_HREG;
1511          rreg_state[k].eq_spill_slot = False;
1512 
1513          /* check for further rregs leaving HLRs at this point */
1514          rreg_lrs_db_next++;
1515       }
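      /* This mirrors the earlier, pre-instruction scan (driven by
         rreg_lrs_la_next) which marks rregs as Unavail when their
         fixed live ranges begin; here the corresponding ranges end and
         the rregs become available for vregs once more. */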
1516 
1517 #     if DEBUG_REGALLOC
1518       vex_printf("After post-insn actions for fixed regs:\n");
1519       PRINT_STATE;
1520       vex_printf("\n");
1521 #     endif
1522 
1523    } /* iterate over insns */
1524 
1525    /* ------ END: Process each insn in turn. ------ */
1526 
1527    /* free(rreg_state); */
1528    /* free(rreg_lrs); */
1529    /* if (vreg_lrs) free(vreg_lrs); */
1530 
1531    /* Paranoia */
1532    for (j = 0; j < n_rregs; j++)
1533       vassert(rreg_state[j].rreg == available_real_regs[j]);
1534 
1535    vassert(rreg_lrs_la_next == rreg_lrs_used);
1536    vassert(rreg_lrs_db_next == rreg_lrs_used);
1537 
1538    return instrs_out;
1539 
1540 #  undef INVALID_INSTRNO
1541 #  undef EMIT_INSTR
1542 #  undef PRINT_STATE
1543 }
1544 
1545 
1546 
1547 /*---------------------------------------------------------------*/
1548 /*--- end                                    host_reg_alloc2.c ---*/
1549 /*---------------------------------------------------------------*/
1550