• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*---------------------------------------------------------------*/
3 /*--- begin                                 host_reg_alloc2.c ---*/
4 /*---------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2013 OpenWorks LLP
11       info@open-works.net
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26    02110-1301, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 
30    Neither the names of the U.S. Department of Energy nor the
31    University of California nor the names of its contributors may be
32    used to endorse or promote products derived from this software
33    without prior written permission.
34 */
35 
36 #include "libvex_basictypes.h"
37 #include "libvex.h"
38 
39 #include "main_util.h"
40 #include "host_generic_regs.h"
41 
42 /* Set to 1 for lots of debugging output. */
43 #define DEBUG_REGALLOC 0
44 
45 
46 /* TODO 27 Oct 04:
47 
48    Better consistency checking from what isMove tells us.
49 
50    We can possibly do V-V coalescing even when the src is spilled,
51    providing we can arrange for the dst to have the same spill slot.
52 
53    Note that state[].hreg is the same as the available real regs.
54 
55    Generally rationalise data structures.  */
56 
57 
58 /* Records information on virtual register live ranges.  Computed once
59    and remains unchanged after that. */
60 typedef
61    struct {
62       /* Becomes live for the first time after this insn ... */
63       Short live_after;
64       /* Becomes dead for the last time before this insn ... */
65       Short dead_before;
66       /* The "home" spill slot, if needed.  Never changes. */
67       Short spill_offset;
68       Short spill_size;
69       /* What kind of register this is. */
70       HRegClass reg_class;
71    }
72    VRegLR;
73 
74 
75 /* Records information on real-register live ranges.  Computed once
76    and remains unchanged after that. */
77 typedef
78    struct {
79       HReg rreg;
80       /* Becomes live after this insn ... */
81       Short live_after;
82       /* Becomes dead before this insn ... */
83       Short dead_before;
84    }
85    RRegLR;
86 
87 
88 /* An array of the following structs (rreg_state) comprises the
89    running state of the allocator.  It indicates what the current
90    disposition of each allocatable real register is.  The array gets
91    updated as the allocator processes instructions. */
92 typedef
93    struct {
94       /* ------ FIELDS WHICH DO NOT CHANGE ------ */
95       /* Which rreg is this for? */
96       HReg rreg;
97       /* Is this involved in any HLRs?  (only an optimisation hint) */
98       Bool has_hlrs;
99       /* ------ FIELDS WHICH DO CHANGE ------ */
100       /* 6 May 07: rearranged fields below so the whole struct fits
101          into 16 bytes on both x86 and amd64. */
102       /* Used when .disp == Bound and we are looking for vregs to
103          spill. */
104       Bool is_spill_cand;
105       /* Optimisation: used when .disp == Bound.  Indicates when the
106          rreg has the same value as the spill slot for the associated
107          vreg.  Is safely left at False, and becomes True after a
108          spill store or reload for this rreg. */
109       Bool eq_spill_slot;
110       /* What's it's current disposition? */
111       enum { Free,     /* available for use */
112              Unavail,  /* in a real-reg live range */
113              Bound     /* in use (holding value of some vreg) */
114            }
115            disp;
116       /* If .disp == Bound, what vreg is it bound to? */
117       HReg vreg;
118    }
119    RRegState;
120 
121 
122 /* The allocator also maintains a redundant array of indexes
123    (vreg_state) from vreg numbers back to entries in rreg_state.  It
124    is redundant because iff vreg_state[i] == j then
125    hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
126    point at each other.  The purpose of this is to speed up activities
127    which involve looking for a particular vreg: there is no need to
128    scan the rreg_state looking for it, just index directly into
129    vreg_state.  The FAQ "does this vreg already have an associated
130    rreg" is the main beneficiary.
131 
132    To indicate, in vreg_state[i], that a given vreg is not currently
133    associated with any rreg, that entry can be set to INVALID_RREG_NO.
134 
135    Because the vreg_state entries are signed Shorts, the max number
136    of vregs that can be handed by regalloc is 32767.
137 */
138 
139 #define INVALID_RREG_NO ((Short)(-1))
140 
141 #define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
142 #define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
143 
144 
145 /* Does this instruction mention a particular reg? */
instrMentionsReg(void (* getRegUsage)(HRegUsage *,HInstr *,Bool),HInstr * instr,HReg r,Bool mode64)146 static Bool instrMentionsReg (
147    void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
148    HInstr* instr,
149    HReg r,
150    Bool mode64
151 )
152 {
153    Int       i;
154    HRegUsage reg_usage;
155    (*getRegUsage)(&reg_usage, instr, mode64);
156    for (i = 0; i < reg_usage.n_used; i++)
157       if (sameHReg(reg_usage.hreg[i], r))
158          return True;
159    return False;
160 }
161 
162 
163 /* Search forward from some given point in the incoming instruction
164    sequence.  Point is to select a virtual register to spill, by
165    finding the vreg which is mentioned as far ahead as possible, in
166    the hope that this will minimise the number of consequent reloads.
167 
168    Only do the search for vregs which are Bound in the running state,
169    and for which the .is_spill_cand field is set.  This allows the
170    caller to arbitrarily restrict the set of spill candidates to be
171    considered.
172 
173    Returns an index into the state array indicating the (v,r) pair to
174    spill, or -1 if none was found.  */
175 static
findMostDistantlyMentionedVReg(void (* getRegUsage)(HRegUsage *,HInstr *,Bool),HInstrArray * instrs_in,Int search_from_instr,RRegState * state,Int n_state,Bool mode64)176 Int findMostDistantlyMentionedVReg (
177    void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
178    HInstrArray* instrs_in,
179    Int          search_from_instr,
180    RRegState*   state,
181    Int          n_state,
182    Bool         mode64
183 )
184 {
185    Int k, m;
186    Int furthest_k = -1;
187    Int furthest   = -1;
188    vassert(search_from_instr >= 0);
189    for (k = 0; k < n_state; k++) {
190       if (!state[k].is_spill_cand)
191          continue;
192       vassert(state[k].disp == Bound);
193       for (m = search_from_instr; m < instrs_in->arr_used; m++) {
194          if (instrMentionsReg(getRegUsage,
195                               instrs_in->arr[m], state[k].vreg, mode64))
196             break;
197       }
198       if (m > furthest) {
199          furthest   = m;
200          furthest_k = k;
201       }
202    }
203    return furthest_k;
204 }
205 
206 
207 /* Check that this vreg has been assigned a sane spill offset. */
sanity_check_spill_offset(VRegLR * vreg)208 static inline void sanity_check_spill_offset ( VRegLR* vreg )
209 {
210    switch (vreg->reg_class) {
211       case HRcVec128: case HRcFlt64:
212          vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
213       default:
214          vassert(0 == ((UShort)vreg->spill_offset % 8)); break;
215    }
216 }
217 
218 
219 /* Double the size of the real-reg live-range array, if needed. */
ensureRRLRspace(RRegLR ** info,Int * size,Int used)220 static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
221 {
222    Int     k;
223    RRegLR* arr2;
224    if (used < *size) return;
225    if (0)
226       vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size);
227    vassert(used == *size);
228    arr2 = LibVEX_Alloc(2 * *size * sizeof(RRegLR));
229    for (k = 0; k < *size; k++)
230       arr2[k] = (*info)[k];
231    *size *= 2;
232    *info = arr2;
233 }
234 
235 
236 /* Sort an array of RRegLR entries by either the .live_after or
237    .dead_before fields.  This is performance-critical. */
sortRRLRarray(RRegLR * arr,Int size,Bool by_live_after)238 static void sortRRLRarray ( RRegLR* arr,
239                             Int size, Bool by_live_after )
240 {
241    Int    incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
242                        9841, 29524, 88573, 265720,
243                        797161, 2391484 };
244    Int    lo = 0;
245    Int    hi = size-1;
246    Int    i, j, h, bigN, hp;
247    RRegLR v;
248 
249    vassert(size >= 0);
250    if (size == 0)
251       return;
252 
253    bigN = hi - lo + 1; if (bigN < 2) return;
254    hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
255 
256    if (by_live_after) {
257 
258       for ( ; hp >= 0; hp--) {
259          h = incs[hp];
260          for (i = lo + h; i <= hi; i++) {
261             v = arr[i];
262             j = i;
263             while (arr[j-h].live_after > v.live_after) {
264                arr[j] = arr[j-h];
265                j = j - h;
266                if (j <= (lo + h - 1)) break;
267             }
268             arr[j] = v;
269          }
270       }
271 
272    } else {
273 
274       for ( ; hp >= 0; hp--) {
275          h = incs[hp];
276          for (i = lo + h; i <= hi; i++) {
277             v = arr[i];
278             j = i;
279             while (arr[j-h].dead_before > v.dead_before) {
280                arr[j] = arr[j-h];
281                j = j - h;
282                if (j <= (lo + h - 1)) break;
283             }
284             arr[j] = v;
285          }
286       }
287 
288    }
289 }
290 
291 
292 /* A target-independent register allocator.  Requires various
293    functions which it uses to deal abstractly with instructions and
294    registers, since it cannot have any target-specific knowledge.
295 
296    Returns a new list of instructions, which, as a result of the
297    behaviour of mapRegs, will be in-place modifications of the
298    original instructions.
299 
300    Requires that the incoming code has been generated using
301    vreg numbers 0, 1 .. n_vregs-1.  Appearance of a vreg outside
302    that range is a checked run-time error.
303 
304    Takes an expandable array of pointers to unallocated insns.
305    Returns an expandable array of pointers to allocated insns.
306 */
doRegisterAllocation(HInstrArray * instrs_in,HReg * available_real_regs,Int n_available_real_regs,Bool (* isMove)(HInstr *,HReg *,HReg *),void (* getRegUsage)(HRegUsage *,HInstr *,Bool),void (* mapRegs)(HRegRemap *,HInstr *,Bool),void (* genSpill)(HInstr **,HInstr **,HReg,Int,Bool),void (* genReload)(HInstr **,HInstr **,HReg,Int,Bool),HInstr * (* directReload)(HInstr *,HReg,Short),Int guest_sizeB,void (* ppInstr)(HInstr *,Bool),void (* ppReg)(HReg),Bool mode64)307 HInstrArray* doRegisterAllocation (
308 
309    /* Incoming virtual-registerised code. */
310    HInstrArray* instrs_in,
311 
312    /* An array listing all the real registers the allocator may use,
313       in no particular order. */
314    HReg* available_real_regs,
315    Int   n_available_real_regs,
316 
317    /* Return True iff the given insn is a reg-reg move, in which
318       case also return the src and dst regs. */
319    Bool (*isMove) ( HInstr*, HReg*, HReg* ),
320 
321    /* Get info about register usage in this insn. */
322    void (*getRegUsage) ( HRegUsage*, HInstr*, Bool ),
323 
324    /* Apply a reg-reg mapping to an insn. */
325    void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),
326 
327    /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
328       real reg to a spill slot byte offset.  The two leading HInstr**
329       args are out parameters, through which the generated insns are
330       returned.  Also (optionally) a 'directReload' function, which
331       attempts to replace a given instruction by one which reads
332       directly from a specified spill slot.  May be NULL, in which
333       case the optimisation is not attempted. */
334    void    (*genSpill)  ( HInstr**, HInstr**, HReg, Int, Bool ),
335    void    (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
336    HInstr* (*directReload) ( HInstr*, HReg, Short ),
337    Int     guest_sizeB,
338 
339    /* For debug printing only. */
340    void (*ppInstr) ( HInstr*, Bool ),
341    void (*ppReg) ( HReg ),
342 
343    /* 32/64bit mode */
344    Bool mode64
345 )
346 {
347 #  define N_SPILL64S  (LibVEX_N_SPILL_BYTES / 8)
348 
349    const Bool eq_spill_opt = True;
350 
351    /* Iterators and temporaries. */
352    Int       ii, j, k, m, spillee, k_suboptimal;
353    HReg      rreg, vreg, vregS, vregD;
354    HRegUsage reg_usage;
355 
356    /* Info on vregs and rregs.  Computed once and remains
357       unchanged. */
358    Int     n_vregs;
359    VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
360 
361    /* We keep two copies of the real-reg live range info, one sorted
362       by .live_after and the other by .dead_before.  First the
363       unsorted info is created in the _la variant is copied into the
364       _db variant.  Once that's done both of them are sorted.
365       We also need two integer cursors which record the next
366       location in the two arrays to consider. */
367    RRegLR* rreg_lrs_la;
368    RRegLR* rreg_lrs_db;
369    Int     rreg_lrs_size;
370    Int     rreg_lrs_used;
371    Int     rreg_lrs_la_next;
372    Int     rreg_lrs_db_next;
373 
374    /* Used when constructing vreg_lrs (for allocating stack
375       slots). */
376    Int ss_busy_until_before[N_SPILL64S];
377 
378    /* Used when constructing rreg_lrs. */
379    Int* rreg_live_after;
380    Int* rreg_dead_before;
381 
382    /* Running state of the core allocation algorithm. */
383    RRegState* rreg_state;  /* [0 .. n_rregs-1] */
384    Int        n_rregs;
385 
386    /* .. and the redundant backward map */
387    /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
388       This inplies n_rregs must be <= 32768. */
389    Short*     vreg_state;  /* [0 .. n_vregs-1] */
390 
391    /* The vreg -> rreg map constructed and then applied to each
392       instr. */
393    HRegRemap remap;
394 
395    /* The output array of instructions. */
396    HInstrArray* instrs_out;
397 
398    /* Sanity checks are expensive.  They are only done periodically,
399       not at each insn processed. */
400    Bool do_sanity_check;
401 
402    vassert(0 == (guest_sizeB % 16));
403    vassert(0 == (LibVEX_N_SPILL_BYTES % 16));
404    vassert(0 == (N_SPILL64S % 2));
405 
406    /* The live range numbers are signed shorts, and so limiting the
407       number of insns to 15000 comfortably guards against them
408       overflowing 32k. */
409    vassert(instrs_in->arr_used <= 15000);
410 
411 #  define INVALID_INSTRNO (-2)
412 
413 #  define EMIT_INSTR(_instr)                  \
414       do {                                    \
415         HInstr* _tmp = (_instr);              \
416         if (DEBUG_REGALLOC) {                 \
417            vex_printf("**  ");                \
418            (*ppInstr)(_tmp, mode64);          \
419            vex_printf("\n\n");                \
420         }                                     \
421         addHInstr ( instrs_out, _tmp );       \
422       } while (0)
423 
424 #   define PRINT_STATE						   \
425       do {							   \
426          Int z, q;						   \
427          for (z = 0; z < n_rregs; z++) {			   \
428             vex_printf("  rreg_state[%2d] = ", z);		   \
429             (*ppReg)(rreg_state[z].rreg);	       		   \
430             vex_printf("  \t");					   \
431             switch (rreg_state[z].disp) {			   \
432                case Free:    vex_printf("Free\n"); break;	   \
433                case Unavail: vex_printf("Unavail\n"); break;	   \
434                case Bound:   vex_printf("BoundTo "); 		   \
435                              (*ppReg)(rreg_state[z].vreg);	   \
436                              vex_printf("\n"); break;		   \
437             }							   \
438          }							   \
439          vex_printf("\n  vreg_state[0 .. %d]:\n    ", n_vregs-1);  \
440          q = 0;                                                    \
441          for (z = 0; z < n_vregs; z++) {                           \
442             if (vreg_state[z] == INVALID_RREG_NO)                  \
443                continue;                                           \
444             vex_printf("[%d] -> %d   ", z, vreg_state[z]);         \
445             q++;                                                   \
446             if (q > 0 && (q % 6) == 0)                             \
447                vex_printf("\n    ");                               \
448          }                                                         \
449          vex_printf("\n");                                         \
450       } while (0)
451 
452 
453    /* --------- Stage 0: set up output array --------- */
454    /* --------- and allocate/initialise running state. --------- */
455 
456    instrs_out = newHInstrArray();
457 
458    /* ... and initialise running state. */
459    /* n_rregs is no more than a short name for n_available_real_regs. */
460    n_rregs = n_available_real_regs;
461    n_vregs = instrs_in->n_vregs;
462 
463    /* If this is not so, vreg_state entries will overflow. */
464    vassert(n_vregs < 32767);
465 
466    rreg_state = LibVEX_Alloc(n_rregs * sizeof(RRegState));
467    vreg_state = LibVEX_Alloc(n_vregs * sizeof(Short));
468 
469    for (j = 0; j < n_rregs; j++) {
470       rreg_state[j].rreg          = available_real_regs[j];
471       rreg_state[j].has_hlrs      = False;
472       rreg_state[j].disp          = Free;
473       rreg_state[j].vreg          = INVALID_HREG;
474       rreg_state[j].is_spill_cand = False;
475       rreg_state[j].eq_spill_slot = False;
476    }
477 
478    for (j = 0; j < n_vregs; j++)
479       vreg_state[j] = INVALID_RREG_NO;
480 
481 
482    /* --------- Stage 1: compute vreg live ranges. --------- */
483    /* --------- Stage 2: compute rreg live ranges. --------- */
484 
485    /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */
486 
487    /* This is relatively simple, because (1) we only seek the complete
488       end-to-end live range of each vreg, and are not interested in
489       any holes in it, and (2) the vregs are conveniently numbered 0
490       .. n_vregs-1, so we can just dump the results in a
491       pre-allocated array. */
492 
493    vreg_lrs = NULL;
494    if (n_vregs > 0)
495       vreg_lrs = LibVEX_Alloc(sizeof(VRegLR) * n_vregs);
496 
497    for (j = 0; j < n_vregs; j++) {
498       vreg_lrs[j].live_after     = INVALID_INSTRNO;
499       vreg_lrs[j].dead_before    = INVALID_INSTRNO;
500       vreg_lrs[j].spill_offset   = 0;
501       vreg_lrs[j].spill_size     = 0;
502       vreg_lrs[j].reg_class      = HRcINVALID;
503    }
504 
505    /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
506 
507    /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */
508 
509    /* This is more complex than Stage 1, because we need to compute
510       exactly all the live ranges of all the allocatable real regs,
511       and we don't know in advance how many there will be. */
512 
513    rreg_lrs_used = 0;
514    rreg_lrs_size = 4;
515    rreg_lrs_la = LibVEX_Alloc(rreg_lrs_size * sizeof(RRegLR));
516    rreg_lrs_db = NULL; /* we'll create this later */
517 
518    /* We'll need to track live range start/end points seperately for
519       each rreg.  Sigh. */
520    vassert(n_available_real_regs > 0);
521    rreg_live_after  = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
522    rreg_dead_before = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
523 
524    for (j = 0; j < n_available_real_regs; j++) {
525       rreg_live_after[j] =
526       rreg_dead_before[j] = INVALID_INSTRNO;
527    }
528 
529    /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
530 
531    /* ------ start of ITERATE OVER INSNS ------ */
532 
533    for (ii = 0; ii < instrs_in->arr_used; ii++) {
534 
535       (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
536 
537 #     if 0
538       vex_printf("\n%d  stage1: ", ii);
539       (*ppInstr)(instrs_in->arr[ii], mode64);
540       vex_printf("\n");
541       ppHRegUsage(&reg_usage);
542 #     endif
543 
544       /* ------ start of DEAL WITH VREG LIVE RANGES ------ */
545 
546       /* for each reg mentioned in the insn ... */
547       for (j = 0; j < reg_usage.n_used; j++) {
548 
549          vreg = reg_usage.hreg[j];
550          /* only interested in virtual registers right now. */
551          if (!hregIsVirtual(vreg))
552             continue;
553          k = hregNumber(vreg);
554          if (k < 0 || k >= n_vregs) {
555             vex_printf("\n");
556             (*ppInstr)(instrs_in->arr[ii], mode64);
557             vex_printf("\n");
558             vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
559             vpanic("doRegisterAllocation: out-of-range vreg");
560          }
561 
562          /* Take the opportunity to note its regclass.  We'll need
563             that when allocating spill slots. */
564          if (vreg_lrs[k].reg_class == HRcINVALID) {
565             /* First mention of this vreg. */
566             vreg_lrs[k].reg_class = hregClass(vreg);
567          } else {
568             /* Seen it before, so check for consistency. */
569             vassert(vreg_lrs[k].reg_class == hregClass(vreg));
570          }
571 
572          /* Now consider live ranges. */
573          switch (reg_usage.mode[j]) {
574             case HRmRead:
575                if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
576                   vex_printf("\n\nOFFENDING VREG = %d\n", k);
577                   vpanic("doRegisterAllocation: "
578                          "first event for vreg is Read");
579                }
580                vreg_lrs[k].dead_before = toShort(ii + 1);
581                break;
582             case HRmWrite:
583                if (vreg_lrs[k].live_after == INVALID_INSTRNO)
584                   vreg_lrs[k].live_after = toShort(ii);
585                vreg_lrs[k].dead_before = toShort(ii + 1);
586                break;
587             case HRmModify:
588                if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
589                   vex_printf("\n\nOFFENDING VREG = %d\n", k);
590                   vpanic("doRegisterAllocation: "
591                          "first event for vreg is Modify");
592                }
593                vreg_lrs[k].dead_before = toShort(ii + 1);
594                break;
595             default:
596                vpanic("doRegisterAllocation(1)");
597          } /* switch */
598 
599       } /* iterate over registers */
600 
601       /* ------ end of DEAL WITH VREG LIVE RANGES ------ */
602 
603       /* ------ start of DEAL WITH RREG LIVE RANGES ------ */
604 
605       /* for each reg mentioned in the insn ... */
606       for (j = 0; j < reg_usage.n_used; j++) {
607 
608          /* Dummy initialisations of flush_la and flush_db to avoid
609             possible bogus uninit-var warnings from gcc. */
610          Int  flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
611          Bool flush;
612 
613          rreg = reg_usage.hreg[j];
614 
615          /* only interested in real registers right now. */
616          if (hregIsVirtual(rreg))
617             continue;
618 
619          /* Furthermore, we're not interested in this rreg unless it's
620             one of the allocatable ones.  For example, it could be a
621             stack pointer register, or some other register beyond our
622             control, in which case we should just ignore it. */
623          for (k = 0; k < n_available_real_regs; k++)
624             if (sameHReg(available_real_regs[k], rreg))
625                break;
626          if (k == n_available_real_regs)
627             continue; /* not found -- ignore. */
628          flush = False;
629          switch (reg_usage.mode[j]) {
630             case HRmWrite:
631                flush_la = rreg_live_after[k];
632                flush_db = rreg_dead_before[k];
633                if (flush_la != INVALID_INSTRNO
634                    && flush_db != INVALID_INSTRNO)
635                   flush = True;
636                rreg_live_after[k]  = ii;
637                rreg_dead_before[k] = ii+1;
638                break;
639             case HRmRead:
640                if (rreg_live_after[k] == INVALID_INSTRNO) {
641                   vex_printf("\nOFFENDING RREG = ");
642                   (*ppReg)(available_real_regs[k]);
643                   vex_printf("\n");
644                   vex_printf("\nOFFENDING instr = ");
645                   (*ppInstr)(instrs_in->arr[ii], mode64);
646                   vex_printf("\n");
647                   vpanic("doRegisterAllocation: "
648                          "first event for rreg is Read");
649                }
650                rreg_dead_before[k] = ii+1;
651                break;
652             case HRmModify:
653                if (rreg_live_after[k] == INVALID_INSTRNO) {
654                   vex_printf("\nOFFENDING RREG = ");
655                   (*ppReg)(available_real_regs[k]);
656                   vex_printf("\n");
657                   vex_printf("\nOFFENDING instr = ");
658                   (*ppInstr)(instrs_in->arr[ii], mode64);
659                   vex_printf("\n");
660                   vpanic("doRegisterAllocation: "
661                          "first event for rreg is Modify");
662                }
663                rreg_dead_before[k] = ii+1;
664                break;
665             default:
666                vpanic("doRegisterAllocation(2)");
667          }
668 
669          if (flush) {
670             vassert(flush_la != INVALID_INSTRNO);
671             vassert(flush_db != INVALID_INSTRNO);
672             ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
673             if (0)
674                vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
675             rreg_lrs_la[rreg_lrs_used].rreg        = rreg;
676             rreg_lrs_la[rreg_lrs_used].live_after  = toShort(flush_la);
677             rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
678             rreg_lrs_used++;
679          }
680 
681       } /* iterate over regs in the instr */
682 
683       /* ------ end of DEAL WITH RREG LIVE RANGES ------ */
684 
685    } /* iterate over insns */
686 
687    /* ------ end of ITERATE OVER INSNS ------ */
688 
689    /* ------ start of FINALISE RREG LIVE RANGES ------ */
690 
691    /* Now finish up any live ranges left over. */
692    for (j = 0; j < n_available_real_regs; j++) {
693 
694 #     if 0
695       vex_printf("residual %d:  %d %d\n", j, rreg_live_after[j],
696                                              rreg_dead_before[j]);
697 #     endif
698       vassert( (rreg_live_after[j] == INVALID_INSTRNO
699                && rreg_dead_before[j] == INVALID_INSTRNO)
700               ||
701               (rreg_live_after[j] != INVALID_INSTRNO
702                && rreg_dead_before[j] != INVALID_INSTRNO)
703             );
704 
705       if (rreg_live_after[j] == INVALID_INSTRNO)
706          continue;
707 
708       ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
709       if (0)
710          vex_printf("FLUSH 2 (%d,%d)\n",
711                     rreg_live_after[j], rreg_dead_before[j]);
712       rreg_lrs_la[rreg_lrs_used].rreg        = available_real_regs[j];
713       rreg_lrs_la[rreg_lrs_used].live_after  = toShort(rreg_live_after[j]);
714       rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
715       rreg_lrs_used++;
716    }
717 
718    /* Compute summary hints for choosing real regs.  If a real reg is
719       involved in a hard live range, record that fact in the fixed
720       part of the running rreg_state.  Later, when offered a choice between
721       rregs, it's better to choose one which is not marked as having
722       any HLRs, since ones with HLRs may need to be spilled around
723       their HLRs.  Correctness of final assignment is unaffected by
724       this mechanism -- it is only an optimisation. */
725 
726    for (j = 0; j < rreg_lrs_used; j++) {
727       rreg = rreg_lrs_la[j].rreg;
728       vassert(!hregIsVirtual(rreg));
729       /* rreg is involved in a HLR.  Record this info in the array, if
730          there is space. */
731       for (k = 0; k < n_rregs; k++)
732          if (sameHReg(rreg_state[k].rreg, rreg))
733             break;
734       vassert(k < n_rregs); /* else rreg was not found in rreg_state?! */
735       rreg_state[k].has_hlrs = True;
736    }
737    if (0) {
738       for (j = 0; j < n_rregs; j++) {
739          if (!rreg_state[j].has_hlrs)
740             continue;
741          ppReg(rreg_state[j].rreg);
742          vex_printf(" hinted\n");
743       }
744    }
745 
746    /* Finally, copy the _la variant into the _db variant and
747       sort both by their respective fields. */
748    rreg_lrs_db = LibVEX_Alloc(rreg_lrs_used * sizeof(RRegLR));
749    for (j = 0; j < rreg_lrs_used; j++)
750       rreg_lrs_db[j] = rreg_lrs_la[j];
751 
752    sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/  );
753    sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );
754 
755    /* And set up the cursors. */
756    rreg_lrs_la_next = 0;
757    rreg_lrs_db_next = 0;
758 
759    for (j = 1; j < rreg_lrs_used; j++) {
760       vassert(rreg_lrs_la[j-1].live_after  <= rreg_lrs_la[j].live_after);
761       vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
762    }
763 
764    /* ------ end of FINALISE RREG LIVE RANGES ------ */
765 
766 #  if DEBUG_REGALLOC
767    for (j = 0; j < n_vregs; j++) {
768       vex_printf("vreg %d:  la = %d,  db = %d\n",
769                  j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
770    }
771 #  endif
772 
773 #  if DEBUG_REGALLOC
774    vex_printf("RRegLRs by LA:\n");
775    for (j = 0; j < rreg_lrs_used; j++) {
776       vex_printf("  ");
777       (*ppReg)(rreg_lrs_la[j].rreg);
778       vex_printf("      la = %d,  db = %d\n",
779                  rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
780    }
781    vex_printf("RRegLRs by DB:\n");
782    for (j = 0; j < rreg_lrs_used; j++) {
783       vex_printf("  ");
784       (*ppReg)(rreg_lrs_db[j].rreg);
785       vex_printf("      la = %d,  db = %d\n",
786                  rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
787    }
788 #  endif
789 
790    /* --------- Stage 3: allocate spill slots. --------- */
791 
792    /* Each spill slot is 8 bytes long.  For vregs which take more than
793       64 bits to spill (classes Flt64 and Vec128), we have to allocate
794       two consecutive spill slots.  For 256 bit registers (class
795       Vec256), we have to allocate four consecutive spill slots.
796 
797       For Vec128-class on PowerPC, the spill slot's actual address
798       must be 16-byte aligned.  Since the spill slot's address is
799       computed as an offset from the guest state pointer, and since
800       the user of the generated code must set that pointer to a
801       32-aligned value, we have the residual obligation here of
802       choosing a 16-aligned spill slot offset for Vec128-class values.
803       Since each spill slot is 8 bytes long, that means for
804       Vec128-class values we must allocated a spill slot number which
805       is zero mod 2.
806 
807       Similarly, for Vec256 calss on amd64, find a spill slot number
808       which is zero mod 4.  This guarantees it will be 32 byte
809       aligned, which isn't actually necessary on amd64 (we use movUpd
810       etc to spill), but seems like good practice.
811 
812       Do a rank-based allocation of vregs to spill slot numbers.  We
813       put as few values as possible in spill slots, but nevertheless
814       need to have a spill slot available for all vregs, just in case.
815    */
816    /* max_ss_no = -1; */
817 
818    for (j = 0; j < N_SPILL64S; j++)
819       ss_busy_until_before[j] = 0;
820 
821    for (j = 0; j < n_vregs; j++) {
822 
823       /* True iff this vreg is unused.  In which case we also expect
824          that the reg_class field for it has not been set.  */
825       if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
826          vassert(vreg_lrs[j].reg_class == HRcINVALID);
827          continue;
828       }
829 
830       /* The spill slots are 64 bits in size.  As per the comment on
831          definition of HRegClass in host_generic_regs.h, that means,
832          to spill a vreg of class Flt64 or Vec128, we'll need to find
833          two adjacent spill slots to use.  For Vec256, we'll need to
834          find four adjacent slots to use.  Note, this logic needs to
835          kept in sync with the size info on the definition of
836          HRegClass. */
837       switch (vreg_lrs[j].reg_class) {
838 
839          case HRcVec128: case HRcFlt64:
840             /* Find two adjacent free slots in which between them
841                provide up to 128 bits in which to spill the vreg.
842                Since we are trying to find an even:odd pair, move
843                along in steps of 2 (slots). */
844             for (k = 0; k < N_SPILL64S-1; k += 2)
845                if (ss_busy_until_before[k+0] <= vreg_lrs[j].live_after
846                    && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after)
847                   break;
848             if (k >= N_SPILL64S-1) {
849                vpanic("LibVEX_N_SPILL_BYTES is too low.  "
850                       "Increase and recompile.");
851             }
852             if (0) vex_printf("16-byte spill offset in spill slot %d\n",
853                               (Int)k);
854             ss_busy_until_before[k+0] = vreg_lrs[j].dead_before;
855             ss_busy_until_before[k+1] = vreg_lrs[j].dead_before;
856             break;
857 
858          default:
859             /* The ordinary case -- just find a single spill slot. */
860             /* Find the lowest-numbered spill slot which is available
861                at the start point of this interval, and assign the
862                interval to it. */
863             for (k = 0; k < N_SPILL64S; k++)
864                if (ss_busy_until_before[k] <= vreg_lrs[j].live_after)
865                   break;
866             if (k == N_SPILL64S) {
867                vpanic("LibVEX_N_SPILL_BYTES is too low.  "
868                       "Increase and recompile.");
869             }
870             ss_busy_until_before[k] = vreg_lrs[j].dead_before;
871             break;
872 
873       } /* switch (vreg_lrs[j].reg_class) { */
874 
875       /* This reflects LibVEX's hard-wired knowledge of the baseBlock
876          layout: the guest state, then two equal sized areas following
877          it for two sets of shadow state, and then the spill area. */
878       vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + k * 8);
879 
880       /* Independent check that we've made a sane choice of slot */
881       sanity_check_spill_offset( &vreg_lrs[j] );
882       /* if (j > max_ss_no) */
883       /*    max_ss_no = j; */
884    }
885 
886 #  if 0
887    vex_printf("\n\n");
888    for (j = 0; j < n_vregs; j++)
889       vex_printf("vreg %d    --> spill offset %d\n",
890                  j, vreg_lrs[j].spill_offset);
891 #  endif
892 
893    /* --------- Stage 4: establish rreg preferences --------- */
894 
895    /* It may be advantageous to allocating certain vregs to specific
896       rregs, as a way of avoiding reg-reg moves later.  Here we
897       establish which, if any, rreg each vreg would prefer to be in.
898       Note that this constrains the allocator -- ideally we end up
899       with as few as possible vregs expressing a preference.
900 
901       This is an optimisation: if the .preferred_rreg field is never
902       set to anything different from INVALID_HREG, the allocator still
903       works. */
904 
905    /* 30 Dec 04: removed this mechanism as it does not seem to
906       help. */
907 
908    /* --------- Stage 5: process instructions --------- */
909 
910    /* This is the main loop of the allocator.  First, we need to
911       correctly set up our running state, which tracks the status of
912       each real register. */
913 
914    /* ------ BEGIN: Process each insn in turn. ------ */
915 
916    for (ii = 0; ii < instrs_in->arr_used; ii++) {
917 
918 #     if DEBUG_REGALLOC
919       vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
920       vex_printf("---- ");
921       (*ppInstr)(instrs_in->arr[ii], mode64);
922       vex_printf("\n\nInitial state:\n");
923       PRINT_STATE;
924       vex_printf("\n");
925 #     endif
926 
927       /* ------------ Sanity checks ------------ */
928 
929       /* Sanity checks are expensive.  So they are done only once
930          every 7 instructions, and just before the last
931          instruction. */
932       do_sanity_check
933          = toBool(
934               False  /* Set to True for sanity checking of all insns. */
935               || ii == instrs_in->arr_used-1
936               || (ii > 0 && (ii % 7) == 0)
937            );
938 
939       if (do_sanity_check) {
940 
941          /* Sanity check 1: all rregs with a hard live range crossing
942             this insn must be marked as unavailable in the running
943             state. */
944          for (j = 0; j < rreg_lrs_used; j++) {
945             if (rreg_lrs_la[j].live_after < ii
946                 && ii < rreg_lrs_la[j].dead_before) {
947                /* ii is the middle of a hard live range for some real
948                   reg.  Check it's marked as such in the running
949                   state. */
950 
951 #              if 0
952                vex_printf("considering la %d .. db %d   reg = ",
953                           rreg_lrs[j].live_after,
954                           rreg_lrs[j].dead_before);
955                (*ppReg)(rreg_lrs[j].rreg);
956                vex_printf("\n");
957 #              endif
958 
959                /* find the state entry for this rreg */
960                for (k = 0; k < n_rregs; k++)
961                   if (sameHReg(rreg_state[k].rreg, rreg_lrs_la[j].rreg))
962                      break;
963 
964                /* and assert that this rreg is marked as unavailable */
965                vassert(rreg_state[k].disp == Unavail);
966             }
967          }
968 
969          /* Sanity check 2: conversely, all rregs marked as
970             unavailable in the running rreg_state must have a
971             corresponding hard live range entry in the rreg_lrs
972             array. */
973          for (j = 0; j < n_available_real_regs; j++) {
974             vassert(rreg_state[j].disp == Bound
975                     || rreg_state[j].disp == Free
976                     || rreg_state[j].disp == Unavail);
977             if (rreg_state[j].disp != Unavail)
978                continue;
979             for (k = 0; k < rreg_lrs_used; k++)
980                if (sameHReg(rreg_lrs_la[k].rreg, rreg_state[j].rreg)
981                    && rreg_lrs_la[k].live_after < ii
982                    && ii < rreg_lrs_la[k].dead_before)
983                   break;
984             /* If this vassertion fails, we couldn't find a
985                corresponding HLR. */
986             vassert(k < rreg_lrs_used);
987          }
988 
989          /* Sanity check 3: all vreg-rreg bindings must bind registers
990             of the same class. */
991          for (j = 0; j < n_rregs; j++) {
992             if (rreg_state[j].disp != Bound) {
993                vassert(rreg_state[j].eq_spill_slot == False);
994                continue;
995             }
996             vassert(hregClass(rreg_state[j].rreg)
997                     == hregClass(rreg_state[j].vreg));
998             vassert( hregIsVirtual(rreg_state[j].vreg));
999             vassert(!hregIsVirtual(rreg_state[j].rreg));
1000          }
1001 
1002          /* Sanity check 4: the vreg_state and rreg_state
1003             mutually-redundant mappings are consistent.  If
1004             rreg_state[j].vreg points at some vreg_state entry then
1005             that vreg_state entry should point back at
1006             rreg_state[j]. */
1007          for (j = 0; j < n_rregs; j++) {
1008             if (rreg_state[j].disp != Bound)
1009                continue;
1010             k = hregNumber(rreg_state[j].vreg);
1011             vassert(IS_VALID_VREGNO(k));
1012             vassert(vreg_state[k] == j);
1013          }
1014          for (j = 0; j < n_vregs; j++) {
1015             k = vreg_state[j];
1016             if (k == INVALID_RREG_NO)
1017                continue;
1018             vassert(IS_VALID_RREGNO(k));
1019             vassert(rreg_state[k].disp == Bound);
1020             vassert(hregNumber(rreg_state[k].vreg) == j);
1021          }
1022 
1023       } /* if (do_sanity_check) */
1024 
1025       /* ------------ end of Sanity checks ------------ */
1026 
1027       /* Do various optimisations pertaining to register coalescing
1028          and preferencing:
1029             MOV  v <-> v   coalescing (done here).
1030             MOV  v <-> r   coalescing (not yet, if ever)
1031       */
1032       /* If doing a reg-reg move between two vregs, and the src's live
1033          range ends here and the dst's live range starts here, bind
1034          the dst to the src's rreg, and that's all. */
1035       if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
1036          if (!hregIsVirtual(vregS)) goto cannot_coalesce;
1037          if (!hregIsVirtual(vregD)) goto cannot_coalesce;
1038          /* Check that *isMove is not telling us a bunch of lies ... */
1039          vassert(hregClass(vregS) == hregClass(vregD));
1040          k = hregNumber(vregS);
1041          m = hregNumber(vregD);
1042          vassert(IS_VALID_VREGNO(k));
1043          vassert(IS_VALID_VREGNO(m));
1044          if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
1045          if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
1046 #        if DEBUG_REGALLOC
1047          vex_printf("COALESCE ");
1048          (*ppReg)(vregS);
1049          vex_printf(" -> ");
1050          (*ppReg)(vregD);
1051          vex_printf("\n\n");
1052 #        endif
1053          /* Find the state entry for vregS. */
1054          for (m = 0; m < n_rregs; m++)
1055             if (rreg_state[m].disp == Bound
1056                 && sameHReg(rreg_state[m].vreg, vregS))
1057                break;
1058          if (m == n_rregs)
1059             /* We failed to find a binding for vregS, which means it's
1060                currently not in a register.  So we can't do the
1061                coalescing.  Give up. */
1062             goto cannot_coalesce;
1063 
1064          /* Finally, we can do the coalescing.  It's trivial -- merely
1065             claim vregS's register for vregD. */
1066          rreg_state[m].vreg = vregD;
1067          vassert(IS_VALID_VREGNO(hregNumber(vregD)));
1068          vassert(IS_VALID_VREGNO(hregNumber(vregS)));
1069          vreg_state[hregNumber(vregD)] = toShort(m);
1070          vreg_state[hregNumber(vregS)] = INVALID_RREG_NO;
1071 
1072          /* This rreg has become associated with a different vreg and
1073             hence with a different spill slot.  Play safe. */
1074          rreg_state[m].eq_spill_slot = False;
1075 
1076          /* Move on to the next insn.  We skip the post-insn stuff for
1077             fixed registers, since this move should not interact with
1078             them in any way. */
1079          continue;
1080       }
1081      cannot_coalesce:
1082 
1083       /* ------ Free up rregs bound to dead vregs ------ */
1084 
1085       /* Look for vregs whose live range has just ended, and
1086 	 mark the associated rreg as free. */
1087 
1088       for (j = 0; j < n_rregs; j++) {
1089          if (rreg_state[j].disp != Bound)
1090             continue;
1091          UInt vregno = hregNumber(rreg_state[j].vreg);
1092          vassert(IS_VALID_VREGNO(vregno));
1093          if (vreg_lrs[vregno].dead_before <= ii) {
1094             rreg_state[j].disp = Free;
1095             rreg_state[j].eq_spill_slot = False;
1096             m = hregNumber(rreg_state[j].vreg);
1097             vassert(IS_VALID_VREGNO(m));
1098             vreg_state[m] = INVALID_RREG_NO;
1099             if (DEBUG_REGALLOC) {
1100                vex_printf("free up ");
1101                (*ppReg)(rreg_state[j].rreg);
1102                vex_printf("\n");
1103             }
1104          }
1105       }
1106 
1107       /* ------ Pre-instruction actions for fixed rreg uses ------ */
1108 
1109       /* Now we have to deal with rregs which are about to be made
1110          live by this instruction -- in other words, are entering into
1111          one of their live ranges.  If any such rreg holds a vreg, we
1112          will have to free up the rreg.  The simplest solution which
1113          is correct is to spill the rreg.
1114 
1115          Note we could do better:
1116          * Could move it into some other free rreg, if one is available
1117 
1118          Do this efficiently, by incrementally stepping along an array
1119          of rreg HLRs that are known to be sorted by start point
1120          (their .live_after field).
1121       */
1122       while (True) {
1123          vassert(rreg_lrs_la_next >= 0);
1124          vassert(rreg_lrs_la_next <= rreg_lrs_used);
1125          if (rreg_lrs_la_next == rreg_lrs_used)
1126             break; /* no more real reg live ranges to consider */
1127          if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
1128             break; /* next live range does not yet start */
1129          vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
1130          /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
1131             Find the associated rreg_state entry. */
1132          /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
1133             Real register live ranges are guaranteed to be well-formed
1134             in that they start with a write to the register -- Stage 2
1135             rejects any code not satisfying this.  So the correct
1136             question to ask is whether
1137             rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
1138             whether the reg becomes live after this insn -- rather
1139             than before it. */
1140 #        if DEBUG_REGALLOC
1141          vex_printf("need to free up rreg: ");
1142          (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
1143          vex_printf("\n\n");
1144 #        endif
1145          for (k = 0; k < n_rregs; k++)
1146             if (sameHReg(rreg_state[k].rreg, rreg_lrs_la[rreg_lrs_la_next].rreg))
1147                break;
1148          /* If this fails, we don't have an entry for this rreg.
1149             Which we should. */
1150          vassert(IS_VALID_RREGNO(k));
1151          m = hregNumber(rreg_state[k].vreg);
1152          if (rreg_state[k].disp == Bound) {
1153             /* Yes, there is an associated vreg.  Spill it if it's
1154                still live. */
1155             vassert(IS_VALID_VREGNO(m));
1156             vreg_state[m] = INVALID_RREG_NO;
1157             if (vreg_lrs[m].dead_before > ii) {
1158                vassert(vreg_lrs[m].reg_class != HRcINVALID);
1159                if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
1160                   HInstr* spill1 = NULL;
1161                   HInstr* spill2 = NULL;
1162                   (*genSpill)( &spill1, &spill2, rreg_state[k].rreg,
1163                                vreg_lrs[m].spill_offset, mode64 );
1164                   vassert(spill1 || spill2); /* can't both be NULL */
1165                   if (spill1)
1166                      EMIT_INSTR(spill1);
1167                   if (spill2)
1168                      EMIT_INSTR(spill2);
1169                }
1170                rreg_state[k].eq_spill_slot = True;
1171             }
1172          }
1173          rreg_state[k].disp = Unavail;
1174          rreg_state[k].vreg = INVALID_HREG;
1175          rreg_state[k].eq_spill_slot = False;
1176 
1177          /* check for further rregs entering HLRs at this point */
1178          rreg_lrs_la_next++;
1179       }
1180 
1181 
1182 #     if DEBUG_REGALLOC
1183       vex_printf("After pre-insn actions for fixed regs:\n");
1184       PRINT_STATE;
1185       vex_printf("\n");
1186 #     endif
1187 
1188 
1189       /* ------ Deal with the current instruction. ------ */
1190 
1191       /* Finally we can begin the processing of this instruction
1192          itself.  The aim is to free up enough rregs for this insn.
1193          This may generate spill stores since we may have to evict
1194          some vregs currently in rregs.  Also generates spill loads.
1195          We also build up the final vreg->rreg mapping to be applied
1196          to the insn. */
1197 
1198       (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
1199 
1200       initHRegRemap(&remap);
1201 
1202       /* ------------ BEGIN directReload optimisation ----------- */
1203 
1204       /* If the instruction reads exactly one vreg which is currently
1205          in a spill slot, and this is last use of that vreg, see if we
1206          can convert the instruction into one reads directly from the
1207          spill slot.  This is clearly only possible for x86 and amd64
1208          targets, since ppc and arm load-store architectures.  If
1209          successful, replace instrs_in->arr[ii] with this new
1210          instruction, and recompute its reg usage, so that the change
1211          is invisible to the standard-case handling that follows. */
1212 
1213       if (directReload && reg_usage.n_used <= 2) {
1214          Bool  debug_direct_reload = True && False;
1215          HReg  cand     = INVALID_HREG;
1216          Bool  nreads   = 0;
1217          Short spilloff = 0;
1218 
1219          for (j = 0; j < reg_usage.n_used; j++) {
1220 
1221             vreg = reg_usage.hreg[j];
1222 
1223             if (!hregIsVirtual(vreg))
1224                continue;
1225 
1226             if (reg_usage.mode[j] == HRmRead) {
1227                nreads++;
1228                m = hregNumber(vreg);
1229                vassert(IS_VALID_VREGNO(m));
1230                k = vreg_state[m];
1231                if (!IS_VALID_RREGNO(k)) {
1232                   /* ok, it is spilled.  Now, is this its last use? */
1233                   vassert(vreg_lrs[m].dead_before >= ii+1);
1234                   if (vreg_lrs[m].dead_before == ii+1
1235                       && hregIsInvalid(cand)) {
1236                      spilloff = vreg_lrs[m].spill_offset;
1237                      cand = vreg;
1238                   }
1239                }
1240             }
1241          }
1242 
1243          if (nreads == 1 && ! hregIsInvalid(cand)) {
1244             HInstr* reloaded;
1245             if (reg_usage.n_used == 2)
1246                vassert(! sameHReg(reg_usage.hreg[0], reg_usage.hreg[1]));
1247 
1248             reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
1249             if (debug_direct_reload && !reloaded) {
1250                vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
1251                ppInstr(instrs_in->arr[ii], mode64);
1252             }
1253             if (reloaded) {
1254                /* Update info about the insn, so it looks as if it had
1255                   been in this form all along. */
1256                instrs_in->arr[ii] = reloaded;
1257                (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
1258                if (debug_direct_reload && !reloaded) {
1259                   vex_printf("  -->  ");
1260                   ppInstr(reloaded, mode64);
1261                }
1262             }
1263 
1264             if (debug_direct_reload && !reloaded)
1265                vex_printf("\n");
1266          }
1267 
1268       }
1269 
1270       /* ------------ END directReload optimisation ------------ */
1271 
1272       /* for each reg mentioned in the insn ... */
1273       for (j = 0; j < reg_usage.n_used; j++) {
1274 
1275          vreg = reg_usage.hreg[j];
1276 
1277          /* only interested in virtual registers right now. */
1278          if (!hregIsVirtual(vreg))
1279             continue;
1280 
1281 #        if 0
1282          vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
1283 #        endif
1284 
1285          /* Now we're trying to find a rreg for "vreg".  First of all,
1286             if it already has an rreg assigned, we don't need to do
1287             anything more.  Search the current state to find out. */
1288          m = hregNumber(vreg);
1289          vassert(IS_VALID_VREGNO(m));
1290          k = vreg_state[m];
1291          if (IS_VALID_RREGNO(k)) {
1292             vassert(rreg_state[k].disp == Bound);
1293             addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
1294             /* If this rreg is written or modified, mark it as different
1295                from any spill slot value. */
1296             if (reg_usage.mode[j] != HRmRead)
1297                rreg_state[k].eq_spill_slot = False;
1298             continue;
1299          } else {
1300             vassert(k == INVALID_RREG_NO);
1301          }
1302 
1303          /* No luck.  The next thing to do is see if there is a
1304             currently free rreg available, of the correct class.  If
1305             so, bag it.  NOTE, we could improve this by selecting an
1306             rreg for which the next live-range event is as far ahead
1307             as possible. */
1308          k_suboptimal = -1;
1309          for (k = 0; k < n_rregs; k++) {
1310             if (rreg_state[k].disp != Free
1311                 || hregClass(rreg_state[k].rreg) != hregClass(vreg))
1312                continue;
1313             if (rreg_state[k].has_hlrs) {
1314                /* Well, at least we can use k_suboptimal if we really
1315                   have to.  Keep on looking for a better candidate. */
1316                k_suboptimal = k;
1317             } else {
1318                /* Found a preferable reg.  Use it. */
1319                k_suboptimal = -1;
1320                break;
1321             }
1322          }
1323          if (k_suboptimal >= 0)
1324             k = k_suboptimal;
1325 
1326          if (k < n_rregs) {
1327             rreg_state[k].disp = Bound;
1328             rreg_state[k].vreg = vreg;
1329             m = hregNumber(vreg);
1330             vassert(IS_VALID_VREGNO(m));
1331             vreg_state[m] = toShort(k);
1332             addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
1333             /* Generate a reload if needed.  This only creates needed
1334                reloads because the live range builder for vregs will
1335                guarantee that the first event for a vreg is a write.
1336                Hence, if this reference is not a write, it cannot be
1337                the first reference for this vreg, and so a reload is
1338                indeed needed. */
1339             if (reg_usage.mode[j] != HRmWrite) {
1340                vassert(vreg_lrs[m].reg_class != HRcINVALID);
1341                HInstr* reload1 = NULL;
1342                HInstr* reload2 = NULL;
1343                (*genReload)( &reload1, &reload2, rreg_state[k].rreg,
1344                              vreg_lrs[m].spill_offset, mode64 );
1345                vassert(reload1 || reload2); /* can't both be NULL */
1346                if (reload1)
1347                   EMIT_INSTR(reload1);
1348                if (reload2)
1349                   EMIT_INSTR(reload2);
1350                /* This rreg is read or modified by the instruction.
1351                   If it's merely read we can claim it now equals the
1352                   spill slot, but not so if it is modified. */
1353                if (reg_usage.mode[j] == HRmRead) {
1354                   rreg_state[k].eq_spill_slot = True;
1355                } else {
1356                   vassert(reg_usage.mode[j] == HRmModify);
1357                   rreg_state[k].eq_spill_slot = False;
1358                }
1359             } else {
1360                rreg_state[k].eq_spill_slot = False;
1361             }
1362 
1363             continue;
1364          }
1365 
1366          /* Well, now we have no option but to spill a vreg.  It's
1367             important to make a good choice of vreg to spill, and of
1368             course we need to be careful not to spill a vreg which is
1369             needed by this insn. */
1370 
1371          /* First, mark in the rreg_state, those rregs which are not spill
1372             candidates, due to holding a vreg mentioned by this
1373             instruction.  Or being of the wrong class. */
1374          for (k = 0; k < n_rregs; k++) {
1375             rreg_state[k].is_spill_cand = False;
1376             if (rreg_state[k].disp != Bound)
1377                continue;
1378             if (hregClass(rreg_state[k].rreg) != hregClass(vreg))
1379                continue;
1380             rreg_state[k].is_spill_cand = True;
1381             for (m = 0; m < reg_usage.n_used; m++) {
1382                if (sameHReg(rreg_state[k].vreg, reg_usage.hreg[m])) {
1383                   rreg_state[k].is_spill_cand = False;
1384                   break;
1385                }
1386             }
1387          }
1388 
1389          /* We can choose to spill any rreg satisfying
1390             rreg_state[r].is_spill_cand (so to speak).  Choose r so that
1391             the next use of its associated vreg is as far ahead as
1392             possible, in the hope that this will minimise the number
1393             of consequent reloads required. */
1394          spillee
1395             = findMostDistantlyMentionedVReg (
1396                  getRegUsage, instrs_in, ii+1, rreg_state, n_rregs, mode64 );
1397 
1398          if (spillee == -1) {
1399             /* Hmmmmm.  There don't appear to be any spill candidates.
1400                We're hosed. */
1401             vex_printf("reg_alloc: can't find a register in class: ");
1402             ppHRegClass(hregClass(vreg));
1403             vex_printf("\n");
1404             vpanic("reg_alloc: can't create a free register.");
1405          }
1406 
1407          /* Right.  So we're going to spill rreg_state[spillee]. */
1408          vassert(IS_VALID_RREGNO(spillee));
1409          vassert(rreg_state[spillee].disp == Bound);
1410          /* check it's the right class */
1411          vassert(hregClass(rreg_state[spillee].rreg) == hregClass(vreg));
1412          /* check we're not ejecting the vreg for which we are trying
1413             to free up a register. */
1414          vassert(! sameHReg(rreg_state[spillee].vreg, vreg));
1415 
1416          m = hregNumber(rreg_state[spillee].vreg);
1417          vassert(IS_VALID_VREGNO(m));
1418 
1419          /* So here's the spill store.  Assert that we're spilling a
1420             live vreg. */
1421          vassert(vreg_lrs[m].dead_before > ii);
1422          vassert(vreg_lrs[m].reg_class != HRcINVALID);
1423          if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
1424             HInstr* spill1 = NULL;
1425             HInstr* spill2 = NULL;
1426             (*genSpill)( &spill1, &spill2, rreg_state[spillee].rreg,
1427                          vreg_lrs[m].spill_offset, mode64 );
1428             vassert(spill1 || spill2); /* can't both be NULL */
1429             if (spill1)
1430                EMIT_INSTR(spill1);
1431             if (spill2)
1432                EMIT_INSTR(spill2);
1433          }
1434 
1435          /* Update the rreg_state to reflect the new assignment for this
1436             rreg. */
1437          rreg_state[spillee].vreg = vreg;
1438          vreg_state[m] = INVALID_RREG_NO;
1439 
1440          rreg_state[spillee].eq_spill_slot = False; /* be safe */
1441 
1442          m = hregNumber(vreg);
1443          vassert(IS_VALID_VREGNO(m));
1444          vreg_state[m] = toShort(spillee);
1445 
1446          /* Now, if this vreg is being read or modified (as opposed to
1447             written), we have to generate a reload for it. */
1448          if (reg_usage.mode[j] != HRmWrite) {
1449             vassert(vreg_lrs[m].reg_class != HRcINVALID);
1450             HInstr* reload1 = NULL;
1451             HInstr* reload2 = NULL;
1452             (*genReload)( &reload1, &reload2, rreg_state[spillee].rreg,
1453                           vreg_lrs[m].spill_offset, mode64 );
1454             vassert(reload1 || reload2); /* can't both be NULL */
1455             if (reload1)
1456                EMIT_INSTR(reload1);
1457             if (reload2)
1458                EMIT_INSTR(reload2);
1459             /* This rreg is read or modified by the instruction.
1460                If it's merely read we can claim it now equals the
1461                spill slot, but not so if it is modified. */
1462             if (reg_usage.mode[j] == HRmRead) {
1463                rreg_state[spillee].eq_spill_slot = True;
1464             } else {
1465                vassert(reg_usage.mode[j] == HRmModify);
1466                rreg_state[spillee].eq_spill_slot = False;
1467             }
1468          }
1469 
1470          /* So after much twisting and turning, we have vreg mapped to
1471             rreg_state[spillee].rreg.  Note that in the map. */
1472          addToHRegRemap(&remap, vreg, rreg_state[spillee].rreg);
1473 
1474       } /* iterate over registers in this instruction. */
1475 
1476       /* We've finished clowning around with registers in this instruction.
1477          Three results:
1478          - the running rreg_state[] has been updated
1479          - a suitable vreg->rreg mapping for this instruction has been
1480            constructed
1481          - spill and reload instructions may have been emitted.
1482 
1483         The final step is to apply the mapping to the instruction,
1484         and emit that.
1485       */
1486 
1487       /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
1488       (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
1489       EMIT_INSTR( instrs_in->arr[ii] );
1490 
1491 #     if DEBUG_REGALLOC
1492       vex_printf("After dealing with current insn:\n");
1493       PRINT_STATE;
1494       vex_printf("\n");
1495 #     endif
1496 
1497       /* ------ Post-instruction actions for fixed rreg uses ------ */
1498 
1499       /* Now we need to check for rregs exiting fixed live ranges
1500          after this instruction, and if so mark them as free. */
1501       while (True) {
1502          vassert(rreg_lrs_db_next >= 0);
1503          vassert(rreg_lrs_db_next <= rreg_lrs_used);
1504          if (rreg_lrs_db_next == rreg_lrs_used)
1505             break; /* no more real reg live ranges to consider */
1506          if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
1507             break; /* next live range does not yet start */
1508          vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
1509          /* rreg_lrs_db[[rreg_lrs_db_next].rreg is exiting a hard live
1510             range.  Mark it as such in the main rreg_state array. */
1511          for (k = 0; k < n_rregs; k++)
1512            if (sameHReg(rreg_state[k].rreg, rreg_lrs_db[rreg_lrs_db_next].rreg))
1513                break;
1514          /* If this vassertion fails, we don't have an entry for
1515             this rreg.  Which we should. */
1516          vassert(k < n_rregs);
1517          vassert(rreg_state[k].disp == Unavail);
1518          rreg_state[k].disp = Free;
1519          rreg_state[k].vreg = INVALID_HREG;
1520          rreg_state[k].eq_spill_slot = False;
1521 
1522          /* check for further rregs leaving HLRs at this point */
1523          rreg_lrs_db_next++;
1524       }
1525 
1526 #     if DEBUG_REGALLOC
1527       vex_printf("After post-insn actions for fixed regs:\n");
1528       PRINT_STATE;
1529       vex_printf("\n");
1530 #     endif
1531 
1532    } /* iterate over insns */
1533 
1534    /* ------ END: Process each insn in turn. ------ */
1535 
1536    /* free(rreg_state); */
1537    /* free(rreg_lrs); */
1538    /* if (vreg_lrs) free(vreg_lrs); */
1539 
1540    /* Paranoia */
1541    for (j = 0; j < n_rregs; j++)
1542       vassert(sameHReg(rreg_state[j].rreg, available_real_regs[j]));
1543 
1544    vassert(rreg_lrs_la_next == rreg_lrs_used);
1545    vassert(rreg_lrs_db_next == rreg_lrs_used);
1546 
1547    return instrs_out;
1548 
1549 #  undef INVALID_INSTRNO
1550 #  undef EMIT_INSTR
1551 #  undef PRINT_STATE
1552 }
1553 
1554 
1555 
1556 /*---------------------------------------------------------------*/
1557 /*---                                       host_reg_alloc2.c ---*/
1558 /*---------------------------------------------------------------*/
1559