• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*---------------------------------------------------------------*/
3 /*--- begin                                 host_reg_alloc2.c ---*/
4 /*---------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2012 OpenWorks LLP
11       info@open-works.net
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26    02110-1301, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 
30    Neither the names of the U.S. Department of Energy nor the
31    University of California nor the names of its contributors may be
32    used to endorse or promote products derived from this software
33    without prior written permission.
34 */
35 
36 #include "libvex_basictypes.h"
37 #include "libvex.h"
38 
39 #include "main_util.h"
40 #include "host_generic_regs.h"
41 
42 /* Set to 1 for lots of debugging output. */
43 #define DEBUG_REGALLOC 0
44 
45 
46 /* TODO 27 Oct 04:
47 
48    Better consistency checking from what isMove tells us.
49 
50    We can possibly do V-V coalescing even when the src is spilled,
51    providing we can arrange for the dst to have the same spill slot.
52 
53    Note that state[].hreg is the same as the available real regs.
54 
55    Generally rationalise data structures.  */
56 
57 
/* Records information on virtual register live ranges.  Computed once
   and remains unchanged after that.  Live-range endpoints are
   instruction numbers; a vreg that is never mentioned keeps the
   INVALID_INSTRNO sentinel in both endpoint fields (set up by the
   allocator before the scan). */
typedef
   struct {
      /* Becomes live for the first time after this insn ... */
      Short live_after;
      /* Becomes dead for the last time before this insn ... */
      Short dead_before;
      /* The "home" spill slot, if needed.  Never changes.
         Byte offset from the guest state pointer. */
      Short spill_offset;
      /* Size in bytes of the spill slot (depends on reg_class). */
      Short spill_size;
      /* What kind of register this is.  HRcINVALID until the vreg's
         first mention fixes the class. */
      HRegClass reg_class;
   }
   VRegLR;
73 
74 
/* Records information on real-register live ranges ("hard" live
   ranges, HLRs).  Computed once and remains unchanged after that.
   Unlike vregs, one rreg may have several disjoint live ranges, so
   these are accumulated in a growable array rather than indexed by
   register number. */
typedef
   struct {
      /* Which real register this range belongs to. */
      HReg rreg;
      /* Becomes live after this insn ... */
      Short live_after;
      /* Becomes dead before this insn ... */
      Short dead_before;
   }
   RRegLR;
86 
87 
/* An array of the following structs (rreg_state) comprises the
   running state of the allocator.  It indicates what the current
   disposition of each allocatable real register is.  The array gets
   updated as the allocator processes instructions. */
typedef
   struct {
      /* ------ FIELDS WHICH DO NOT CHANGE ------ */
      /* Which rreg is this for? */
      HReg rreg;
      /* Is this involved in any HLRs?  (only an optimisation hint) */
      Bool has_hlrs;
      /* ------ FIELDS WHICH DO CHANGE ------ */
      /* 6 May 07: rearranged fields below so the whole struct fits
         into 16 bytes on both x86 and amd64. */
      /* Used when .disp == Bound and we are looking for vregs to
         spill. */
      Bool is_spill_cand;
      /* Optimisation: used when .disp == Bound.  Indicates when the
         rreg has the same value as the spill slot for the associated
         vreg.  Is safely left at False, and becomes True after a
         spill store or reload for this rreg. */
      Bool eq_spill_slot;
      /* What is its current disposition? */
      enum { Free,     /* available for use */
             Unavail,  /* in a real-reg live range */
             Bound     /* in use (holding value of some vreg) */
           }
           disp;
      /* If .disp == Bound, what vreg is it bound to? */
      HReg vreg;
   }
   RRegState;
120 
121 
122 /* The allocator also maintains a redundant array of indexes
123    (vreg_state) from vreg numbers back to entries in rreg_state.  It
124    is redundant because iff vreg_state[i] == j then
125    hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
126    point at each other.  The purpose of this is to speed up activities
127    which involve looking for a particular vreg: there is no need to
128    scan the rreg_state looking for it, just index directly into
129    vreg_state.  The FAQ "does this vreg already have an associated
130    rreg" is the main beneficiary.
131 
132    To indicate, in vreg_state[i], that a given vreg is not currently
133    associated with any rreg, that entry can be set to INVALID_RREG_NO.
134 
135    Because the vreg_state entries are signed Shorts, the max number
136    of vregs that can be handled by regalloc is 32767.
137 */
138 
/* Sentinel stored in vreg_state[] when a vreg has no associated
   rreg.  Must be negative so IS_VALID_RREGNO rejects it. */
#define INVALID_RREG_NO ((Short)(-1))

/* NOTE: these macros capture the local variables n_vregs / n_rregs
   from the scope where they are expanded (doRegisterAllocation). */
#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
143 
144 
145 /* Does this instruction mention a particular reg? */
instrMentionsReg(void (* getRegUsage)(HRegUsage *,HInstr *,Bool),HInstr * instr,HReg r,Bool mode64)146 static Bool instrMentionsReg (
147    void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
148    HInstr* instr,
149    HReg r,
150    Bool mode64
151 )
152 {
153    Int       i;
154    HRegUsage reg_usage;
155    (*getRegUsage)(&reg_usage, instr, mode64);
156    for (i = 0; i < reg_usage.n_used; i++)
157       if (reg_usage.hreg[i] == r)
158          return True;
159    return False;
160 }
161 
162 
163 /* Search forward from some given point in the incoming instruction
164    sequence.  Point is to select a virtual register to spill, by
165    finding the vreg which is mentioned as far ahead as possible, in
166    the hope that this will minimise the number of consequent reloads.
167 
168    Only do the search for vregs which are Bound in the running state,
169    and for which the .is_spill_cand field is set.  This allows the
170    caller to arbitrarily restrict the set of spill candidates to be
171    considered.
172 
173    Returns an index into the state array indicating the (v,r) pair to
174    spill, or -1 if none was found.  */
175 static
findMostDistantlyMentionedVReg(void (* getRegUsage)(HRegUsage *,HInstr *,Bool),HInstrArray * instrs_in,Int search_from_instr,RRegState * state,Int n_state,Bool mode64)176 Int findMostDistantlyMentionedVReg (
177    void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
178    HInstrArray* instrs_in,
179    Int          search_from_instr,
180    RRegState*   state,
181    Int          n_state,
182    Bool         mode64
183 )
184 {
185    Int k, m;
186    Int furthest_k = -1;
187    Int furthest   = -1;
188    vassert(search_from_instr >= 0);
189    for (k = 0; k < n_state; k++) {
190       if (!state[k].is_spill_cand)
191          continue;
192       vassert(state[k].disp == Bound);
193       for (m = search_from_instr; m < instrs_in->arr_used; m++) {
194          if (instrMentionsReg(getRegUsage,
195                               instrs_in->arr[m], state[k].vreg, mode64))
196             break;
197       }
198       if (m > furthest) {
199          furthest   = m;
200          furthest_k = k;
201       }
202    }
203    return furthest_k;
204 }
205 
206 
207 /* Check that this vreg has been assigned a sane spill offset. */
sanity_check_spill_offset(VRegLR * vreg)208 static inline void sanity_check_spill_offset ( VRegLR* vreg )
209 {
210    switch (vreg->reg_class) {
211       case HRcVec128: case HRcFlt64:
212          vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
213       default:
214          vassert(0 == ((UShort)vreg->spill_offset % 8)); break;
215    }
216 }
217 
218 
219 /* Double the size of the real-reg live-range array, if needed. */
ensureRRLRspace(RRegLR ** info,Int * size,Int used)220 static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
221 {
222    Int     k;
223    RRegLR* arr2;
224    if (used < *size) return;
225    if (0)
226       vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size);
227    vassert(used == *size);
228    arr2 = LibVEX_Alloc(2 * *size * sizeof(RRegLR));
229    for (k = 0; k < *size; k++)
230       arr2[k] = (*info)[k];
231    *size *= 2;
232    *info = arr2;
233 }
234 
235 
236 /* Sort an array of RRegLR entries by either the .live_after or
237    .dead_before fields.  This is performance-critical. */
sortRRLRarray(RRegLR * arr,Int size,Bool by_live_after)238 static void sortRRLRarray ( RRegLR* arr,
239                             Int size, Bool by_live_after )
240 {
241    Int    incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
242                        9841, 29524, 88573, 265720,
243                        797161, 2391484 };
244    Int    lo = 0;
245    Int    hi = size-1;
246    Int    i, j, h, bigN, hp;
247    RRegLR v;
248 
249    vassert(size >= 0);
250    if (size == 0)
251       return;
252 
253    bigN = hi - lo + 1; if (bigN < 2) return;
254    hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
255 
256    if (by_live_after) {
257 
258       for ( ; hp >= 0; hp--) {
259          h = incs[hp];
260          for (i = lo + h; i <= hi; i++) {
261             v = arr[i];
262             j = i;
263             while (arr[j-h].live_after > v.live_after) {
264                arr[j] = arr[j-h];
265                j = j - h;
266                if (j <= (lo + h - 1)) break;
267             }
268             arr[j] = v;
269          }
270       }
271 
272    } else {
273 
274       for ( ; hp >= 0; hp--) {
275          h = incs[hp];
276          for (i = lo + h; i <= hi; i++) {
277             v = arr[i];
278             j = i;
279             while (arr[j-h].dead_before > v.dead_before) {
280                arr[j] = arr[j-h];
281                j = j - h;
282                if (j <= (lo + h - 1)) break;
283             }
284             arr[j] = v;
285          }
286       }
287 
288    }
289 }
290 
291 
292 /* A target-independent register allocator.  Requires various
293    functions which it uses to deal abstractly with instructions and
294    registers, since it cannot have any target-specific knowledge.
295 
296    Returns a new list of instructions, which, as a result of the
297    behaviour of mapRegs, will be in-place modifications of the
298    original instructions.
299 
300    Requires that the incoming code has been generated using
301    vreg numbers 0, 1 .. n_vregs-1.  Appearance of a vreg outside
302    that range is a checked run-time error.
303 
304    Takes an expandable array of pointers to unallocated insns.
305    Returns an expandable array of pointers to allocated insns.
306 */
doRegisterAllocation(HInstrArray * instrs_in,HReg * available_real_regs,Int n_available_real_regs,Bool (* isMove)(HInstr *,HReg *,HReg *),void (* getRegUsage)(HRegUsage *,HInstr *,Bool),void (* mapRegs)(HRegRemap *,HInstr *,Bool),void (* genSpill)(HInstr **,HInstr **,HReg,Int,Bool),void (* genReload)(HInstr **,HInstr **,HReg,Int,Bool),HInstr * (* directReload)(HInstr *,HReg,Short),Int guest_sizeB,void (* ppInstr)(HInstr *,Bool),void (* ppReg)(HReg),Bool mode64)307 HInstrArray* doRegisterAllocation (
308 
309    /* Incoming virtual-registerised code. */
310    HInstrArray* instrs_in,
311 
312    /* An array listing all the real registers the allocator may use,
313       in no particular order. */
314    HReg* available_real_regs,
315    Int   n_available_real_regs,
316 
317    /* Return True iff the given insn is a reg-reg move, in which
318       case also return the src and dst regs. */
319    Bool (*isMove) ( HInstr*, HReg*, HReg* ),
320 
321    /* Get info about register usage in this insn. */
322    void (*getRegUsage) ( HRegUsage*, HInstr*, Bool ),
323 
324    /* Apply a reg-reg mapping to an insn. */
325    void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),
326 
327    /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
328       real reg to a spill slot byte offset.  The two leading HInstr**
329       args are out parameters, through which the generated insns are
330       returned.  Also (optionally) a 'directReload' function, which
331       attempts to replace a given instruction by one which reads
332       directly from a specified spill slot.  May be NULL, in which
333       case the optimisation is not attempted. */
334    void    (*genSpill)  ( HInstr**, HInstr**, HReg, Int, Bool ),
335    void    (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
336    HInstr* (*directReload) ( HInstr*, HReg, Short ),
337    Int     guest_sizeB,
338 
339    /* For debug printing only. */
340    void (*ppInstr) ( HInstr*, Bool ),
341    void (*ppReg) ( HReg ),
342 
343    /* 32/64bit mode */
344    Bool mode64
345 )
346 {
347 #  define N_SPILL64S  (LibVEX_N_SPILL_BYTES / 8)
348 
349    const Bool eq_spill_opt = True;
350 
351    /* Iterators and temporaries. */
352    Int       ii, j, k, m, spillee, k_suboptimal;
353    HReg      rreg, vreg, vregS, vregD;
354    HRegUsage reg_usage;
355 
356    /* Info on vregs and rregs.  Computed once and remains
357       unchanged. */
358    Int     n_vregs;
359    VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
360 
361    /* We keep two copies of the real-reg live range info, one sorted
362       by .live_after and the other by .dead_before.  First the
363       unsorted info is created in the _la variant and is copied into the
364       _db variant.  Once that's done both of them are sorted.
365       We also need two integer cursors which record the next
366       location in the two arrays to consider. */
367    RRegLR* rreg_lrs_la;
368    RRegLR* rreg_lrs_db;
369    Int     rreg_lrs_size;
370    Int     rreg_lrs_used;
371    Int     rreg_lrs_la_next;
372    Int     rreg_lrs_db_next;
373 
374    /* Used when constructing vreg_lrs (for allocating stack
375       slots). */
376    Int ss_busy_until_before[N_SPILL64S];
377 
378    /* Used when constructing rreg_lrs. */
379    Int* rreg_live_after;
380    Int* rreg_dead_before;
381 
382    /* Running state of the core allocation algorithm. */
383    RRegState* rreg_state;  /* [0 .. n_rregs-1] */
384    Int        n_rregs;
385 
386    /* .. and the redundant backward map */
387    /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
388       This implies n_rregs must be <= 32768. */
389    Short*     vreg_state;  /* [0 .. n_vregs-1] */
390 
391    /* The vreg -> rreg map constructed and then applied to each
392       instr. */
393    HRegRemap remap;
394 
395    /* The output array of instructions. */
396    HInstrArray* instrs_out;
397 
398    /* Sanity checks are expensive.  They are only done periodically,
399       not at each insn processed. */
400    Bool do_sanity_check;
401 
402    vassert(0 == (guest_sizeB % 32));
403    vassert(0 == (LibVEX_N_SPILL_BYTES % 32));
404    vassert(0 == (N_SPILL64S % 4));
405 
406    /* The live range numbers are signed shorts, and so limiting the
407       number of insns to 15000 comfortably guards against them
408       overflowing 32k. */
409    vassert(instrs_in->arr_used <= 15000);
410 
411 #  define INVALID_INSTRNO (-2)
412 
413 #  define EMIT_INSTR(_instr)                  \
414       do {                                    \
415         HInstr* _tmp = (_instr);              \
416         if (DEBUG_REGALLOC) {                 \
417            vex_printf("**  ");                \
418            (*ppInstr)(_tmp, mode64);          \
419            vex_printf("\n\n");                \
420         }                                     \
421         addHInstr ( instrs_out, _tmp );       \
422       } while (0)
423 
424 #   define PRINT_STATE						   \
425       do {							   \
426          Int z, q;						   \
427          for (z = 0; z < n_rregs; z++) {			   \
428             vex_printf("  rreg_state[%2d] = ", z);		   \
429             (*ppReg)(rreg_state[z].rreg);	       		   \
430             vex_printf("  \t");					   \
431             switch (rreg_state[z].disp) {			   \
432                case Free:    vex_printf("Free\n"); break;	   \
433                case Unavail: vex_printf("Unavail\n"); break;	   \
434                case Bound:   vex_printf("BoundTo "); 		   \
435                              (*ppReg)(rreg_state[z].vreg);	   \
436                              vex_printf("\n"); break;		   \
437             }							   \
438          }							   \
439          vex_printf("\n  vreg_state[0 .. %d]:\n    ", n_vregs-1);  \
440          q = 0;                                                    \
441          for (z = 0; z < n_vregs; z++) {                           \
442             if (vreg_state[z] == INVALID_RREG_NO)                  \
443                continue;                                           \
444             vex_printf("[%d] -> %d   ", z, vreg_state[z]);         \
445             q++;                                                   \
446             if (q > 0 && (q % 6) == 0)                             \
447                vex_printf("\n    ");                               \
448          }                                                         \
449          vex_printf("\n");                                         \
450       } while (0)
451 
452 
453    /* --------- Stage 0: set up output array --------- */
454    /* --------- and allocate/initialise running state. --------- */
455 
456    instrs_out = newHInstrArray();
457 
458    /* ... and initialise running state. */
459    /* n_rregs is no more than a short name for n_available_real_regs. */
460    n_rregs = n_available_real_regs;
461    n_vregs = instrs_in->n_vregs;
462 
463    /* If this is not so, vreg_state entries will overflow. */
464    vassert(n_vregs < 32767);
465 
466    rreg_state = LibVEX_Alloc(n_rregs * sizeof(RRegState));
467    vreg_state = LibVEX_Alloc(n_vregs * sizeof(Short));
468 
469    for (j = 0; j < n_rregs; j++) {
470       rreg_state[j].rreg          = available_real_regs[j];
471       rreg_state[j].has_hlrs      = False;
472       rreg_state[j].disp          = Free;
473       rreg_state[j].vreg          = INVALID_HREG;
474       rreg_state[j].is_spill_cand = False;
475       rreg_state[j].eq_spill_slot = False;
476    }
477 
478    for (j = 0; j < n_vregs; j++)
479       vreg_state[j] = INVALID_RREG_NO;
480 
481 
482    /* --------- Stage 1: compute vreg live ranges. --------- */
483    /* --------- Stage 2: compute rreg live ranges. --------- */
484 
485    /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */
486 
487    /* This is relatively simple, because (1) we only seek the complete
488       end-to-end live range of each vreg, and are not interested in
489       any holes in it, and (2) the vregs are conveniently numbered 0
490       .. n_vregs-1, so we can just dump the results in a
491       pre-allocated array. */
492 
493    vreg_lrs = NULL;
494    if (n_vregs > 0)
495       vreg_lrs = LibVEX_Alloc(sizeof(VRegLR) * n_vregs);
496 
497    for (j = 0; j < n_vregs; j++) {
498       vreg_lrs[j].live_after     = INVALID_INSTRNO;
499       vreg_lrs[j].dead_before    = INVALID_INSTRNO;
500       vreg_lrs[j].spill_offset   = 0;
501       vreg_lrs[j].spill_size     = 0;
502       vreg_lrs[j].reg_class      = HRcINVALID;
503    }
504 
505    /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
506 
507    /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */
508 
509    /* This is more complex than Stage 1, because we need to compute
510       exactly all the live ranges of all the allocatable real regs,
511       and we don't know in advance how many there will be. */
512 
513    rreg_lrs_used = 0;
514    rreg_lrs_size = 4;
515    rreg_lrs_la = LibVEX_Alloc(rreg_lrs_size * sizeof(RRegLR));
516    rreg_lrs_db = NULL; /* we'll create this later */
517 
518    /* We'll need to track live range start/end points separately for
519       each rreg.  Sigh. */
520    vassert(n_available_real_regs > 0);
521    rreg_live_after  = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
522    rreg_dead_before = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
523 
524    for (j = 0; j < n_available_real_regs; j++) {
525       rreg_live_after[j] =
526       rreg_dead_before[j] = INVALID_INSTRNO;
527    }
528 
529    /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
530 
531    /* ------ start of ITERATE OVER INSNS ------ */
532 
533    for (ii = 0; ii < instrs_in->arr_used; ii++) {
534 
535       (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
536 
537 #     if 0
538       vex_printf("\n%d  stage1: ", ii);
539       (*ppInstr)(instrs_in->arr[ii], mode64);
540       vex_printf("\n");
541       ppHRegUsage(&reg_usage);
542 #     endif
543 
544       /* ------ start of DEAL WITH VREG LIVE RANGES ------ */
545 
546       /* for each reg mentioned in the insn ... */
547       for (j = 0; j < reg_usage.n_used; j++) {
548 
549          vreg = reg_usage.hreg[j];
550          /* only interested in virtual registers right now. */
551          if (!hregIsVirtual(vreg))
552             continue;
553          k = hregNumber(vreg);
554          if (k < 0 || k >= n_vregs) {
555             vex_printf("\n");
556             (*ppInstr)(instrs_in->arr[ii], mode64);
557             vex_printf("\n");
558             vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
559             vpanic("doRegisterAllocation: out-of-range vreg");
560          }
561 
562          /* Take the opportunity to note its regclass.  We'll need
563             that when allocating spill slots. */
564          if (vreg_lrs[k].reg_class == HRcINVALID) {
565             /* First mention of this vreg. */
566             vreg_lrs[k].reg_class = hregClass(vreg);
567          } else {
568             /* Seen it before, so check for consistency. */
569             vassert(vreg_lrs[k].reg_class == hregClass(vreg));
570          }
571 
572          /* Now consider live ranges. */
573          switch (reg_usage.mode[j]) {
574             case HRmRead:
575                if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
576                   vex_printf("\n\nOFFENDING VREG = %d\n", k);
577                   vpanic("doRegisterAllocation: "
578                          "first event for vreg is Read");
579                }
580                vreg_lrs[k].dead_before = toShort(ii + 1);
581                break;
582             case HRmWrite:
583                if (vreg_lrs[k].live_after == INVALID_INSTRNO)
584                   vreg_lrs[k].live_after = toShort(ii);
585                vreg_lrs[k].dead_before = toShort(ii + 1);
586                break;
587             case HRmModify:
588                if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
589                   vex_printf("\n\nOFFENDING VREG = %d\n", k);
590                   vpanic("doRegisterAllocation: "
591                          "first event for vreg is Modify");
592                }
593                vreg_lrs[k].dead_before = toShort(ii + 1);
594                break;
595             default:
596                vpanic("doRegisterAllocation(1)");
597          } /* switch */
598 
599       } /* iterate over registers */
600 
601       /* ------ end of DEAL WITH VREG LIVE RANGES ------ */
602 
603       /* ------ start of DEAL WITH RREG LIVE RANGES ------ */
604 
605       /* for each reg mentioned in the insn ... */
606       for (j = 0; j < reg_usage.n_used; j++) {
607 
608          /* Dummy initialisations of flush_la and flush_db to avoid
609             possible bogus uninit-var warnings from gcc. */
610          Int  flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
611          Bool flush;
612 
613          rreg = reg_usage.hreg[j];
614 
615          /* only interested in real registers right now. */
616          if (hregIsVirtual(rreg))
617             continue;
618 
619          /* Furthermore, we're not interested in this rreg unless it's
620             one of the allocatable ones.  For example, it could be a
621             stack pointer register, or some other register beyond our
622             control, in which case we should just ignore it. */
623          for (k = 0; k < n_available_real_regs; k++)
624             if (available_real_regs[k] == rreg)
625                break;
626          if (k == n_available_real_regs)
627             continue; /* not found -- ignore. */
628          flush = False;
629          switch (reg_usage.mode[j]) {
630             case HRmWrite:
631                flush_la = rreg_live_after[k];
632                flush_db = rreg_dead_before[k];
633                if (flush_la != INVALID_INSTRNO
634                    && flush_db != INVALID_INSTRNO)
635                   flush = True;
636                rreg_live_after[k]  = ii;
637                rreg_dead_before[k] = ii+1;
638                break;
639             case HRmRead:
640                if (rreg_live_after[k] == INVALID_INSTRNO) {
641                   vex_printf("\nOFFENDING RREG = ");
642                   (*ppReg)(available_real_regs[k]);
643                   vex_printf("\n");
644                   vex_printf("\nOFFENDING instr = ");
645                   (*ppInstr)(instrs_in->arr[ii], mode64);
646                   vex_printf("\n");
647                   vpanic("doRegisterAllocation: "
648                          "first event for rreg is Read");
649                }
650                rreg_dead_before[k] = ii+1;
651                break;
652             case HRmModify:
653                if (rreg_live_after[k] == INVALID_INSTRNO) {
654                   vex_printf("\nOFFENDING RREG = ");
655                   (*ppReg)(available_real_regs[k]);
656                   vex_printf("\n");
657                   vex_printf("\nOFFENDING instr = ");
658                   (*ppInstr)(instrs_in->arr[ii], mode64);
659                   vex_printf("\n");
660                   vpanic("doRegisterAllocation: "
661                          "first event for rreg is Modify");
662                }
663                rreg_dead_before[k] = ii+1;
664                break;
665             default:
666                vpanic("doRegisterAllocation(2)");
667          }
668 
669          if (flush) {
670             vassert(flush_la != INVALID_INSTRNO);
671             vassert(flush_db != INVALID_INSTRNO);
672             ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
673             if (0)
674                vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
675             rreg_lrs_la[rreg_lrs_used].rreg        = rreg;
676             rreg_lrs_la[rreg_lrs_used].live_after  = toShort(flush_la);
677             rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
678             rreg_lrs_used++;
679          }
680 
681       } /* iterate over regs in the instr */
682 
683       /* ------ end of DEAL WITH RREG LIVE RANGES ------ */
684 
685    } /* iterate over insns */
686 
687    /* ------ end of ITERATE OVER INSNS ------ */
688 
689    /* ------ start of FINALISE RREG LIVE RANGES ------ */
690 
691    /* Now finish up any live ranges left over. */
692    for (j = 0; j < n_available_real_regs; j++) {
693 
694 #     if 0
695       vex_printf("residual %d:  %d %d\n", j, rreg_live_after[j],
696                                              rreg_dead_before[j]);
697 #     endif
698       vassert( (rreg_live_after[j] == INVALID_INSTRNO
699                && rreg_dead_before[j] == INVALID_INSTRNO)
700               ||
701               (rreg_live_after[j] != INVALID_INSTRNO
702                && rreg_dead_before[j] != INVALID_INSTRNO)
703             );
704 
705       if (rreg_live_after[j] == INVALID_INSTRNO)
706          continue;
707 
708       ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
709       if (0)
710          vex_printf("FLUSH 2 (%d,%d)\n",
711                     rreg_live_after[j], rreg_dead_before[j]);
712       rreg_lrs_la[rreg_lrs_used].rreg        = available_real_regs[j];
713       rreg_lrs_la[rreg_lrs_used].live_after  = toShort(rreg_live_after[j]);
714       rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
715       rreg_lrs_used++;
716    }
717 
718    /* Compute summary hints for choosing real regs.  If a real reg is
719       involved in a hard live range, record that fact in the fixed
720       part of the running rreg_state.  Later, when offered a choice between
721       rregs, it's better to choose one which is not marked as having
722       any HLRs, since ones with HLRs may need to be spilled around
723       their HLRs.  Correctness of final assignment is unaffected by
724       this mechanism -- it is only an optimisation. */
725 
726    for (j = 0; j < rreg_lrs_used; j++) {
727       rreg = rreg_lrs_la[j].rreg;
728       vassert(!hregIsVirtual(rreg));
729       /* rreg is involved in a HLR.  Record this info in the array, if
730          there is space. */
731       for (k = 0; k < n_rregs; k++)
732          if (rreg_state[k].rreg == rreg)
733             break;
734       vassert(k < n_rregs); /* else rreg was not found in rreg_state?! */
735       rreg_state[k].has_hlrs = True;
736    }
737    if (0) {
738       for (j = 0; j < n_rregs; j++) {
739          if (!rreg_state[j].has_hlrs)
740             continue;
741          ppReg(rreg_state[j].rreg);
742          vex_printf(" hinted\n");
743       }
744    }
745 
746    /* Finally, copy the _la variant into the _db variant and
747       sort both by their respective fields. */
748    rreg_lrs_db = LibVEX_Alloc(rreg_lrs_used * sizeof(RRegLR));
749    for (j = 0; j < rreg_lrs_used; j++)
750       rreg_lrs_db[j] = rreg_lrs_la[j];
751 
752    sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/  );
753    sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );
754 
755    /* And set up the cursors. */
756    rreg_lrs_la_next = 0;
757    rreg_lrs_db_next = 0;
758 
759    for (j = 1; j < rreg_lrs_used; j++) {
760       vassert(rreg_lrs_la[j-1].live_after  <= rreg_lrs_la[j].live_after);
761       vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
762    }
763 
764    /* ------ end of FINALISE RREG LIVE RANGES ------ */
765 
766 #  if DEBUG_REGALLOC
767    for (j = 0; j < n_vregs; j++) {
768       vex_printf("vreg %d:  la = %d,  db = %d\n",
769                  j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
770    }
771 #  endif
772 
773 #  if DEBUG_REGALLOC
774    vex_printf("RRegLRs by LA:\n");
775    for (j = 0; j < rreg_lrs_used; j++) {
776       vex_printf("  ");
777       (*ppReg)(rreg_lrs_la[j].rreg);
778       vex_printf("      la = %d,  db = %d\n",
779                  rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
780    }
781    vex_printf("RRegLRs by DB:\n");
782    for (j = 0; j < rreg_lrs_used; j++) {
783       vex_printf("  ");
784       (*ppReg)(rreg_lrs_db[j].rreg);
785       vex_printf("      la = %d,  db = %d\n",
786                  rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
787    }
788 #  endif
789 
790    /* --------- Stage 3: allocate spill slots. --------- */
791 
792    /* Each spill slot is 8 bytes long.  For vregs which take more than
793       64 bits to spill (classes Flt64 and Vec128), we have to allocate
794       two consecutive spill slots.  For 256 bit registers (class
795       Vec256), we have to allocate four consecutive spill slots.
796 
797       For Vec128-class on PowerPC, the spill slot's actual address
798       must be 16-byte aligned.  Since the spill slot's address is
799       computed as an offset from the guest state pointer, and since
800       the user of the generated code must set that pointer to a
801       32-aligned value, we have the residual obligation here of
802       choosing a 16-aligned spill slot offset for Vec128-class values.
803       Since each spill slot is 8 bytes long, that means for
804       Vec128-class values we must allocate a spill slot number which
805       is zero mod 2.
806 
807       Similarly, for Vec256 class on amd64, find a spill slot number
808       which is zero mod 4.  This guarantees it will be 32 byte
809       aligned, which isn't actually necessary on amd64 (we use movUpd
810       etc to spill), but seems like good practice.
811 
812       Do a rank-based allocation of vregs to spill slot numbers.  We
813       put as few values as possible in spill slots, but nevertheless
814       need to have a spill slot available for all vregs, just in case.
815    */
816    /* max_ss_no = -1; */
817 
818    for (j = 0; j < N_SPILL64S; j++)
819       ss_busy_until_before[j] = 0;
820 
821    for (j = 0; j < n_vregs; j++) {
822 
823       /* True iff this vreg is unused.  In which case we also expect
824          that the reg_class field for it has not been set.  */
825       if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
826          vassert(vreg_lrs[j].reg_class == HRcINVALID);
827          continue;
828       }
829 
830       /* The spill slots are 64 bits in size.  As per the comment on
831          definition of HRegClass in host_generic_regs.h, that means,
832          to spill a vreg of class Flt64 or Vec128, we'll need to find
833          two adjacent spill slots to use.  For Vec256, we'll need to
834             find four adjacent slots to use.  Note, this logic needs to
835             be kept in sync with the size info on the definition of
836          HRegClass. */
837       switch (vreg_lrs[j].reg_class) {
838 
839          case HRcVec128: case HRcFlt64:
840             /* Find two adjacent free slots which between them
841                provide up to 128 bits in which to spill the vreg.
842                Since we are trying to find an even:odd pair, move
843                along in steps of 2 (slots). */
844             for (k = 0; k < N_SPILL64S-1; k += 2)
845                if (ss_busy_until_before[k+0] <= vreg_lrs[j].live_after
846                    && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after)
847                   break;
848             if (k >= N_SPILL64S-1) {
849                vpanic("LibVEX_N_SPILL_BYTES is too low.  "
850                       "Increase and recompile.");
851             }
852             if (0) vex_printf("16-byte spill offset in spill slot %d\n",
853                               (Int)k);
854             ss_busy_until_before[k+0] = vreg_lrs[j].dead_before;
855             ss_busy_until_before[k+1] = vreg_lrs[j].dead_before;
856             break;
857 
858          default:
859             /* The ordinary case -- just find a single spill slot. */
860             /* Find the lowest-numbered spill slot which is available
861                at the start point of this interval, and assign the
862                interval to it. */
863             for (k = 0; k < N_SPILL64S; k++)
864                if (ss_busy_until_before[k] <= vreg_lrs[j].live_after)
865                   break;
866             if (k == N_SPILL64S) {
867                vpanic("LibVEX_N_SPILL_BYTES is too low.  "
868                       "Increase and recompile.");
869             }
870             ss_busy_until_before[k] = vreg_lrs[j].dead_before;
871             break;
872 
873       } /* switch (vreg_lrs[j].reg_class) { */
874 
875       /* This reflects LibVEX's hard-wired knowledge of the baseBlock
876          layout: the guest state, then two equal sized areas following
877          it for two sets of shadow state, and then the spill area. */
878       vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + k * 8);
879 
880       /* Independent check that we've made a sane choice of slot */
881       sanity_check_spill_offset( &vreg_lrs[j] );
882       /* if (j > max_ss_no) */
883       /*    max_ss_no = j; */
884    }
885 
886 #  if 0
887    vex_printf("\n\n");
888    for (j = 0; j < n_vregs; j++)
889       vex_printf("vreg %d    --> spill offset %d\n",
890                  j, vreg_lrs[j].spill_offset);
891 #  endif
892 
893    /* --------- Stage 4: establish rreg preferences --------- */
894 
895    /* It may be advantageous to allocate certain vregs to specific
896       rregs, as a way of avoiding reg-reg moves later.  Here we
897       establish which, if any, rreg each vreg would prefer to be in.
898       Note that this constrains the allocator -- ideally we end up
899       with as few as possible vregs expressing a preference.
900 
901       This is an optimisation: if the .preferred_rreg field is never
902       set to anything different from INVALID_HREG, the allocator still
903       works. */
904 
905    /* 30 Dec 04: removed this mechanism as it does not seem to
906       help. */
907 
908    /* --------- Stage 5: process instructions --------- */
909 
910    /* This is the main loop of the allocator.  First, we need to
911       correctly set up our running state, which tracks the status of
912       each real register. */
913 
914    /* ------ BEGIN: Process each insn in turn. ------ */
915 
916    for (ii = 0; ii < instrs_in->arr_used; ii++) {
917 
918 #     if DEBUG_REGALLOC
919       vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
920       vex_printf("---- ");
921       (*ppInstr)(instrs_in->arr[ii], mode64);
922       vex_printf("\n\nInitial state:\n");
923       PRINT_STATE;
924       vex_printf("\n");
925 #     endif
926 
927       /* ------------ Sanity checks ------------ */
928 
929       /* Sanity checks are expensive.  So they are done only once
930          every 7 instructions, and just before the last
931          instruction. */
932       do_sanity_check
933          = toBool(
934               False  /* Set to True for sanity checking of all insns. */
935               || ii == instrs_in->arr_used-1
936               || (ii > 0 && (ii % 7) == 0)
937            );
938 
939       if (do_sanity_check) {
940 
941          /* Sanity check 1: all rregs with a hard live range crossing
942             this insn must be marked as unavailable in the running
943             state. */
944          for (j = 0; j < rreg_lrs_used; j++) {
945             if (rreg_lrs_la[j].live_after < ii
946                 && ii < rreg_lrs_la[j].dead_before) {
947                /* ii is the middle of a hard live range for some real
948                   reg.  Check it's marked as such in the running
949                   state. */
950 
951 #              if 0
952                vex_printf("considering la %d .. db %d   reg = ",
953                           rreg_lrs[j].live_after,
954                           rreg_lrs[j].dead_before);
955                (*ppReg)(rreg_lrs[j].rreg);
956                vex_printf("\n");
957 #              endif
958 
959                /* find the state entry for this rreg */
960                for (k = 0; k < n_rregs; k++)
961                   if (rreg_state[k].rreg == rreg_lrs_la[j].rreg)
962                      break;
963 
964                /* and assert that this rreg is marked as unavailable */
965                vassert(rreg_state[k].disp == Unavail);
966             }
967          }
968 
969          /* Sanity check 2: conversely, all rregs marked as
970             unavailable in the running rreg_state must have a
971             corresponding hard live range entry in the rreg_lrs
972             array. */
973          for (j = 0; j < n_available_real_regs; j++) {
974             vassert(rreg_state[j].disp == Bound
975                     || rreg_state[j].disp == Free
976                     || rreg_state[j].disp == Unavail);
977             if (rreg_state[j].disp != Unavail)
978                continue;
979             for (k = 0; k < rreg_lrs_used; k++)
980                if (rreg_lrs_la[k].rreg == rreg_state[j].rreg
981                    && rreg_lrs_la[k].live_after < ii
982                    && ii < rreg_lrs_la[k].dead_before)
983                   break;
984             /* If this vassertion fails, we couldn't find a
985                corresponding HLR. */
986             vassert(k < rreg_lrs_used);
987          }
988 
989          /* Sanity check 3: all vreg-rreg bindings must bind registers
990             of the same class. */
991          for (j = 0; j < n_rregs; j++) {
992             if (rreg_state[j].disp != Bound) {
993                vassert(rreg_state[j].eq_spill_slot == False);
994                continue;
995             }
996             vassert(hregClass(rreg_state[j].rreg)
997                     == hregClass(rreg_state[j].vreg));
998             vassert( hregIsVirtual(rreg_state[j].vreg));
999             vassert(!hregIsVirtual(rreg_state[j].rreg));
1000          }
1001 
1002          /* Sanity check 4: the vreg_state and rreg_state
1003             mutually-redundant mappings are consistent.  If
1004             rreg_state[j].vreg points at some vreg_state entry then
1005             that vreg_state entry should point back at
1006             rreg_state[j]. */
1007          for (j = 0; j < n_rregs; j++) {
1008             if (rreg_state[j].disp != Bound)
1009                continue;
1010             k = hregNumber(rreg_state[j].vreg);
1011             vassert(IS_VALID_VREGNO(k));
1012             vassert(vreg_state[k] == j);
1013          }
1014          for (j = 0; j < n_vregs; j++) {
1015             k = vreg_state[j];
1016             if (k == INVALID_RREG_NO)
1017                continue;
1018             vassert(IS_VALID_RREGNO(k));
1019             vassert(rreg_state[k].disp == Bound);
1020             vassert(hregNumber(rreg_state[k].vreg) == j);
1021          }
1022 
1023       } /* if (do_sanity_check) */
1024 
1025       /* ------------ end of Sanity checks ------------ */
1026 
1027       /* Do various optimisations pertaining to register coalescing
1028          and preferencing:
1029             MOV  v <-> v   coalescing (done here).
1030             MOV  v <-> r   coalescing (not yet, if ever)
1031       */
1032       /* If doing a reg-reg move between two vregs, and the src's live
1033          range ends here and the dst's live range starts here, bind
1034          the dst to the src's rreg, and that's all. */
1035       if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
1036          if (!hregIsVirtual(vregS)) goto cannot_coalesce;
1037          if (!hregIsVirtual(vregD)) goto cannot_coalesce;
1038          /* Check that *isMove is not telling us a bunch of lies ... */
1039          vassert(hregClass(vregS) == hregClass(vregD));
1040          k = hregNumber(vregS);
1041          m = hregNumber(vregD);
1042          vassert(IS_VALID_VREGNO(k));
1043          vassert(IS_VALID_VREGNO(m));
1044          if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
1045          if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
1046 #        if DEBUG_REGALLOC
1047          vex_printf("COALESCE ");
1048          (*ppReg)(vregS);
1049          vex_printf(" -> ");
1050          (*ppReg)(vregD);
1051          vex_printf("\n\n");
1052 #        endif
1053          /* Find the state entry for vregS. */
1054          for (m = 0; m < n_rregs; m++)
1055             if (rreg_state[m].disp == Bound && rreg_state[m].vreg == vregS)
1056                break;
1057          if (m == n_rregs)
1058             /* We failed to find a binding for vregS, which means it's
1059                currently not in a register.  So we can't do the
1060                coalescing.  Give up. */
1061             goto cannot_coalesce;
1062 
1063          /* Finally, we can do the coalescing.  It's trivial -- merely
1064             claim vregS's register for vregD. */
1065          rreg_state[m].vreg = vregD;
1066          vassert(IS_VALID_VREGNO(hregNumber(vregD)));
1067          vassert(IS_VALID_VREGNO(hregNumber(vregS)));
1068          vreg_state[hregNumber(vregD)] = toShort(m);
1069          vreg_state[hregNumber(vregS)] = INVALID_RREG_NO;
1070 
1071          /* This rreg has become associated with a different vreg and
1072             hence with a different spill slot.  Play safe. */
1073          rreg_state[m].eq_spill_slot = False;
1074 
1075          /* Move on to the next insn.  We skip the post-insn stuff for
1076             fixed registers, since this move should not interact with
1077             them in any way. */
1078          continue;
1079       }
1080      cannot_coalesce:
1081 
1082       /* ------ Free up rregs bound to dead vregs ------ */
1083 
1084       /* Look for vregs whose live range has just ended, and
1085          mark the associated rreg as free. */
1086 
1087       for (j = 0; j < n_rregs; j++) {
1088          if (rreg_state[j].disp != Bound)
1089             continue;
1090          vreg = hregNumber(rreg_state[j].vreg);
1091          vassert(IS_VALID_VREGNO(vreg));
1092          if (vreg_lrs[vreg].dead_before <= ii) {
1093             rreg_state[j].disp = Free;
1094             rreg_state[j].eq_spill_slot = False;
1095             m = hregNumber(rreg_state[j].vreg);
1096             vassert(IS_VALID_VREGNO(m));
1097             vreg_state[m] = INVALID_RREG_NO;
1098             if (DEBUG_REGALLOC) {
1099                vex_printf("free up ");
1100                (*ppReg)(rreg_state[j].rreg);
1101                vex_printf("\n");
1102             }
1103          }
1104       }
1105 
1106       /* ------ Pre-instruction actions for fixed rreg uses ------ */
1107 
1108       /* Now we have to deal with rregs which are about to be made
1109          live by this instruction -- in other words, are entering into
1110          one of their live ranges.  If any such rreg holds a vreg, we
1111          will have to free up the rreg.  The simplest solution which
1112          is correct is to spill the rreg.
1113 
1114          Note we could do better:
1115          * Could move it into some other free rreg, if one is available
1116 
1117          Do this efficiently, by incrementally stepping along an array
1118          of rreg HLRs that are known to be sorted by start point
1119          (their .live_after field).
1120       */
1121       while (True) {
1122          vassert(rreg_lrs_la_next >= 0);
1123          vassert(rreg_lrs_la_next <= rreg_lrs_used);
1124          if (rreg_lrs_la_next == rreg_lrs_used)
1125             break; /* no more real reg live ranges to consider */
1126          if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
1127             break; /* next live range does not yet start */
1128          vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
1129          /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
1130             Find the associated rreg_state entry. */
1131          /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
1132             Real register live ranges are guaranteed to be well-formed
1133             in that they start with a write to the register -- Stage 2
1134             rejects any code not satisfying this.  So the correct
1135             question to ask is whether
1136             rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
1137             whether the reg becomes live after this insn -- rather
1138             than before it. */
1139 #        if DEBUG_REGALLOC
1140          vex_printf("need to free up rreg: ");
1141          (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
1142          vex_printf("\n\n");
1143 #        endif
1144          for (k = 0; k < n_rregs; k++)
1145             if (rreg_state[k].rreg == rreg_lrs_la[rreg_lrs_la_next].rreg)
1146                break;
1147          /* If this fails, we don't have an entry for this rreg.
1148             Which we should. */
1149          vassert(IS_VALID_RREGNO(k));
1150          m = hregNumber(rreg_state[k].vreg);
1151          if (rreg_state[k].disp == Bound) {
1152             /* Yes, there is an associated vreg.  Spill it if it's
1153                still live. */
1154             vassert(IS_VALID_VREGNO(m));
1155             vreg_state[m] = INVALID_RREG_NO;
1156             if (vreg_lrs[m].dead_before > ii) {
1157                vassert(vreg_lrs[m].reg_class != HRcINVALID);
1158                if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
1159                   HInstr* spill1 = NULL;
1160                   HInstr* spill2 = NULL;
1161                   (*genSpill)( &spill1, &spill2, rreg_state[k].rreg,
1162                                vreg_lrs[m].spill_offset, mode64 );
1163                   vassert(spill1 || spill2); /* can't both be NULL */
1164                   if (spill1)
1165                      EMIT_INSTR(spill1);
1166                   if (spill2)
1167                      EMIT_INSTR(spill2);
1168                }
1169                rreg_state[k].eq_spill_slot = True;
1170             }
1171          }
1172          rreg_state[k].disp = Unavail;
1173          rreg_state[k].vreg = INVALID_HREG;
1174          rreg_state[k].eq_spill_slot = False;
1175 
1176          /* check for further rregs entering HLRs at this point */
1177          rreg_lrs_la_next++;
1178       }
1179 
1180 
1181 #     if DEBUG_REGALLOC
1182       vex_printf("After pre-insn actions for fixed regs:\n");
1183       PRINT_STATE;
1184       vex_printf("\n");
1185 #     endif
1186 
1187 
1188       /* ------ Deal with the current instruction. ------ */
1189 
1190       /* Finally we can begin the processing of this instruction
1191          itself.  The aim is to free up enough rregs for this insn.
1192          This may generate spill stores since we may have to evict
1193          some vregs currently in rregs.  Also generates spill loads.
1194          We also build up the final vreg->rreg mapping to be applied
1195          to the insn. */
1196 
1197       (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
1198 
1199       initHRegRemap(&remap);
1200 
1201       /* ------------ BEGIN directReload optimisation ----------- */
1202 
1203       /* If the instruction reads exactly one vreg which is currently
1204          in a spill slot, and this is the last use of that vreg, see if we
1205          can convert the instruction into one that reads directly from the
1206          spill slot.  This is clearly only possible for x86 and amd64
1207          targets, since ppc and arm are load-store architectures.  If
1208          successful, replace instrs_in->arr[ii] with this new
1209          instruction, and recompute its reg usage, so that the change
1210          is invisible to the standard-case handling that follows. */
1211 
1212       if (directReload && reg_usage.n_used <= 2) {
1213          Bool  debug_direct_reload = True && False;
1214          HReg  cand     = INVALID_HREG;
1215          Bool  nreads   = 0;
1216          Short spilloff = 0;
1217 
1218          for (j = 0; j < reg_usage.n_used; j++) {
1219 
1220             vreg = reg_usage.hreg[j];
1221 
1222             if (!hregIsVirtual(vreg))
1223                continue;
1224 
1225             if (reg_usage.mode[j] == HRmRead) {
1226                nreads++;
1227                m = hregNumber(vreg);
1228                vassert(IS_VALID_VREGNO(m));
1229                k = vreg_state[m];
1230                if (!IS_VALID_RREGNO(k)) {
1231                   /* ok, it is spilled.  Now, is this its last use? */
1232                   vassert(vreg_lrs[m].dead_before >= ii+1);
1233                   if (vreg_lrs[m].dead_before == ii+1
1234                       && cand == INVALID_HREG) {
1235                      spilloff = vreg_lrs[m].spill_offset;
1236                      cand = vreg;
1237                   }
1238                }
1239             }
1240          }
1241 
1242          if (nreads == 1 && cand != INVALID_HREG) {
1243             HInstr* reloaded;
1244             if (reg_usage.n_used == 2)
1245                vassert(reg_usage.hreg[0] != reg_usage.hreg[1]);
1246 
1247             reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
1248             if (debug_direct_reload && !reloaded) {
1249                vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
1250                ppInstr(instrs_in->arr[ii], mode64);
1251             }
1252             if (reloaded) {
1253                /* Update info about the insn, so it looks as if it had
1254                   been in this form all along. */
1255                instrs_in->arr[ii] = reloaded;
1256                (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
1257                if (debug_direct_reload && !reloaded) {
1258                   vex_printf("  -->  ");
1259                   ppInstr(reloaded, mode64);
1260                }
1261             }
1262 
1263             if (debug_direct_reload && !reloaded)
1264                vex_printf("\n");
1265          }
1266 
1267       }
1268 
1269       /* ------------ END directReload optimisation ------------ */
1270 
1271       /* for each reg mentioned in the insn ... */
1272       for (j = 0; j < reg_usage.n_used; j++) {
1273 
1274          vreg = reg_usage.hreg[j];
1275 
1276          /* only interested in virtual registers right now. */
1277          if (!hregIsVirtual(vreg))
1278             continue;
1279 
1280 #        if 0
1281          vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
1282 #        endif
1283 
1284          /* Now we're trying to find a rreg for "vreg".  First of all,
1285             if it already has an rreg assigned, we don't need to do
1286             anything more.  Search the current state to find out. */
1287          m = hregNumber(vreg);
1288          vassert(IS_VALID_VREGNO(m));
1289          k = vreg_state[m];
1290          if (IS_VALID_RREGNO(k)) {
1291             vassert(rreg_state[k].disp == Bound);
1292             addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
1293             /* If this rreg is written or modified, mark it as different
1294                from any spill slot value. */
1295             if (reg_usage.mode[j] != HRmRead)
1296                rreg_state[k].eq_spill_slot = False;
1297             continue;
1298          } else {
1299             vassert(k == INVALID_RREG_NO);
1300          }
1301 
1302          /* No luck.  The next thing to do is see if there is a
1303             currently free rreg available, of the correct class.  If
1304             so, bag it.  NOTE, we could improve this by selecting an
1305             rreg for which the next live-range event is as far ahead
1306             as possible. */
1307          k_suboptimal = -1;
1308          for (k = 0; k < n_rregs; k++) {
1309             if (rreg_state[k].disp != Free
1310                 || hregClass(rreg_state[k].rreg) != hregClass(vreg))
1311                continue;
1312             if (rreg_state[k].has_hlrs) {
1313                /* Well, at least we can use k_suboptimal if we really
1314                   have to.  Keep on looking for a better candidate. */
1315                k_suboptimal = k;
1316             } else {
1317                /* Found a preferable reg.  Use it. */
1318                k_suboptimal = -1;
1319                break;
1320             }
1321          }
1322          if (k_suboptimal >= 0)
1323             k = k_suboptimal;
1324 
1325          if (k < n_rregs) {
1326             rreg_state[k].disp = Bound;
1327             rreg_state[k].vreg = vreg;
1328             m = hregNumber(vreg);
1329             vassert(IS_VALID_VREGNO(m));
1330             vreg_state[m] = toShort(k);
1331             addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
1332             /* Generate a reload if needed.  This only creates needed
1333                reloads because the live range builder for vregs will
1334                guarantee that the first event for a vreg is a write.
1335                Hence, if this reference is not a write, it cannot be
1336                the first reference for this vreg, and so a reload is
1337                indeed needed. */
1338             if (reg_usage.mode[j] != HRmWrite) {
1339                vassert(vreg_lrs[m].reg_class != HRcINVALID);
1340                HInstr* reload1 = NULL;
1341                HInstr* reload2 = NULL;
1342                (*genReload)( &reload1, &reload2, rreg_state[k].rreg,
1343                              vreg_lrs[m].spill_offset, mode64 );
1344                vassert(reload1 || reload2); /* can't both be NULL */
1345                if (reload1)
1346                   EMIT_INSTR(reload1);
1347                if (reload2)
1348                   EMIT_INSTR(reload2);
1349                /* This rreg is read or modified by the instruction.
1350                   If it's merely read we can claim it now equals the
1351                   spill slot, but not so if it is modified. */
1352                if (reg_usage.mode[j] == HRmRead) {
1353                   rreg_state[k].eq_spill_slot = True;
1354                } else {
1355                   vassert(reg_usage.mode[j] == HRmModify);
1356                   rreg_state[k].eq_spill_slot = False;
1357                }
1358             } else {
1359                rreg_state[k].eq_spill_slot = False;
1360             }
1361 
1362             continue;
1363          }
1364 
1365          /* Well, now we have no option but to spill a vreg.  It's
1366             important to make a good choice of vreg to spill, and of
1367             course we need to be careful not to spill a vreg which is
1368             needed by this insn. */
1369 
1370          /* First, mark in the rreg_state, those rregs which are not spill
1371             candidates, due to holding a vreg mentioned by this
1372             instruction.  Or being of the wrong class. */
1373          for (k = 0; k < n_rregs; k++) {
1374             rreg_state[k].is_spill_cand = False;
1375             if (rreg_state[k].disp != Bound)
1376                continue;
1377             if (hregClass(rreg_state[k].rreg) != hregClass(vreg))
1378                continue;
1379             rreg_state[k].is_spill_cand = True;
1380             for (m = 0; m < reg_usage.n_used; m++) {
1381                if (rreg_state[k].vreg == reg_usage.hreg[m]) {
1382                   rreg_state[k].is_spill_cand = False;
1383                   break;
1384                }
1385             }
1386          }
1387 
1388          /* We can choose to spill any rreg satisfying
1389             rreg_state[r].is_spill_cand (so to speak).  Choose r so that
1390             the next use of its associated vreg is as far ahead as
1391             possible, in the hope that this will minimise the number
1392             of consequent reloads required. */
1393          spillee
1394             = findMostDistantlyMentionedVReg (
1395                  getRegUsage, instrs_in, ii+1, rreg_state, n_rregs, mode64 );
1396 
1397          if (spillee == -1) {
1398             /* Hmmmmm.  There don't appear to be any spill candidates.
1399                We're hosed. */
1400             vex_printf("reg_alloc: can't find a register in class: ");
1401             ppHRegClass(hregClass(vreg));
1402             vex_printf("\n");
1403             vpanic("reg_alloc: can't create a free register.");
1404          }
1405 
1406          /* Right.  So we're going to spill rreg_state[spillee]. */
1407          vassert(IS_VALID_RREGNO(spillee));
1408          vassert(rreg_state[spillee].disp == Bound);
1409          /* check it's the right class */
1410          vassert(hregClass(rreg_state[spillee].rreg) == hregClass(vreg));
1411          /* check we're not ejecting the vreg for which we are trying
1412             to free up a register. */
1413          vassert(rreg_state[spillee].vreg != vreg);
1414 
1415          m = hregNumber(rreg_state[spillee].vreg);
1416          vassert(IS_VALID_VREGNO(m));
1417 
1418          /* So here's the spill store.  Assert that we're spilling a
1419             live vreg. */
1420          vassert(vreg_lrs[m].dead_before > ii);
1421          vassert(vreg_lrs[m].reg_class != HRcINVALID);
1422          if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
1423             HInstr* spill1 = NULL;
1424             HInstr* spill2 = NULL;
1425             (*genSpill)( &spill1, &spill2, rreg_state[spillee].rreg,
1426                          vreg_lrs[m].spill_offset, mode64 );
1427             vassert(spill1 || spill2); /* can't both be NULL */
1428             if (spill1)
1429                EMIT_INSTR(spill1);
1430             if (spill2)
1431                EMIT_INSTR(spill2);
1432          }
1433 
1434          /* Update the rreg_state to reflect the new assignment for this
1435             rreg. */
1436          rreg_state[spillee].vreg = vreg;
1437          vreg_state[m] = INVALID_RREG_NO;
1438 
1439          rreg_state[spillee].eq_spill_slot = False; /* be safe */
1440 
1441          m = hregNumber(vreg);
1442          vassert(IS_VALID_VREGNO(m));
1443          vreg_state[m] = toShort(spillee);
1444 
1445          /* Now, if this vreg is being read or modified (as opposed to
1446             written), we have to generate a reload for it. */
1447          if (reg_usage.mode[j] != HRmWrite) {
1448             vassert(vreg_lrs[m].reg_class != HRcINVALID);
1449             HInstr* reload1 = NULL;
1450             HInstr* reload2 = NULL;
1451             (*genReload)( &reload1, &reload2, rreg_state[spillee].rreg,
1452                           vreg_lrs[m].spill_offset, mode64 );
1453             vassert(reload1 || reload2); /* can't both be NULL */
1454             if (reload1)
1455                EMIT_INSTR(reload1);
1456             if (reload2)
1457                EMIT_INSTR(reload2);
1458             /* This rreg is read or modified by the instruction.
1459                If it's merely read we can claim it now equals the
1460                spill slot, but not so if it is modified. */
1461             if (reg_usage.mode[j] == HRmRead) {
1462                rreg_state[spillee].eq_spill_slot = True;
1463             } else {
1464                vassert(reg_usage.mode[j] == HRmModify);
1465                rreg_state[spillee].eq_spill_slot = False;
1466             }
1467          }
1468 
1469          /* So after much twisting and turning, we have vreg mapped to
1470             rreg_state[spillee].rreg.  Note that in the map. */
1471          addToHRegRemap(&remap, vreg, rreg_state[spillee].rreg);
1472 
1473       } /* iterate over registers in this instruction. */
1474 
1475       /* We've finished clowning around with registers in this instruction.
1476          Three results:
1477          - the running rreg_state[] has been updated
1478          - a suitable vreg->rreg mapping for this instruction has been
1479            constructed
1480          - spill and reload instructions may have been emitted.
1481 
1482         The final step is to apply the mapping to the instruction,
1483         and emit that.
1484       */
1485 
1486       /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
1487       (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
1488       EMIT_INSTR( instrs_in->arr[ii] );
1489 
1490 #     if DEBUG_REGALLOC
1491       vex_printf("After dealing with current insn:\n");
1492       PRINT_STATE;
1493       vex_printf("\n");
1494 #     endif
1495 
1496       /* ------ Post-instruction actions for fixed rreg uses ------ */
1497 
1498       /* Now we need to check for rregs exiting fixed live ranges
1499          after this instruction, and if so mark them as free. */
1500       while (True) {
1501          vassert(rreg_lrs_db_next >= 0);
1502          vassert(rreg_lrs_db_next <= rreg_lrs_used);
1503          if (rreg_lrs_db_next == rreg_lrs_used)
1504             break; /* no more real reg live ranges to consider */
1505          if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
1506             break; /* next live range does not yet start */
1507          vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
1508          /* rreg_lrs_db[rreg_lrs_db_next].rreg is exiting a hard live
1509             range.  Mark it as such in the main rreg_state array. */
1510          for (k = 0; k < n_rregs; k++)
1511             if (rreg_state[k].rreg == rreg_lrs_db[rreg_lrs_db_next].rreg)
1512                break;
1513          /* If this vassertion fails, we don't have an entry for
1514             this rreg.  Which we should. */
1515          vassert(k < n_rregs);
1516          vassert(rreg_state[k].disp == Unavail);
1517          rreg_state[k].disp = Free;
1518          rreg_state[k].vreg = INVALID_HREG;
1519          rreg_state[k].eq_spill_slot = False;
1520 
1521          /* check for further rregs leaving HLRs at this point */
1522          rreg_lrs_db_next++;
1523       }
1524 
1525 #     if DEBUG_REGALLOC
1526       vex_printf("After post-insn actions for fixed regs:\n");
1527       PRINT_STATE;
1528       vex_printf("\n");
1529 #     endif
1530 
1531    } /* iterate over insns */
1532 
1533    /* ------ END: Process each insn in turn. ------ */
1534 
1535    /* free(rreg_state); */
1536    /* free(rreg_lrs); */
1537    /* if (vreg_lrs) free(vreg_lrs); */
1538 
1539    /* Paranoia */
1540    for (j = 0; j < n_rregs; j++)
1541       vassert(rreg_state[j].rreg == available_real_regs[j]);
1542 
1543    vassert(rreg_lrs_la_next == rreg_lrs_used);
1544    vassert(rreg_lrs_db_next == rreg_lrs_used);
1545 
1546    return instrs_out;
1547 
1548 #  undef INVALID_INSTRNO
1549 #  undef EMIT_INSTR
1550 #  undef PRINT_STATE
1551 }
1552 
1553 
1554 
1555 /*---------------------------------------------------------------*/
1556 /*---                                       host_reg_alloc2.c ---*/
1557 /*---------------------------------------------------------------*/
1558