1
2 /*---------------------------------------------------------------*/
3 /*--- begin host_reg_alloc2.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2011 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex.h"
38
39 #include "main_util.h"
40 #include "host_generic_regs.h"
41
42 /* Set to 1 for lots of debugging output. */
43 #define DEBUG_REGALLOC 0
44
45
46 /* TODO 27 Oct 04:
47
48 Better consistency checking from what isMove tells us.
49
50 We can possibly do V-V coalescing even when the src is spilled,
51 providing we can arrange for the dst to have the same spill slot.
52
53 Note that state[].hreg is the same as the available real regs.
54
55 Generally rationalise data structures. */
56
57
/* Records information on virtual register live ranges.  Computed once
   and remains unchanged after that.  Each vreg gets exactly one
   end-to-end range; holes within it are not tracked. */
typedef
   struct {
      /* Becomes live for the first time after this insn ... */
      Short live_after;
      /* Becomes dead for the last time before this insn ... */
      Short dead_before;
      /* The "home" spill slot, if needed.  Never changes.  Byte
         offset from the guest state pointer. */
      Short spill_offset;
      Short spill_size;
      /* What kind of register this is. */
      HRegClass reg_class;
   }
   VRegLR;
73
74
/* Records information on real-register live ranges (a "hard" live
   range, HLR).  Computed once and remains unchanged after that.
   Unlike vregs, a real register may have several disjoint ranges,
   hence each range carries the register it belongs to. */
typedef
   struct {
      HReg rreg;
      /* Becomes live after this insn ... */
      Short live_after;
      /* Becomes dead before this insn ... */
      Short dead_before;
   }
   RRegLR;
86
87
/* An array of the following structs (rreg_state) comprises the
   running state of the allocator.  It indicates what the current
   disposition of each allocatable real register is.  The array gets
   updated as the allocator processes instructions. */
typedef
   struct {
      /* ------ FIELDS WHICH DO NOT CHANGE ------ */
      /* Which rreg is this for? */
      HReg rreg;
      /* Is this involved in any HLRs?  (only an optimisation hint) */
      Bool has_hlrs;
      /* ------ FIELDS WHICH DO CHANGE ------ */
      /* 6 May 07: rearranged fields below so the whole struct fits
         into 16 bytes on both x86 and amd64. */
      /* Used when .disp == Bound and we are looking for vregs to
         spill. */
      Bool is_spill_cand;
      /* Optimisation: used when .disp == Bound.  Indicates when the
         rreg has the same value as the spill slot for the associated
         vreg.  Is safely left at False, and becomes True after a
         spill store or reload for this rreg. */
      Bool eq_spill_slot;
      /* What's its current disposition? */
      enum { Free,     /* available for use */
             Unavail,  /* in a real-reg live range */
             Bound     /* in use (holding value of some vreg) */
           }
           disp;
      /* If .disp == Bound, what vreg is it bound to? */
      HReg vreg;
   }
   RRegState;
120
121
122 /* The allocator also maintains a redundant array of indexes
123 (vreg_state) from vreg numbers back to entries in rreg_state. It
124 is redundant because iff vreg_state[i] == j then
125 hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
126 point at each other. The purpose of this is to speed up activities
127 which involve looking for a particular vreg: there is no need to
128 scan the rreg_state looking for it, just index directly into
129 vreg_state. The FAQ "does this vreg already have an associated
130 rreg" is the main beneficiary.
131
132 To indicate, in vreg_state[i], that a given vreg is not currently
133 associated with any rreg, that entry can be set to INVALID_RREG_NO.
134
   Because the vreg_state entries are signed Shorts, the max number
   of vregs that can be handled by regalloc is 32767.
137 */
138
139 #define INVALID_RREG_NO ((Short)(-1))
140
141 #define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
142 #define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
143
144
145 /* Does this instruction mention a particular reg? */
instrMentionsReg(void (* getRegUsage)(HRegUsage *,HInstr *,Bool),HInstr * instr,HReg r,Bool mode64)146 static Bool instrMentionsReg (
147 void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
148 HInstr* instr,
149 HReg r,
150 Bool mode64
151 )
152 {
153 Int i;
154 HRegUsage reg_usage;
155 (*getRegUsage)(®_usage, instr, mode64);
156 for (i = 0; i < reg_usage.n_used; i++)
157 if (reg_usage.hreg[i] == r)
158 return True;
159 return False;
160 }
161
162
163 /* Search forward from some given point in the incoming instruction
164 sequence. Point is to select a virtual register to spill, by
165 finding the vreg which is mentioned as far ahead as possible, in
166 the hope that this will minimise the number of consequent reloads.
167
168 Only do the search for vregs which are Bound in the running state,
169 and for which the .is_spill_cand field is set. This allows the
170 caller to arbitrarily restrict the set of spill candidates to be
171 considered.
172
173 Returns an index into the state array indicating the (v,r) pair to
174 spill, or -1 if none was found. */
175 static
findMostDistantlyMentionedVReg(void (* getRegUsage)(HRegUsage *,HInstr *,Bool),HInstrArray * instrs_in,Int search_from_instr,RRegState * state,Int n_state,Bool mode64)176 Int findMostDistantlyMentionedVReg (
177 void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
178 HInstrArray* instrs_in,
179 Int search_from_instr,
180 RRegState* state,
181 Int n_state,
182 Bool mode64
183 )
184 {
185 Int k, m;
186 Int furthest_k = -1;
187 Int furthest = -1;
188 vassert(search_from_instr >= 0);
189 for (k = 0; k < n_state; k++) {
190 if (!state[k].is_spill_cand)
191 continue;
192 vassert(state[k].disp == Bound);
193 for (m = search_from_instr; m < instrs_in->arr_used; m++) {
194 if (instrMentionsReg(getRegUsage,
195 instrs_in->arr[m], state[k].vreg, mode64))
196 break;
197 }
198 if (m > furthest) {
199 furthest = m;
200 furthest_k = k;
201 }
202 }
203 return furthest_k;
204 }
205
206
207 /* Check that this vreg has been assigned a sane spill offset. */
sanity_check_spill_offset(VRegLR * vreg)208 static inline void sanity_check_spill_offset ( VRegLR* vreg )
209 {
210 if (vreg->reg_class == HRcVec128 || vreg->reg_class == HRcFlt64) {
211 vassert(0 == ((UShort)vreg->spill_offset % 16));
212 } else {
213 vassert(0 == ((UShort)vreg->spill_offset % 8));
214 }
215 }
216
217
218 /* Double the size of the real-reg live-range array, if needed. */
ensureRRLRspace(RRegLR ** info,Int * size,Int used)219 static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
220 {
221 Int k;
222 RRegLR* arr2;
223 if (used < *size) return;
224 if (0)
225 vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size);
226 vassert(used == *size);
227 arr2 = LibVEX_Alloc(2 * *size * sizeof(RRegLR));
228 for (k = 0; k < *size; k++)
229 arr2[k] = (*info)[k];
230 *size *= 2;
231 *info = arr2;
232 }
233
234
235 /* Sort an array of RRegLR entries by either the .live_after or
236 .dead_before fields. This is performance-critical. */
sortRRLRarray(RRegLR * arr,Int size,Bool by_live_after)237 static void sortRRLRarray ( RRegLR* arr,
238 Int size, Bool by_live_after )
239 {
240 Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
241 9841, 29524, 88573, 265720,
242 797161, 2391484 };
243 Int lo = 0;
244 Int hi = size-1;
245 Int i, j, h, bigN, hp;
246 RRegLR v;
247
248 vassert(size >= 0);
249 if (size == 0)
250 return;
251
252 bigN = hi - lo + 1; if (bigN < 2) return;
253 hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
254
255 if (by_live_after) {
256
257 for ( ; hp >= 0; hp--) {
258 h = incs[hp];
259 for (i = lo + h; i <= hi; i++) {
260 v = arr[i];
261 j = i;
262 while (arr[j-h].live_after > v.live_after) {
263 arr[j] = arr[j-h];
264 j = j - h;
265 if (j <= (lo + h - 1)) break;
266 }
267 arr[j] = v;
268 }
269 }
270
271 } else {
272
273 for ( ; hp >= 0; hp--) {
274 h = incs[hp];
275 for (i = lo + h; i <= hi; i++) {
276 v = arr[i];
277 j = i;
278 while (arr[j-h].dead_before > v.dead_before) {
279 arr[j] = arr[j-h];
280 j = j - h;
281 if (j <= (lo + h - 1)) break;
282 }
283 arr[j] = v;
284 }
285 }
286
287 }
288 }
289
290
291 /* A target-independent register allocator. Requires various
292 functions which it uses to deal abstractly with instructions and
293 registers, since it cannot have any target-specific knowledge.
294
295 Returns a new list of instructions, which, as a result of the
296 behaviour of mapRegs, will be in-place modifications of the
297 original instructions.
298
299 Requires that the incoming code has been generated using
300 vreg numbers 0, 1 .. n_vregs-1. Appearance of a vreg outside
301 that range is a checked run-time error.
302
303 Takes an expandable array of pointers to unallocated insns.
304 Returns an expandable array of pointers to allocated insns.
305 */
doRegisterAllocation(HInstrArray * instrs_in,HReg * available_real_regs,Int n_available_real_regs,Bool (* isMove)(HInstr *,HReg *,HReg *),void (* getRegUsage)(HRegUsage *,HInstr *,Bool),void (* mapRegs)(HRegRemap *,HInstr *,Bool),void (* genSpill)(HInstr **,HInstr **,HReg,Int,Bool),void (* genReload)(HInstr **,HInstr **,HReg,Int,Bool),HInstr * (* directReload)(HInstr *,HReg,Short),Int guest_sizeB,void (* ppInstr)(HInstr *,Bool),void (* ppReg)(HReg),Bool mode64)306 HInstrArray* doRegisterAllocation (
307
308 /* Incoming virtual-registerised code. */
309 HInstrArray* instrs_in,
310
311 /* An array listing all the real registers the allocator may use,
312 in no particular order. */
313 HReg* available_real_regs,
314 Int n_available_real_regs,
315
316 /* Return True iff the given insn is a reg-reg move, in which
317 case also return the src and dst regs. */
318 Bool (*isMove) ( HInstr*, HReg*, HReg* ),
319
320 /* Get info about register usage in this insn. */
321 void (*getRegUsage) ( HRegUsage*, HInstr*, Bool ),
322
323 /* Apply a reg-reg mapping to an insn. */
324 void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),
325
326 /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
327 real reg to a spill slot byte offset. The two leading HInstr**
328 args are out parameters, through which the generated insns are
329 returned. Also (optionally) a 'directReload' function, which
330 attempts to replace a given instruction by one which reads
331 directly from a specified spill slot. May be NULL, in which
332 case the optimisation is not attempted. */
333 void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ),
334 void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
335 HInstr* (*directReload) ( HInstr*, HReg, Short ),
336 Int guest_sizeB,
337
338 /* For debug printing only. */
339 void (*ppInstr) ( HInstr*, Bool ),
340 void (*ppReg) ( HReg ),
341
342 /* 32/64bit mode */
343 Bool mode64
344 )
345 {
346 # define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8)
347
348 const Bool eq_spill_opt = True;
349
350 /* Iterators and temporaries. */
351 Int ii, j, k, m, spillee, k_suboptimal;
352 HReg rreg, vreg, vregS, vregD;
353 HRegUsage reg_usage;
354
355 /* Info on vregs and rregs. Computed once and remains
356 unchanged. */
357 Int n_vregs;
358 VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
359
360 /* We keep two copies of the real-reg live range info, one sorted
361 by .live_after and the other by .dead_before. First the
      unsorted info is created in the _la variant and then copied into
      the _db variant.  Once that's done both of them are sorted.
364 We also need two integer cursors which record the next
365 location in the two arrays to consider. */
366 RRegLR* rreg_lrs_la;
367 RRegLR* rreg_lrs_db;
368 Int rreg_lrs_size;
369 Int rreg_lrs_used;
370 Int rreg_lrs_la_next;
371 Int rreg_lrs_db_next;
372
373 /* Used when constructing vreg_lrs (for allocating stack
374 slots). */
375 Int ss_busy_until_before[N_SPILL64S];
376
377 /* Used when constructing rreg_lrs. */
378 Int* rreg_live_after;
379 Int* rreg_dead_before;
380
381 /* Running state of the core allocation algorithm. */
382 RRegState* rreg_state; /* [0 .. n_rregs-1] */
383 Int n_rregs;
384
385 /* .. and the redundant backward map */
386 /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
      This implies n_rregs must be <= 32768. */
388 Short* vreg_state; /* [0 .. n_vregs-1] */
389
390 /* The vreg -> rreg map constructed and then applied to each
391 instr. */
392 HRegRemap remap;
393
394 /* The output array of instructions. */
395 HInstrArray* instrs_out;
396
397 /* Sanity checks are expensive. They are only done periodically,
398 not at each insn processed. */
399 Bool do_sanity_check;
400
401 vassert(0 == (guest_sizeB % 16));
402 vassert(0 == (LibVEX_N_SPILL_BYTES % 16));
403 vassert(0 == (N_SPILL64S % 2));
404
405 /* The live range numbers are signed shorts, and so limiting the
406 number of insns to 10000 comfortably guards against them
407 overflowing 32k. */
408 vassert(instrs_in->arr_used <= 10000);
409
410 # define INVALID_INSTRNO (-2)
411
412 # define EMIT_INSTR(_instr) \
413 do { \
414 HInstr* _tmp = (_instr); \
415 if (DEBUG_REGALLOC) { \
416 vex_printf("** "); \
417 (*ppInstr)(_tmp, mode64); \
418 vex_printf("\n\n"); \
419 } \
420 addHInstr ( instrs_out, _tmp ); \
421 } while (0)
422
423 # define PRINT_STATE \
424 do { \
425 Int z, q; \
426 for (z = 0; z < n_rregs; z++) { \
427 vex_printf(" rreg_state[%2d] = ", z); \
428 (*ppReg)(rreg_state[z].rreg); \
429 vex_printf(" \t"); \
430 switch (rreg_state[z].disp) { \
431 case Free: vex_printf("Free\n"); break; \
432 case Unavail: vex_printf("Unavail\n"); break; \
433 case Bound: vex_printf("BoundTo "); \
434 (*ppReg)(rreg_state[z].vreg); \
435 vex_printf("\n"); break; \
436 } \
437 } \
438 vex_printf("\n vreg_state[0 .. %d]:\n ", n_vregs-1); \
439 q = 0; \
440 for (z = 0; z < n_vregs; z++) { \
441 if (vreg_state[z] == INVALID_RREG_NO) \
442 continue; \
443 vex_printf("[%d] -> %d ", z, vreg_state[z]); \
444 q++; \
445 if (q > 0 && (q % 6) == 0) \
446 vex_printf("\n "); \
447 } \
448 vex_printf("\n"); \
449 } while (0)
450
451
452 /* --------- Stage 0: set up output array --------- */
453 /* --------- and allocate/initialise running state. --------- */
454
455 instrs_out = newHInstrArray();
456
457 /* ... and initialise running state. */
458 /* n_rregs is no more than a short name for n_available_real_regs. */
459 n_rregs = n_available_real_regs;
460 n_vregs = instrs_in->n_vregs;
461
462 /* If this is not so, vreg_state entries will overflow. */
463 vassert(n_vregs < 32767);
464
465 rreg_state = LibVEX_Alloc(n_rregs * sizeof(RRegState));
466 vreg_state = LibVEX_Alloc(n_vregs * sizeof(Short));
467
468 for (j = 0; j < n_rregs; j++) {
469 rreg_state[j].rreg = available_real_regs[j];
470 rreg_state[j].has_hlrs = False;
471 rreg_state[j].disp = Free;
472 rreg_state[j].vreg = INVALID_HREG;
473 rreg_state[j].is_spill_cand = False;
474 rreg_state[j].eq_spill_slot = False;
475 }
476
477 for (j = 0; j < n_vregs; j++)
478 vreg_state[j] = INVALID_RREG_NO;
479
480
481 /* --------- Stage 1: compute vreg live ranges. --------- */
482 /* --------- Stage 2: compute rreg live ranges. --------- */
483
484 /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */
485
486 /* This is relatively simple, because (1) we only seek the complete
487 end-to-end live range of each vreg, and are not interested in
488 any holes in it, and (2) the vregs are conveniently numbered 0
489 .. n_vregs-1, so we can just dump the results in a
490 pre-allocated array. */
491
492 vreg_lrs = NULL;
493 if (n_vregs > 0)
494 vreg_lrs = LibVEX_Alloc(sizeof(VRegLR) * n_vregs);
495
496 for (j = 0; j < n_vregs; j++) {
497 vreg_lrs[j].live_after = INVALID_INSTRNO;
498 vreg_lrs[j].dead_before = INVALID_INSTRNO;
499 vreg_lrs[j].spill_offset = 0;
500 vreg_lrs[j].spill_size = 0;
501 vreg_lrs[j].reg_class = HRcINVALID;
502 }
503
504 /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
505
506 /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */
507
508 /* This is more complex than Stage 1, because we need to compute
509 exactly all the live ranges of all the allocatable real regs,
510 and we don't know in advance how many there will be. */
511
512 rreg_lrs_used = 0;
513 rreg_lrs_size = 4;
514 rreg_lrs_la = LibVEX_Alloc(rreg_lrs_size * sizeof(RRegLR));
515 rreg_lrs_db = NULL; /* we'll create this later */
516
   /* We'll need to track live range start/end points separately for
518 each rreg. Sigh. */
519 vassert(n_available_real_regs > 0);
520 rreg_live_after = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
521 rreg_dead_before = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
522
523 for (j = 0; j < n_available_real_regs; j++) {
524 rreg_live_after[j] =
525 rreg_dead_before[j] = INVALID_INSTRNO;
526 }
527
528 /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
529
530 /* ------ start of ITERATE OVER INSNS ------ */
531
532 for (ii = 0; ii < instrs_in->arr_used; ii++) {
533
534 (*getRegUsage)( ®_usage, instrs_in->arr[ii], mode64 );
535
536 # if 0
537 vex_printf("\n%d stage1: ", ii);
538 (*ppInstr)(instrs_in->arr[ii], mode64);
539 vex_printf("\n");
540 ppHRegUsage(®_usage);
541 # endif
542
543 /* ------ start of DEAL WITH VREG LIVE RANGES ------ */
544
545 /* for each reg mentioned in the insn ... */
546 for (j = 0; j < reg_usage.n_used; j++) {
547
548 vreg = reg_usage.hreg[j];
549 /* only interested in virtual registers right now. */
550 if (!hregIsVirtual(vreg))
551 continue;
552 k = hregNumber(vreg);
553 if (k < 0 || k >= n_vregs) {
554 vex_printf("\n");
555 (*ppInstr)(instrs_in->arr[ii], mode64);
556 vex_printf("\n");
557 vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
558 vpanic("doRegisterAllocation: out-of-range vreg");
559 }
560
561 /* Take the opportunity to note its regclass. We'll need
562 that when allocating spill slots. */
563 if (vreg_lrs[k].reg_class == HRcINVALID) {
564 /* First mention of this vreg. */
565 vreg_lrs[k].reg_class = hregClass(vreg);
566 } else {
567 /* Seen it before, so check for consistency. */
568 vassert(vreg_lrs[k].reg_class == hregClass(vreg));
569 }
570
571 /* Now consider live ranges. */
572 switch (reg_usage.mode[j]) {
573 case HRmRead:
574 if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
575 vex_printf("\n\nOFFENDING VREG = %d\n", k);
576 vpanic("doRegisterAllocation: "
577 "first event for vreg is Read");
578 }
579 vreg_lrs[k].dead_before = toShort(ii + 1);
580 break;
581 case HRmWrite:
582 if (vreg_lrs[k].live_after == INVALID_INSTRNO)
583 vreg_lrs[k].live_after = toShort(ii);
584 vreg_lrs[k].dead_before = toShort(ii + 1);
585 break;
586 case HRmModify:
587 if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
588 vex_printf("\n\nOFFENDING VREG = %d\n", k);
589 vpanic("doRegisterAllocation: "
590 "first event for vreg is Modify");
591 }
592 vreg_lrs[k].dead_before = toShort(ii + 1);
593 break;
594 default:
595 vpanic("doRegisterAllocation(1)");
596 } /* switch */
597
598 } /* iterate over registers */
599
600 /* ------ end of DEAL WITH VREG LIVE RANGES ------ */
601
602 /* ------ start of DEAL WITH RREG LIVE RANGES ------ */
603
604 /* for each reg mentioned in the insn ... */
605 for (j = 0; j < reg_usage.n_used; j++) {
606
607 /* Dummy initialisations of flush_la and flush_db to avoid
608 possible bogus uninit-var warnings from gcc. */
609 Int flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
610 Bool flush;
611
612 rreg = reg_usage.hreg[j];
613
614 /* only interested in real registers right now. */
615 if (hregIsVirtual(rreg))
616 continue;
617
618 /* Furthermore, we're not interested in this rreg unless it's
619 one of the allocatable ones. For example, it could be a
620 stack pointer register, or some other register beyond our
621 control, in which case we should just ignore it. */
622 for (k = 0; k < n_available_real_regs; k++)
623 if (available_real_regs[k] == rreg)
624 break;
625 if (k == n_available_real_regs)
626 continue; /* not found -- ignore. */
627 flush = False;
628 switch (reg_usage.mode[j]) {
629 case HRmWrite:
630 flush_la = rreg_live_after[k];
631 flush_db = rreg_dead_before[k];
632 if (flush_la != INVALID_INSTRNO
633 && flush_db != INVALID_INSTRNO)
634 flush = True;
635 rreg_live_after[k] = ii;
636 rreg_dead_before[k] = ii+1;
637 break;
638 case HRmRead:
639 if (rreg_live_after[k] == INVALID_INSTRNO) {
640 vex_printf("\nOFFENDING RREG = ");
641 (*ppReg)(available_real_regs[k]);
642 vex_printf("\n");
643 vex_printf("\nOFFENDING instr = ");
644 (*ppInstr)(instrs_in->arr[ii], mode64);
645 vex_printf("\n");
646 vpanic("doRegisterAllocation: "
647 "first event for rreg is Read");
648 }
649 rreg_dead_before[k] = ii+1;
650 break;
651 case HRmModify:
652 if (rreg_live_after[k] == INVALID_INSTRNO) {
653 vex_printf("\nOFFENDING RREG = ");
654 (*ppReg)(available_real_regs[k]);
655 vex_printf("\n");
656 vex_printf("\nOFFENDING instr = ");
657 (*ppInstr)(instrs_in->arr[ii], mode64);
658 vex_printf("\n");
659 vpanic("doRegisterAllocation: "
660 "first event for rreg is Modify");
661 }
662 rreg_dead_before[k] = ii+1;
663 break;
664 default:
665 vpanic("doRegisterAllocation(2)");
666 }
667
668 if (flush) {
669 vassert(flush_la != INVALID_INSTRNO);
670 vassert(flush_db != INVALID_INSTRNO);
671 ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
672 if (0)
673 vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
674 rreg_lrs_la[rreg_lrs_used].rreg = rreg;
675 rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la);
676 rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
677 rreg_lrs_used++;
678 }
679
680 } /* iterate over regs in the instr */
681
682 /* ------ end of DEAL WITH RREG LIVE RANGES ------ */
683
684 } /* iterate over insns */
685
686 /* ------ end of ITERATE OVER INSNS ------ */
687
688 /* ------ start of FINALISE RREG LIVE RANGES ------ */
689
690 /* Now finish up any live ranges left over. */
691 for (j = 0; j < n_available_real_regs; j++) {
692
693 # if 0
694 vex_printf("residual %d: %d %d\n", j, rreg_live_after[j],
695 rreg_dead_before[j]);
696 # endif
697 vassert( (rreg_live_after[j] == INVALID_INSTRNO
698 && rreg_dead_before[j] == INVALID_INSTRNO)
699 ||
700 (rreg_live_after[j] != INVALID_INSTRNO
701 && rreg_dead_before[j] != INVALID_INSTRNO)
702 );
703
704 if (rreg_live_after[j] == INVALID_INSTRNO)
705 continue;
706
707 ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
708 if (0)
709 vex_printf("FLUSH 2 (%d,%d)\n",
710 rreg_live_after[j], rreg_dead_before[j]);
711 rreg_lrs_la[rreg_lrs_used].rreg = available_real_regs[j];
712 rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]);
713 rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
714 rreg_lrs_used++;
715 }
716
717 /* Compute summary hints for choosing real regs. If a real reg is
718 involved in a hard live range, record that fact in the fixed
719 part of the running rreg_state. Later, when offered a choice between
720 rregs, it's better to choose one which is not marked as having
721 any HLRs, since ones with HLRs may need to be spilled around
722 their HLRs. Correctness of final assignment is unaffected by
723 this mechanism -- it is only an optimisation. */
724
725 for (j = 0; j < rreg_lrs_used; j++) {
726 rreg = rreg_lrs_la[j].rreg;
727 vassert(!hregIsVirtual(rreg));
728 /* rreg is involved in a HLR. Record this info in the array, if
729 there is space. */
730 for (k = 0; k < n_rregs; k++)
731 if (rreg_state[k].rreg == rreg)
732 break;
733 vassert(k < n_rregs); /* else rreg was not found in rreg_state?! */
734 rreg_state[k].has_hlrs = True;
735 }
736 if (0) {
737 for (j = 0; j < n_rregs; j++) {
738 if (!rreg_state[j].has_hlrs)
739 continue;
740 ppReg(rreg_state[j].rreg);
741 vex_printf(" hinted\n");
742 }
743 }
744
745 /* Finally, copy the _la variant into the _db variant and
746 sort both by their respective fields. */
747 rreg_lrs_db = LibVEX_Alloc(rreg_lrs_used * sizeof(RRegLR));
748 for (j = 0; j < rreg_lrs_used; j++)
749 rreg_lrs_db[j] = rreg_lrs_la[j];
750
751 sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/ );
752 sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );
753
754 /* And set up the cursors. */
755 rreg_lrs_la_next = 0;
756 rreg_lrs_db_next = 0;
757
758 for (j = 1; j < rreg_lrs_used; j++) {
759 vassert(rreg_lrs_la[j-1].live_after <= rreg_lrs_la[j].live_after);
760 vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
761 }
762
763 /* ------ end of FINALISE RREG LIVE RANGES ------ */
764
765 # if DEBUG_REGALLOC
766 for (j = 0; j < n_vregs; j++) {
767 vex_printf("vreg %d: la = %d, db = %d\n",
768 j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
769 }
770 # endif
771
772 # if DEBUG_REGALLOC
773 vex_printf("RRegLRs by LA:\n");
774 for (j = 0; j < rreg_lrs_used; j++) {
775 vex_printf(" ");
776 (*ppReg)(rreg_lrs_la[j].rreg);
777 vex_printf(" la = %d, db = %d\n",
778 rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
779 }
780 vex_printf("RRegLRs by DB:\n");
781 for (j = 0; j < rreg_lrs_used; j++) {
782 vex_printf(" ");
783 (*ppReg)(rreg_lrs_db[j].rreg);
784 vex_printf(" la = %d, db = %d\n",
785 rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
786 }
787 # endif
788
789 /* --------- Stage 3: allocate spill slots. --------- */
790
791 /* Each spill slot is 8 bytes long. For vregs which take more than
792 64 bits to spill (classes Flt64 and Vec128), we have to allocate
793 two spill slots.
794
795 For Vec128-class on PowerPC, the spill slot's actual address
796 must be 16-byte aligned. Since the spill slot's address is
797 computed as an offset from the guest state pointer, and since
798 the user of the generated code must set that pointer to a
799 16-aligned value, we have the residual obligation here of
800 choosing a 16-aligned spill slot offset for Vec128-class values.
801 Since each spill slot is 8 bytes long, that means for
802 Vec128-class values we must allocated a spill slot number which
803 is zero mod 2.
804
805 Do a rank-based allocation of vregs to spill slot numbers. We
806 put as few values as possible in spill slots, but nevertheless
807 need to have a spill slot available for all vregs, just in case.
808 */
809 /* max_ss_no = -1; */
810
811 for (j = 0; j < N_SPILL64S; j++)
812 ss_busy_until_before[j] = 0;
813
814 for (j = 0; j < n_vregs; j++) {
815
816 /* True iff this vreg is unused. In which case we also expect
817 that the reg_class field for it has not been set. */
818 if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
819 vassert(vreg_lrs[j].reg_class == HRcINVALID);
820 continue;
821 }
822
823 /* The spill slots are 64 bits in size. As per the comment on
824 definition of HRegClass in host_generic_regs.h, that means, to
825 spill a vreg of class Flt64 or Vec128, we'll need to find two
826 adjacent spill slots to use. Note, this logic needs to kept
827 in sync with the size info on the definition of HRegClass. */
828
829 if (vreg_lrs[j].reg_class == HRcVec128
830 || vreg_lrs[j].reg_class == HRcFlt64) {
831
832 /* Find two adjacent free slots in which between them provide
833 up to 128 bits in which to spill the vreg. Since we are
834 trying to find an even:odd pair, move along in steps of 2
835 (slots). */
836
837 for (k = 0; k < N_SPILL64S-1; k += 2)
838 if (ss_busy_until_before[k] <= vreg_lrs[j].live_after
839 && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after)
840 break;
841 if (k >= N_SPILL64S-1) {
842 vpanic("LibVEX_N_SPILL_BYTES is too low. "
843 "Increase and recompile.");
844 }
845 if (0) vex_printf("16-byte spill offset in spill slot %d\n", (Int)k);
846 ss_busy_until_before[k+0] = vreg_lrs[j].dead_before;
847 ss_busy_until_before[k+1] = vreg_lrs[j].dead_before;
848
849 } else {
850
851 /* The ordinary case -- just find a single spill slot. */
852
853 /* Find the lowest-numbered spill slot which is available at
854 the start point of this interval, and assign the interval
855 to it. */
856 for (k = 0; k < N_SPILL64S; k++)
857 if (ss_busy_until_before[k] <= vreg_lrs[j].live_after)
858 break;
859 if (k == N_SPILL64S) {
860 vpanic("LibVEX_N_SPILL_BYTES is too low. "
861 "Increase and recompile.");
862 }
863 ss_busy_until_before[k] = vreg_lrs[j].dead_before;
864
865 }
866
867 /* This reflects LibVEX's hard-wired knowledge of the baseBlock
868 layout: the guest state, then two equal sized areas following
869 it for two sets of shadow state, and then the spill area. */
870 vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + k * 8);
871
872 /* Independent check that we've made a sane choice of slot */
873 sanity_check_spill_offset( &vreg_lrs[j] );
874 /* if (j > max_ss_no) */
875 /* max_ss_no = j; */
876 }
877
878 # if 0
879 vex_printf("\n\n");
880 for (j = 0; j < n_vregs; j++)
881 vex_printf("vreg %d --> spill offset %d\n",
882 j, vreg_lrs[j].spill_offset);
883 # endif
884
885 /* --------- Stage 4: establish rreg preferences --------- */
886
887 /* It may be advantageous to allocating certain vregs to specific
888 rregs, as a way of avoiding reg-reg moves later. Here we
889 establish which, if any, rreg each vreg would prefer to be in.
890 Note that this constrains the allocator -- ideally we end up
891 with as few as possible vregs expressing a preference.
892
893 This is an optimisation: if the .preferred_rreg field is never
894 set to anything different from INVALID_HREG, the allocator still
895 works. */
896
897 /* 30 Dec 04: removed this mechanism as it does not seem to
898 help. */
899
900 /* --------- Stage 5: process instructions --------- */
901
902 /* This is the main loop of the allocator. First, we need to
903 correctly set up our running state, which tracks the status of
904 each real register. */
905
906 /* ------ BEGIN: Process each insn in turn. ------ */
907
908 for (ii = 0; ii < instrs_in->arr_used; ii++) {
909
910 # if DEBUG_REGALLOC
911 vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
912 vex_printf("---- ");
913 (*ppInstr)(instrs_in->arr[ii], mode64);
914 vex_printf("\n\nInitial state:\n");
915 PRINT_STATE;
916 vex_printf("\n");
917 # endif
918
919 /* ------------ Sanity checks ------------ */
920
921 /* Sanity checks are expensive. So they are done only once
922 every 7 instructions, and just before the last
923 instruction. */
924 do_sanity_check
925 = toBool(
926 False /* Set to True for sanity checking of all insns. */
927 || ii == instrs_in->arr_used-1
928 || (ii > 0 && (ii % 7) == 0)
929 );
930
931 if (do_sanity_check) {
932
933 /* Sanity check 1: all rregs with a hard live range crossing
934 this insn must be marked as unavailable in the running
935 state. */
936 for (j = 0; j < rreg_lrs_used; j++) {
937 if (rreg_lrs_la[j].live_after < ii
938 && ii < rreg_lrs_la[j].dead_before) {
939 /* ii is the middle of a hard live range for some real
940 reg. Check it's marked as such in the running
941 state. */
942
943 # if 0
944 vex_printf("considering la %d .. db %d reg = ",
945 rreg_lrs[j].live_after,
946 rreg_lrs[j].dead_before);
947 (*ppReg)(rreg_lrs[j].rreg);
948 vex_printf("\n");
949 # endif
950
951 /* find the state entry for this rreg */
952 for (k = 0; k < n_rregs; k++)
953 if (rreg_state[k].rreg == rreg_lrs_la[j].rreg)
954 break;
955
956 /* and assert that this rreg is marked as unavailable */
957 vassert(rreg_state[k].disp == Unavail);
958 }
959 }
960
961 /* Sanity check 2: conversely, all rregs marked as
962 unavailable in the running rreg_state must have a
963 corresponding hard live range entry in the rreg_lrs
964 array. */
965 for (j = 0; j < n_available_real_regs; j++) {
966 vassert(rreg_state[j].disp == Bound
967 || rreg_state[j].disp == Free
968 || rreg_state[j].disp == Unavail);
969 if (rreg_state[j].disp != Unavail)
970 continue;
971 for (k = 0; k < rreg_lrs_used; k++)
972 if (rreg_lrs_la[k].rreg == rreg_state[j].rreg
973 && rreg_lrs_la[k].live_after < ii
974 && ii < rreg_lrs_la[k].dead_before)
975 break;
976 /* If this vassertion fails, we couldn't find a
977 corresponding HLR. */
978 vassert(k < rreg_lrs_used);
979 }
980
981 /* Sanity check 3: all vreg-rreg bindings must bind registers
982 of the same class. */
983 for (j = 0; j < n_rregs; j++) {
984 if (rreg_state[j].disp != Bound) {
985 vassert(rreg_state[j].eq_spill_slot == False);
986 continue;
987 }
988 vassert(hregClass(rreg_state[j].rreg)
989 == hregClass(rreg_state[j].vreg));
990 vassert( hregIsVirtual(rreg_state[j].vreg));
991 vassert(!hregIsVirtual(rreg_state[j].rreg));
992 }
993
994 /* Sanity check 4: the vreg_state and rreg_state
995 mutually-redundant mappings are consistent. If
996 rreg_state[j].vreg points at some vreg_state entry then
997 that vreg_state entry should point back at
998 rreg_state[j]. */
999 for (j = 0; j < n_rregs; j++) {
1000 if (rreg_state[j].disp != Bound)
1001 continue;
1002 k = hregNumber(rreg_state[j].vreg);
1003 vassert(IS_VALID_VREGNO(k));
1004 vassert(vreg_state[k] == j);
1005 }
1006 for (j = 0; j < n_vregs; j++) {
1007 k = vreg_state[j];
1008 if (k == INVALID_RREG_NO)
1009 continue;
1010 vassert(IS_VALID_RREGNO(k));
1011 vassert(rreg_state[k].disp == Bound);
1012 vassert(hregNumber(rreg_state[k].vreg) == j);
1013 }
1014
1015 } /* if (do_sanity_check) */
1016
1017 /* ------------ end of Sanity checks ------------ */
1018
1019 /* Do various optimisations pertaining to register coalescing
1020 and preferencing:
1021 MOV v <-> v coalescing (done here).
1022 MOV v <-> r coalescing (not yet, if ever)
1023 */
1024 /* If doing a reg-reg move between two vregs, and the src's live
1025 range ends here and the dst's live range starts here, bind
1026 the dst to the src's rreg, and that's all. */
1027 if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
1028 if (!hregIsVirtual(vregS)) goto cannot_coalesce;
1029 if (!hregIsVirtual(vregD)) goto cannot_coalesce;
1030 /* Check that *isMove is not telling us a bunch of lies ... */
1031 vassert(hregClass(vregS) == hregClass(vregD));
1032 k = hregNumber(vregS);
1033 m = hregNumber(vregD);
1034 vassert(IS_VALID_VREGNO(k));
1035 vassert(IS_VALID_VREGNO(m));
1036 if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
1037 if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
1038 # if DEBUG_REGALLOC
1039 vex_printf("COALESCE ");
1040 (*ppReg)(vregS);
1041 vex_printf(" -> ");
1042 (*ppReg)(vregD);
1043 vex_printf("\n\n");
1044 # endif
1045 /* Find the state entry for vregS. */
1046 for (m = 0; m < n_rregs; m++)
1047 if (rreg_state[m].disp == Bound && rreg_state[m].vreg == vregS)
1048 break;
1049 if (m == n_rregs)
1050 /* We failed to find a binding for vregS, which means it's
1051 currently not in a register. So we can't do the
1052 coalescing. Give up. */
1053 goto cannot_coalesce;
1054
1055 /* Finally, we can do the coalescing. It's trivial -- merely
1056 claim vregS's register for vregD. */
1057 rreg_state[m].vreg = vregD;
1058 vassert(IS_VALID_VREGNO(hregNumber(vregD)));
1059 vassert(IS_VALID_VREGNO(hregNumber(vregS)));
1060 vreg_state[hregNumber(vregD)] = toShort(m);
1061 vreg_state[hregNumber(vregS)] = INVALID_RREG_NO;
1062
1063 /* This rreg has become associated with a different vreg and
1064 hence with a different spill slot. Play safe. */
1065 rreg_state[m].eq_spill_slot = False;
1066
1067 /* Move on to the next insn. We skip the post-insn stuff for
1068 fixed registers, since this move should not interact with
1069 them in any way. */
1070 continue;
1071 }
1072 cannot_coalesce:
1073
1074 /* ------ Free up rregs bound to dead vregs ------ */
1075
1076 /* Look for vregs whose live range has just ended, and
1077 mark the associated rreg as free. */
1078
1079 for (j = 0; j < n_rregs; j++) {
1080 if (rreg_state[j].disp != Bound)
1081 continue;
1082 vreg = hregNumber(rreg_state[j].vreg);
1083 vassert(IS_VALID_VREGNO(vreg));
1084 if (vreg_lrs[vreg].dead_before <= ii) {
1085 rreg_state[j].disp = Free;
1086 rreg_state[j].eq_spill_slot = False;
1087 m = hregNumber(rreg_state[j].vreg);
1088 vassert(IS_VALID_VREGNO(m));
1089 vreg_state[m] = INVALID_RREG_NO;
1090 if (DEBUG_REGALLOC) {
1091 vex_printf("free up ");
1092 (*ppReg)(rreg_state[j].rreg);
1093 vex_printf("\n");
1094 }
1095 }
1096 }
1097
1098 /* ------ Pre-instruction actions for fixed rreg uses ------ */
1099
1100 /* Now we have to deal with rregs which are about to be made
1101 live by this instruction -- in other words, are entering into
1102 one of their live ranges. If any such rreg holds a vreg, we
1103 will have to free up the rreg. The simplest solution which
1104 is correct is to spill the rreg.
1105
1106 Note we could do better:
1107 * Could move it into some other free rreg, if one is available
1108
1109 Do this efficiently, by incrementally stepping along an array
1110 of rreg HLRs that are known to be sorted by start point
1111 (their .live_after field).
1112 */
1113 while (True) {
1114 vassert(rreg_lrs_la_next >= 0);
1115 vassert(rreg_lrs_la_next <= rreg_lrs_used);
1116 if (rreg_lrs_la_next == rreg_lrs_used)
1117 break; /* no more real reg live ranges to consider */
1118 if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
1119 break; /* next live range does not yet start */
1120 vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
1121 /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
1122 Find the associated rreg_state entry. */
1123 /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
1124 Real register live ranges are guaranteed to be well-formed
1125 in that they start with a write to the register -- Stage 2
1126 rejects any code not satisfying this. So the correct
1127 question to ask is whether
1128 rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
1129 whether the reg becomes live after this insn -- rather
1130 than before it. */
1131 # if DEBUG_REGALLOC
1132 vex_printf("need to free up rreg: ");
1133 (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
1134 vex_printf("\n\n");
1135 # endif
1136 for (k = 0; k < n_rregs; k++)
1137 if (rreg_state[k].rreg == rreg_lrs_la[rreg_lrs_la_next].rreg)
1138 break;
1139 /* If this fails, we don't have an entry for this rreg.
1140 Which we should. */
1141 vassert(IS_VALID_RREGNO(k));
1142 m = hregNumber(rreg_state[k].vreg);
1143 if (rreg_state[k].disp == Bound) {
1144 /* Yes, there is an associated vreg. Spill it if it's
1145 still live. */
1146 vassert(IS_VALID_VREGNO(m));
1147 vreg_state[m] = INVALID_RREG_NO;
1148 if (vreg_lrs[m].dead_before > ii) {
1149 vassert(vreg_lrs[m].reg_class != HRcINVALID);
1150 if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
1151 HInstr* spill1 = NULL;
1152 HInstr* spill2 = NULL;
1153 (*genSpill)( &spill1, &spill2, rreg_state[k].rreg,
1154 vreg_lrs[m].spill_offset, mode64 );
1155 vassert(spill1 || spill2); /* can't both be NULL */
1156 if (spill1)
1157 EMIT_INSTR(spill1);
1158 if (spill2)
1159 EMIT_INSTR(spill2);
1160 }
1161 rreg_state[k].eq_spill_slot = True;
1162 }
1163 }
1164 rreg_state[k].disp = Unavail;
1165 rreg_state[k].vreg = INVALID_HREG;
1166 rreg_state[k].eq_spill_slot = False;
1167
1168 /* check for further rregs entering HLRs at this point */
1169 rreg_lrs_la_next++;
1170 }
1171
1172
1173 # if DEBUG_REGALLOC
1174 vex_printf("After pre-insn actions for fixed regs:\n");
1175 PRINT_STATE;
1176 vex_printf("\n");
1177 # endif
1178
1179
1180 /* ------ Deal with the current instruction. ------ */
1181
1182 /* Finally we can begin the processing of this instruction
1183 itself. The aim is to free up enough rregs for this insn.
1184 This may generate spill stores since we may have to evict
1185 some vregs currently in rregs. Also generates spill loads.
1186 We also build up the final vreg->rreg mapping to be applied
1187 to the insn. */
1188
      (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
1190
1191 initHRegRemap(&remap);
1192
1193 /* ------------ BEGIN directReload optimisation ----------- */
1194
      /* If the instruction reads exactly one vreg which is currently
         in a spill slot, and this is the last use of that vreg, see if
         we can convert the instruction into one that reads directly
         from the spill slot.  This is clearly only possible for x86 and
         amd64 targets, since ppc and arm are load-store architectures.
         If
1200 successful, replace instrs_in->arr[ii] with this new
1201 instruction, and recompute its reg usage, so that the change
1202 is invisible to the standard-case handling that follows. */
1203
1204 if (directReload && reg_usage.n_used <= 2) {
1205 Bool debug_direct_reload = True && False;
1206 HReg cand = INVALID_HREG;
1207 Bool nreads = 0;
1208 Short spilloff = 0;
1209
1210 for (j = 0; j < reg_usage.n_used; j++) {
1211
1212 vreg = reg_usage.hreg[j];
1213
1214 if (!hregIsVirtual(vreg))
1215 continue;
1216
1217 if (reg_usage.mode[j] == HRmRead) {
1218 nreads++;
1219 m = hregNumber(vreg);
1220 vassert(IS_VALID_VREGNO(m));
1221 k = vreg_state[m];
1222 if (!IS_VALID_RREGNO(k)) {
1223 /* ok, it is spilled. Now, is this its last use? */
1224 vassert(vreg_lrs[m].dead_before >= ii+1);
1225 if (vreg_lrs[m].dead_before == ii+1
1226 && cand == INVALID_HREG) {
1227 spilloff = vreg_lrs[m].spill_offset;
1228 cand = vreg;
1229 }
1230 }
1231 }
1232 }
1233
1234 if (nreads == 1 && cand != INVALID_HREG) {
1235 HInstr* reloaded;
1236 if (reg_usage.n_used == 2)
1237 vassert(reg_usage.hreg[0] != reg_usage.hreg[1]);
1238
1239 reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
1240 if (debug_direct_reload && !reloaded) {
1241 vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
1242 ppInstr(instrs_in->arr[ii], mode64);
1243 }
1244 if (reloaded) {
1245 /* Update info about the insn, so it looks as if it had
1246 been in this form all along. */
1247 instrs_in->arr[ii] = reloaded;
            (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
1249 if (debug_direct_reload && !reloaded) {
1250 vex_printf(" --> ");
1251 ppInstr(reloaded, mode64);
1252 }
1253 }
1254
1255 if (debug_direct_reload && !reloaded)
1256 vex_printf("\n");
1257 }
1258
1259 }
1260
1261 /* ------------ END directReload optimisation ------------ */
1262
1263 /* for each reg mentioned in the insn ... */
1264 for (j = 0; j < reg_usage.n_used; j++) {
1265
1266 vreg = reg_usage.hreg[j];
1267
1268 /* only interested in virtual registers right now. */
1269 if (!hregIsVirtual(vreg))
1270 continue;
1271
1272 # if 0
1273 vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
1274 # endif
1275
1276 /* Now we're trying to find a rreg for "vreg". First of all,
1277 if it already has an rreg assigned, we don't need to do
1278 anything more. Search the current state to find out. */
1279 m = hregNumber(vreg);
1280 vassert(IS_VALID_VREGNO(m));
1281 k = vreg_state[m];
1282 if (IS_VALID_RREGNO(k)) {
1283 vassert(rreg_state[k].disp == Bound);
1284 addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
1285 /* If this rreg is written or modified, mark it as different
1286 from any spill slot value. */
1287 if (reg_usage.mode[j] != HRmRead)
1288 rreg_state[k].eq_spill_slot = False;
1289 continue;
1290 } else {
1291 vassert(k == INVALID_RREG_NO);
1292 }
1293
1294 /* No luck. The next thing to do is see if there is a
1295 currently free rreg available, of the correct class. If
1296 so, bag it. NOTE, we could improve this by selecting an
1297 rreg for which the next live-range event is as far ahead
1298 as possible. */
1299 k_suboptimal = -1;
1300 for (k = 0; k < n_rregs; k++) {
1301 if (rreg_state[k].disp != Free
1302 || hregClass(rreg_state[k].rreg) != hregClass(vreg))
1303 continue;
1304 if (rreg_state[k].has_hlrs) {
1305 /* Well, at least we can use k_suboptimal if we really
1306 have to. Keep on looking for a better candidate. */
1307 k_suboptimal = k;
1308 } else {
1309 /* Found a preferable reg. Use it. */
1310 k_suboptimal = -1;
1311 break;
1312 }
1313 }
1314 if (k_suboptimal >= 0)
1315 k = k_suboptimal;
1316
1317 if (k < n_rregs) {
1318 rreg_state[k].disp = Bound;
1319 rreg_state[k].vreg = vreg;
1320 m = hregNumber(vreg);
1321 vassert(IS_VALID_VREGNO(m));
1322 vreg_state[m] = toShort(k);
1323 addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
1324 /* Generate a reload if needed. This only creates needed
1325 reloads because the live range builder for vregs will
1326 guarantee that the first event for a vreg is a write.
1327 Hence, if this reference is not a write, it cannot be
1328 the first reference for this vreg, and so a reload is
1329 indeed needed. */
1330 if (reg_usage.mode[j] != HRmWrite) {
1331 vassert(vreg_lrs[m].reg_class != HRcINVALID);
1332 HInstr* reload1 = NULL;
1333 HInstr* reload2 = NULL;
1334 (*genReload)( &reload1, &reload2, rreg_state[k].rreg,
1335 vreg_lrs[m].spill_offset, mode64 );
1336 vassert(reload1 || reload2); /* can't both be NULL */
1337 if (reload1)
1338 EMIT_INSTR(reload1);
1339 if (reload2)
1340 EMIT_INSTR(reload2);
1341 /* This rreg is read or modified by the instruction.
1342 If it's merely read we can claim it now equals the
1343 spill slot, but not so if it is modified. */
1344 if (reg_usage.mode[j] == HRmRead) {
1345 rreg_state[k].eq_spill_slot = True;
1346 } else {
1347 vassert(reg_usage.mode[j] == HRmModify);
1348 rreg_state[k].eq_spill_slot = False;
1349 }
1350 } else {
1351 rreg_state[k].eq_spill_slot = False;
1352 }
1353
1354 continue;
1355 }
1356
1357 /* Well, now we have no option but to spill a vreg. It's
1358 important to make a good choice of vreg to spill, and of
1359 course we need to be careful not to spill a vreg which is
1360 needed by this insn. */
1361
         /* First, mark in the rreg_state those rregs which are not
            spill candidates, either because they hold a vreg mentioned
            by this instruction, or because they are of the wrong
            class. */
1365 for (k = 0; k < n_rregs; k++) {
1366 rreg_state[k].is_spill_cand = False;
1367 if (rreg_state[k].disp != Bound)
1368 continue;
1369 if (hregClass(rreg_state[k].rreg) != hregClass(vreg))
1370 continue;
1371 rreg_state[k].is_spill_cand = True;
1372 for (m = 0; m < reg_usage.n_used; m++) {
1373 if (rreg_state[k].vreg == reg_usage.hreg[m]) {
1374 rreg_state[k].is_spill_cand = False;
1375 break;
1376 }
1377 }
1378 }
1379
1380 /* We can choose to spill any rreg satisfying
1381 rreg_state[r].is_spill_cand (so to speak). Choose r so that
1382 the next use of its associated vreg is as far ahead as
1383 possible, in the hope that this will minimise the number
1384 of consequent reloads required. */
1385 spillee
1386 = findMostDistantlyMentionedVReg (
1387 getRegUsage, instrs_in, ii+1, rreg_state, n_rregs, mode64 );
1388
1389 if (spillee == -1) {
1390 /* Hmmmmm. There don't appear to be any spill candidates.
1391 We're hosed. */
1392 vex_printf("reg_alloc: can't find a register in class: ");
1393 ppHRegClass(hregClass(vreg));
1394 vex_printf("\n");
1395 vpanic("reg_alloc: can't create a free register.");
1396 }
1397
1398 /* Right. So we're going to spill rreg_state[spillee]. */
1399 vassert(IS_VALID_RREGNO(spillee));
1400 vassert(rreg_state[spillee].disp == Bound);
1401 /* check it's the right class */
1402 vassert(hregClass(rreg_state[spillee].rreg) == hregClass(vreg));
1403 /* check we're not ejecting the vreg for which we are trying
1404 to free up a register. */
1405 vassert(rreg_state[spillee].vreg != vreg);
1406
1407 m = hregNumber(rreg_state[spillee].vreg);
1408 vassert(IS_VALID_VREGNO(m));
1409
1410 /* So here's the spill store. Assert that we're spilling a
1411 live vreg. */
1412 vassert(vreg_lrs[m].dead_before > ii);
1413 vassert(vreg_lrs[m].reg_class != HRcINVALID);
1414 if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
1415 HInstr* spill1 = NULL;
1416 HInstr* spill2 = NULL;
1417 (*genSpill)( &spill1, &spill2, rreg_state[spillee].rreg,
1418 vreg_lrs[m].spill_offset, mode64 );
1419 vassert(spill1 || spill2); /* can't both be NULL */
1420 if (spill1)
1421 EMIT_INSTR(spill1);
1422 if (spill2)
1423 EMIT_INSTR(spill2);
1424 }
1425
1426 /* Update the rreg_state to reflect the new assignment for this
1427 rreg. */
1428 rreg_state[spillee].vreg = vreg;
1429 vreg_state[m] = INVALID_RREG_NO;
1430
1431 rreg_state[spillee].eq_spill_slot = False; /* be safe */
1432
1433 m = hregNumber(vreg);
1434 vassert(IS_VALID_VREGNO(m));
1435 vreg_state[m] = toShort(spillee);
1436
1437 /* Now, if this vreg is being read or modified (as opposed to
1438 written), we have to generate a reload for it. */
1439 if (reg_usage.mode[j] != HRmWrite) {
1440 vassert(vreg_lrs[m].reg_class != HRcINVALID);
1441 HInstr* reload1 = NULL;
1442 HInstr* reload2 = NULL;
1443 (*genReload)( &reload1, &reload2, rreg_state[spillee].rreg,
1444 vreg_lrs[m].spill_offset, mode64 );
1445 vassert(reload1 || reload2); /* can't both be NULL */
1446 if (reload1)
1447 EMIT_INSTR(reload1);
1448 if (reload2)
1449 EMIT_INSTR(reload2);
1450 /* This rreg is read or modified by the instruction.
1451 If it's merely read we can claim it now equals the
1452 spill slot, but not so if it is modified. */
1453 if (reg_usage.mode[j] == HRmRead) {
1454 rreg_state[spillee].eq_spill_slot = True;
1455 } else {
1456 vassert(reg_usage.mode[j] == HRmModify);
1457 rreg_state[spillee].eq_spill_slot = False;
1458 }
1459 }
1460
1461 /* So after much twisting and turning, we have vreg mapped to
1462 rreg_state[spillee].rreg. Note that in the map. */
1463 addToHRegRemap(&remap, vreg, rreg_state[spillee].rreg);
1464
1465 } /* iterate over registers in this instruction. */
1466
1467 /* We've finished clowning around with registers in this instruction.
1468 Three results:
1469 - the running rreg_state[] has been updated
1470 - a suitable vreg->rreg mapping for this instruction has been
1471 constructed
1472 - spill and reload instructions may have been emitted.
1473
1474 The final step is to apply the mapping to the instruction,
1475 and emit that.
1476 */
1477
1478 /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
1479 (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
1480 EMIT_INSTR( instrs_in->arr[ii] );
1481
1482 # if DEBUG_REGALLOC
1483 vex_printf("After dealing with current insn:\n");
1484 PRINT_STATE;
1485 vex_printf("\n");
1486 # endif
1487
1488 /* ------ Post-instruction actions for fixed rreg uses ------ */
1489
1490 /* Now we need to check for rregs exiting fixed live ranges
1491 after this instruction, and if so mark them as free. */
1492 while (True) {
1493 vassert(rreg_lrs_db_next >= 0);
1494 vassert(rreg_lrs_db_next <= rreg_lrs_used);
1495 if (rreg_lrs_db_next == rreg_lrs_used)
1496 break; /* no more real reg live ranges to consider */
1497 if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
            break; /* next live range has not yet ended */
1499 vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
         /* rreg_lrs_db[rreg_lrs_db_next].rreg is exiting a hard live
            range.  Mark it as such in the main rreg_state array. */
1502 for (k = 0; k < n_rregs; k++)
1503 if (rreg_state[k].rreg == rreg_lrs_db[rreg_lrs_db_next].rreg)
1504 break;
1505 /* If this vassertion fails, we don't have an entry for
1506 this rreg. Which we should. */
1507 vassert(k < n_rregs);
1508 vassert(rreg_state[k].disp == Unavail);
1509 rreg_state[k].disp = Free;
1510 rreg_state[k].vreg = INVALID_HREG;
1511 rreg_state[k].eq_spill_slot = False;
1512
1513 /* check for further rregs leaving HLRs at this point */
1514 rreg_lrs_db_next++;
1515 }
1516
1517 # if DEBUG_REGALLOC
1518 vex_printf("After post-insn actions for fixed regs:\n");
1519 PRINT_STATE;
1520 vex_printf("\n");
1521 # endif
1522
1523 } /* iterate over insns */
1524
1525 /* ------ END: Process each insn in turn. ------ */
1526
1527 /* free(rreg_state); */
1528 /* free(rreg_lrs); */
1529 /* if (vreg_lrs) free(vreg_lrs); */
1530
1531 /* Paranoia */
1532 for (j = 0; j < n_rregs; j++)
1533 vassert(rreg_state[j].rreg == available_real_regs[j]);
1534
1535 vassert(rreg_lrs_la_next == rreg_lrs_used);
1536 vassert(rreg_lrs_db_next == rreg_lrs_used);
1537
1538 return instrs_out;
1539
1540 # undef INVALID_INSTRNO
1541 # undef EMIT_INSTR
1542 # undef PRINT_STATE
1543 }
1544
1545
1546
1547 /*---------------------------------------------------------------*/
1548 /*--- host_reg_alloc2.c ---*/
1549 /*---------------------------------------------------------------*/
1550