1
2 /*---------------------------------------------------------------*/
3 /*--- begin host_reg_alloc2.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2012 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex.h"
38
39 #include "main_util.h"
40 #include "host_generic_regs.h"
41
/* Set to 1 for lots of debugging output. */
#define DEBUG_REGALLOC 0
44
45
46 /* TODO 27 Oct 04:
47
48 Better consistency checking from what isMove tells us.
49
50 We can possibly do V-V coalescing even when the src is spilled,
51 providing we can arrange for the dst to have the same spill slot.
52
53 Note that state[].hreg is the same as the available real regs.
54
55 Generally rationalise data structures. */
56
57
58 /* Records information on virtual register live ranges. Computed once
59 and remains unchanged after that. */
60 typedef
61 struct {
62 /* Becomes live for the first time after this insn ... */
63 Short live_after;
64 /* Becomes dead for the last time before this insn ... */
65 Short dead_before;
66 /* The "home" spill slot, if needed. Never changes. */
67 Short spill_offset;
68 Short spill_size;
69 /* What kind of register this is. */
70 HRegClass reg_class;
71 }
72 VRegLR;
73
74
75 /* Records information on real-register live ranges. Computed once
76 and remains unchanged after that. */
77 typedef
78 struct {
79 HReg rreg;
80 /* Becomes live after this insn ... */
81 Short live_after;
82 /* Becomes dead before this insn ... */
83 Short dead_before;
84 }
85 RRegLR;
86
87
88 /* An array of the following structs (rreg_state) comprises the
89 running state of the allocator. It indicates what the current
90 disposition of each allocatable real register is. The array gets
91 updated as the allocator processes instructions. */
92 typedef
93 struct {
94 /* ------ FIELDS WHICH DO NOT CHANGE ------ */
95 /* Which rreg is this for? */
96 HReg rreg;
97 /* Is this involved in any HLRs? (only an optimisation hint) */
98 Bool has_hlrs;
99 /* ------ FIELDS WHICH DO CHANGE ------ */
100 /* 6 May 07: rearranged fields below so the whole struct fits
101 into 16 bytes on both x86 and amd64. */
102 /* Used when .disp == Bound and we are looking for vregs to
103 spill. */
104 Bool is_spill_cand;
105 /* Optimisation: used when .disp == Bound. Indicates when the
106 rreg has the same value as the spill slot for the associated
107 vreg. Is safely left at False, and becomes True after a
108 spill store or reload for this rreg. */
109 Bool eq_spill_slot;
110 /* What's it's current disposition? */
111 enum { Free, /* available for use */
112 Unavail, /* in a real-reg live range */
113 Bound /* in use (holding value of some vreg) */
114 }
115 disp;
116 /* If .disp == Bound, what vreg is it bound to? */
117 HReg vreg;
118 }
119 RRegState;
120
121
122 /* The allocator also maintains a redundant array of indexes
123 (vreg_state) from vreg numbers back to entries in rreg_state. It
124 is redundant because iff vreg_state[i] == j then
125 hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
126 point at each other. The purpose of this is to speed up activities
127 which involve looking for a particular vreg: there is no need to
128 scan the rreg_state looking for it, just index directly into
129 vreg_state. The FAQ "does this vreg already have an associated
130 rreg" is the main beneficiary.
131
132 To indicate, in vreg_state[i], that a given vreg is not currently
133 associated with any rreg, that entry can be set to INVALID_RREG_NO.
134
   Because the vreg_state entries are signed Shorts, the max number
   of vregs that can be handled by regalloc is 32767.
137 */
138
/* Value of a vreg_state[] entry when the vreg currently has no
   associated rreg. */
#define INVALID_RREG_NO ((Short)(-1))

/* Range checks for vreg and rreg numbers.  Both rely on n_vregs and
   n_rregs respectively being in scope at the point of use. */
#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
143
144
145 /* Does this instruction mention a particular reg? */
instrMentionsReg(void (* getRegUsage)(HRegUsage *,HInstr *,Bool),HInstr * instr,HReg r,Bool mode64)146 static Bool instrMentionsReg (
147 void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
148 HInstr* instr,
149 HReg r,
150 Bool mode64
151 )
152 {
153 Int i;
154 HRegUsage reg_usage;
155 (*getRegUsage)(®_usage, instr, mode64);
156 for (i = 0; i < reg_usage.n_used; i++)
157 if (reg_usage.hreg[i] == r)
158 return True;
159 return False;
160 }
161
162
163 /* Search forward from some given point in the incoming instruction
164 sequence. Point is to select a virtual register to spill, by
165 finding the vreg which is mentioned as far ahead as possible, in
166 the hope that this will minimise the number of consequent reloads.
167
168 Only do the search for vregs which are Bound in the running state,
169 and for which the .is_spill_cand field is set. This allows the
170 caller to arbitrarily restrict the set of spill candidates to be
171 considered.
172
173 Returns an index into the state array indicating the (v,r) pair to
174 spill, or -1 if none was found. */
175 static
findMostDistantlyMentionedVReg(void (* getRegUsage)(HRegUsage *,HInstr *,Bool),HInstrArray * instrs_in,Int search_from_instr,RRegState * state,Int n_state,Bool mode64)176 Int findMostDistantlyMentionedVReg (
177 void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
178 HInstrArray* instrs_in,
179 Int search_from_instr,
180 RRegState* state,
181 Int n_state,
182 Bool mode64
183 )
184 {
185 Int k, m;
186 Int furthest_k = -1;
187 Int furthest = -1;
188 vassert(search_from_instr >= 0);
189 for (k = 0; k < n_state; k++) {
190 if (!state[k].is_spill_cand)
191 continue;
192 vassert(state[k].disp == Bound);
193 for (m = search_from_instr; m < instrs_in->arr_used; m++) {
194 if (instrMentionsReg(getRegUsage,
195 instrs_in->arr[m], state[k].vreg, mode64))
196 break;
197 }
198 if (m > furthest) {
199 furthest = m;
200 furthest_k = k;
201 }
202 }
203 return furthest_k;
204 }
205
206
207 /* Check that this vreg has been assigned a sane spill offset. */
sanity_check_spill_offset(VRegLR * vreg)208 static inline void sanity_check_spill_offset ( VRegLR* vreg )
209 {
210 switch (vreg->reg_class) {
211 case HRcVec128: case HRcFlt64:
212 vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
213 default:
214 vassert(0 == ((UShort)vreg->spill_offset % 8)); break;
215 }
216 }
217
218
219 /* Double the size of the real-reg live-range array, if needed. */
ensureRRLRspace(RRegLR ** info,Int * size,Int used)220 static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
221 {
222 Int k;
223 RRegLR* arr2;
224 if (used < *size) return;
225 if (0)
226 vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size);
227 vassert(used == *size);
228 arr2 = LibVEX_Alloc(2 * *size * sizeof(RRegLR));
229 for (k = 0; k < *size; k++)
230 arr2[k] = (*info)[k];
231 *size *= 2;
232 *info = arr2;
233 }
234
235
236 /* Sort an array of RRegLR entries by either the .live_after or
237 .dead_before fields. This is performance-critical. */
sortRRLRarray(RRegLR * arr,Int size,Bool by_live_after)238 static void sortRRLRarray ( RRegLR* arr,
239 Int size, Bool by_live_after )
240 {
241 Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
242 9841, 29524, 88573, 265720,
243 797161, 2391484 };
244 Int lo = 0;
245 Int hi = size-1;
246 Int i, j, h, bigN, hp;
247 RRegLR v;
248
249 vassert(size >= 0);
250 if (size == 0)
251 return;
252
253 bigN = hi - lo + 1; if (bigN < 2) return;
254 hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
255
256 if (by_live_after) {
257
258 for ( ; hp >= 0; hp--) {
259 h = incs[hp];
260 for (i = lo + h; i <= hi; i++) {
261 v = arr[i];
262 j = i;
263 while (arr[j-h].live_after > v.live_after) {
264 arr[j] = arr[j-h];
265 j = j - h;
266 if (j <= (lo + h - 1)) break;
267 }
268 arr[j] = v;
269 }
270 }
271
272 } else {
273
274 for ( ; hp >= 0; hp--) {
275 h = incs[hp];
276 for (i = lo + h; i <= hi; i++) {
277 v = arr[i];
278 j = i;
279 while (arr[j-h].dead_before > v.dead_before) {
280 arr[j] = arr[j-h];
281 j = j - h;
282 if (j <= (lo + h - 1)) break;
283 }
284 arr[j] = v;
285 }
286 }
287
288 }
289 }
290
291
292 /* A target-independent register allocator. Requires various
293 functions which it uses to deal abstractly with instructions and
294 registers, since it cannot have any target-specific knowledge.
295
296 Returns a new list of instructions, which, as a result of the
297 behaviour of mapRegs, will be in-place modifications of the
298 original instructions.
299
300 Requires that the incoming code has been generated using
301 vreg numbers 0, 1 .. n_vregs-1. Appearance of a vreg outside
302 that range is a checked run-time error.
303
304 Takes an expandable array of pointers to unallocated insns.
305 Returns an expandable array of pointers to allocated insns.
306 */
doRegisterAllocation(HInstrArray * instrs_in,HReg * available_real_regs,Int n_available_real_regs,Bool (* isMove)(HInstr *,HReg *,HReg *),void (* getRegUsage)(HRegUsage *,HInstr *,Bool),void (* mapRegs)(HRegRemap *,HInstr *,Bool),void (* genSpill)(HInstr **,HInstr **,HReg,Int,Bool),void (* genReload)(HInstr **,HInstr **,HReg,Int,Bool),HInstr * (* directReload)(HInstr *,HReg,Short),Int guest_sizeB,void (* ppInstr)(HInstr *,Bool),void (* ppReg)(HReg),Bool mode64)307 HInstrArray* doRegisterAllocation (
308
309 /* Incoming virtual-registerised code. */
310 HInstrArray* instrs_in,
311
312 /* An array listing all the real registers the allocator may use,
313 in no particular order. */
314 HReg* available_real_regs,
315 Int n_available_real_regs,
316
317 /* Return True iff the given insn is a reg-reg move, in which
318 case also return the src and dst regs. */
319 Bool (*isMove) ( HInstr*, HReg*, HReg* ),
320
321 /* Get info about register usage in this insn. */
322 void (*getRegUsage) ( HRegUsage*, HInstr*, Bool ),
323
324 /* Apply a reg-reg mapping to an insn. */
325 void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),
326
327 /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
328 real reg to a spill slot byte offset. The two leading HInstr**
329 args are out parameters, through which the generated insns are
330 returned. Also (optionally) a 'directReload' function, which
331 attempts to replace a given instruction by one which reads
332 directly from a specified spill slot. May be NULL, in which
333 case the optimisation is not attempted. */
334 void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ),
335 void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
336 HInstr* (*directReload) ( HInstr*, HReg, Short ),
337 Int guest_sizeB,
338
339 /* For debug printing only. */
340 void (*ppInstr) ( HInstr*, Bool ),
341 void (*ppReg) ( HReg ),
342
343 /* 32/64bit mode */
344 Bool mode64
345 )
346 {
347 # define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8)
348
349 const Bool eq_spill_opt = True;
350
351 /* Iterators and temporaries. */
352 Int ii, j, k, m, spillee, k_suboptimal;
353 HReg rreg, vreg, vregS, vregD;
354 HRegUsage reg_usage;
355
356 /* Info on vregs and rregs. Computed once and remains
357 unchanged. */
358 Int n_vregs;
359 VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
360
   /* We keep two copies of the real-reg live range info, one sorted
      by .live_after and the other by .dead_before.  First the
      unsorted info is created in the _la variant, and then copied
      into the _db variant.  Once that's done both of them are sorted.
      We also need two integer cursors which record the next
      location in the two arrays to consider. */
367 RRegLR* rreg_lrs_la;
368 RRegLR* rreg_lrs_db;
369 Int rreg_lrs_size;
370 Int rreg_lrs_used;
371 Int rreg_lrs_la_next;
372 Int rreg_lrs_db_next;
373
374 /* Used when constructing vreg_lrs (for allocating stack
375 slots). */
376 Int ss_busy_until_before[N_SPILL64S];
377
378 /* Used when constructing rreg_lrs. */
379 Int* rreg_live_after;
380 Int* rreg_dead_before;
381
382 /* Running state of the core allocation algorithm. */
383 RRegState* rreg_state; /* [0 .. n_rregs-1] */
384 Int n_rregs;
385
386 /* .. and the redundant backward map */
387 /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
388 This inplies n_rregs must be <= 32768. */
389 Short* vreg_state; /* [0 .. n_vregs-1] */
390
391 /* The vreg -> rreg map constructed and then applied to each
392 instr. */
393 HRegRemap remap;
394
395 /* The output array of instructions. */
396 HInstrArray* instrs_out;
397
398 /* Sanity checks are expensive. They are only done periodically,
399 not at each insn processed. */
400 Bool do_sanity_check;
401
402 vassert(0 == (guest_sizeB % 32));
403 vassert(0 == (LibVEX_N_SPILL_BYTES % 32));
404 vassert(0 == (N_SPILL64S % 4));
405
406 /* The live range numbers are signed shorts, and so limiting the
407 number of insns to 15000 comfortably guards against them
408 overflowing 32k. */
409 vassert(instrs_in->arr_used <= 15000);
410
411 # define INVALID_INSTRNO (-2)
412
413 # define EMIT_INSTR(_instr) \
414 do { \
415 HInstr* _tmp = (_instr); \
416 if (DEBUG_REGALLOC) { \
417 vex_printf("** "); \
418 (*ppInstr)(_tmp, mode64); \
419 vex_printf("\n\n"); \
420 } \
421 addHInstr ( instrs_out, _tmp ); \
422 } while (0)
423
424 # define PRINT_STATE \
425 do { \
426 Int z, q; \
427 for (z = 0; z < n_rregs; z++) { \
428 vex_printf(" rreg_state[%2d] = ", z); \
429 (*ppReg)(rreg_state[z].rreg); \
430 vex_printf(" \t"); \
431 switch (rreg_state[z].disp) { \
432 case Free: vex_printf("Free\n"); break; \
433 case Unavail: vex_printf("Unavail\n"); break; \
434 case Bound: vex_printf("BoundTo "); \
435 (*ppReg)(rreg_state[z].vreg); \
436 vex_printf("\n"); break; \
437 } \
438 } \
439 vex_printf("\n vreg_state[0 .. %d]:\n ", n_vregs-1); \
440 q = 0; \
441 for (z = 0; z < n_vregs; z++) { \
442 if (vreg_state[z] == INVALID_RREG_NO) \
443 continue; \
444 vex_printf("[%d] -> %d ", z, vreg_state[z]); \
445 q++; \
446 if (q > 0 && (q % 6) == 0) \
447 vex_printf("\n "); \
448 } \
449 vex_printf("\n"); \
450 } while (0)
451
452
453 /* --------- Stage 0: set up output array --------- */
454 /* --------- and allocate/initialise running state. --------- */
455
456 instrs_out = newHInstrArray();
457
458 /* ... and initialise running state. */
459 /* n_rregs is no more than a short name for n_available_real_regs. */
460 n_rregs = n_available_real_regs;
461 n_vregs = instrs_in->n_vregs;
462
463 /* If this is not so, vreg_state entries will overflow. */
464 vassert(n_vregs < 32767);
465
466 rreg_state = LibVEX_Alloc(n_rregs * sizeof(RRegState));
467 vreg_state = LibVEX_Alloc(n_vregs * sizeof(Short));
468
469 for (j = 0; j < n_rregs; j++) {
470 rreg_state[j].rreg = available_real_regs[j];
471 rreg_state[j].has_hlrs = False;
472 rreg_state[j].disp = Free;
473 rreg_state[j].vreg = INVALID_HREG;
474 rreg_state[j].is_spill_cand = False;
475 rreg_state[j].eq_spill_slot = False;
476 }
477
478 for (j = 0; j < n_vregs; j++)
479 vreg_state[j] = INVALID_RREG_NO;
480
481
482 /* --------- Stage 1: compute vreg live ranges. --------- */
483 /* --------- Stage 2: compute rreg live ranges. --------- */
484
485 /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */
486
487 /* This is relatively simple, because (1) we only seek the complete
488 end-to-end live range of each vreg, and are not interested in
489 any holes in it, and (2) the vregs are conveniently numbered 0
490 .. n_vregs-1, so we can just dump the results in a
491 pre-allocated array. */
492
493 vreg_lrs = NULL;
494 if (n_vregs > 0)
495 vreg_lrs = LibVEX_Alloc(sizeof(VRegLR) * n_vregs);
496
497 for (j = 0; j < n_vregs; j++) {
498 vreg_lrs[j].live_after = INVALID_INSTRNO;
499 vreg_lrs[j].dead_before = INVALID_INSTRNO;
500 vreg_lrs[j].spill_offset = 0;
501 vreg_lrs[j].spill_size = 0;
502 vreg_lrs[j].reg_class = HRcINVALID;
503 }
504
505 /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
506
507 /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */
508
509 /* This is more complex than Stage 1, because we need to compute
510 exactly all the live ranges of all the allocatable real regs,
511 and we don't know in advance how many there will be. */
512
513 rreg_lrs_used = 0;
514 rreg_lrs_size = 4;
515 rreg_lrs_la = LibVEX_Alloc(rreg_lrs_size * sizeof(RRegLR));
516 rreg_lrs_db = NULL; /* we'll create this later */
517
   /* We'll need to track live range start/end points separately for
      each rreg.  Sigh. */
520 vassert(n_available_real_regs > 0);
521 rreg_live_after = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
522 rreg_dead_before = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
523
524 for (j = 0; j < n_available_real_regs; j++) {
525 rreg_live_after[j] =
526 rreg_dead_before[j] = INVALID_INSTRNO;
527 }
528
529 /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
530
531 /* ------ start of ITERATE OVER INSNS ------ */
532
533 for (ii = 0; ii < instrs_in->arr_used; ii++) {
534
535 (*getRegUsage)( ®_usage, instrs_in->arr[ii], mode64 );
536
537 # if 0
538 vex_printf("\n%d stage1: ", ii);
539 (*ppInstr)(instrs_in->arr[ii], mode64);
540 vex_printf("\n");
541 ppHRegUsage(®_usage);
542 # endif
543
544 /* ------ start of DEAL WITH VREG LIVE RANGES ------ */
545
546 /* for each reg mentioned in the insn ... */
547 for (j = 0; j < reg_usage.n_used; j++) {
548
549 vreg = reg_usage.hreg[j];
550 /* only interested in virtual registers right now. */
551 if (!hregIsVirtual(vreg))
552 continue;
553 k = hregNumber(vreg);
554 if (k < 0 || k >= n_vregs) {
555 vex_printf("\n");
556 (*ppInstr)(instrs_in->arr[ii], mode64);
557 vex_printf("\n");
558 vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
559 vpanic("doRegisterAllocation: out-of-range vreg");
560 }
561
562 /* Take the opportunity to note its regclass. We'll need
563 that when allocating spill slots. */
564 if (vreg_lrs[k].reg_class == HRcINVALID) {
565 /* First mention of this vreg. */
566 vreg_lrs[k].reg_class = hregClass(vreg);
567 } else {
568 /* Seen it before, so check for consistency. */
569 vassert(vreg_lrs[k].reg_class == hregClass(vreg));
570 }
571
572 /* Now consider live ranges. */
573 switch (reg_usage.mode[j]) {
574 case HRmRead:
575 if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
576 vex_printf("\n\nOFFENDING VREG = %d\n", k);
577 vpanic("doRegisterAllocation: "
578 "first event for vreg is Read");
579 }
580 vreg_lrs[k].dead_before = toShort(ii + 1);
581 break;
582 case HRmWrite:
583 if (vreg_lrs[k].live_after == INVALID_INSTRNO)
584 vreg_lrs[k].live_after = toShort(ii);
585 vreg_lrs[k].dead_before = toShort(ii + 1);
586 break;
587 case HRmModify:
588 if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
589 vex_printf("\n\nOFFENDING VREG = %d\n", k);
590 vpanic("doRegisterAllocation: "
591 "first event for vreg is Modify");
592 }
593 vreg_lrs[k].dead_before = toShort(ii + 1);
594 break;
595 default:
596 vpanic("doRegisterAllocation(1)");
597 } /* switch */
598
599 } /* iterate over registers */
600
601 /* ------ end of DEAL WITH VREG LIVE RANGES ------ */
602
603 /* ------ start of DEAL WITH RREG LIVE RANGES ------ */
604
605 /* for each reg mentioned in the insn ... */
606 for (j = 0; j < reg_usage.n_used; j++) {
607
608 /* Dummy initialisations of flush_la and flush_db to avoid
609 possible bogus uninit-var warnings from gcc. */
610 Int flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
611 Bool flush;
612
613 rreg = reg_usage.hreg[j];
614
615 /* only interested in real registers right now. */
616 if (hregIsVirtual(rreg))
617 continue;
618
619 /* Furthermore, we're not interested in this rreg unless it's
620 one of the allocatable ones. For example, it could be a
621 stack pointer register, or some other register beyond our
622 control, in which case we should just ignore it. */
623 for (k = 0; k < n_available_real_regs; k++)
624 if (available_real_regs[k] == rreg)
625 break;
626 if (k == n_available_real_regs)
627 continue; /* not found -- ignore. */
628 flush = False;
629 switch (reg_usage.mode[j]) {
630 case HRmWrite:
631 flush_la = rreg_live_after[k];
632 flush_db = rreg_dead_before[k];
633 if (flush_la != INVALID_INSTRNO
634 && flush_db != INVALID_INSTRNO)
635 flush = True;
636 rreg_live_after[k] = ii;
637 rreg_dead_before[k] = ii+1;
638 break;
639 case HRmRead:
640 if (rreg_live_after[k] == INVALID_INSTRNO) {
641 vex_printf("\nOFFENDING RREG = ");
642 (*ppReg)(available_real_regs[k]);
643 vex_printf("\n");
644 vex_printf("\nOFFENDING instr = ");
645 (*ppInstr)(instrs_in->arr[ii], mode64);
646 vex_printf("\n");
647 vpanic("doRegisterAllocation: "
648 "first event for rreg is Read");
649 }
650 rreg_dead_before[k] = ii+1;
651 break;
652 case HRmModify:
653 if (rreg_live_after[k] == INVALID_INSTRNO) {
654 vex_printf("\nOFFENDING RREG = ");
655 (*ppReg)(available_real_regs[k]);
656 vex_printf("\n");
657 vex_printf("\nOFFENDING instr = ");
658 (*ppInstr)(instrs_in->arr[ii], mode64);
659 vex_printf("\n");
660 vpanic("doRegisterAllocation: "
661 "first event for rreg is Modify");
662 }
663 rreg_dead_before[k] = ii+1;
664 break;
665 default:
666 vpanic("doRegisterAllocation(2)");
667 }
668
669 if (flush) {
670 vassert(flush_la != INVALID_INSTRNO);
671 vassert(flush_db != INVALID_INSTRNO);
672 ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
673 if (0)
674 vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
675 rreg_lrs_la[rreg_lrs_used].rreg = rreg;
676 rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la);
677 rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
678 rreg_lrs_used++;
679 }
680
681 } /* iterate over regs in the instr */
682
683 /* ------ end of DEAL WITH RREG LIVE RANGES ------ */
684
685 } /* iterate over insns */
686
687 /* ------ end of ITERATE OVER INSNS ------ */
688
689 /* ------ start of FINALISE RREG LIVE RANGES ------ */
690
691 /* Now finish up any live ranges left over. */
692 for (j = 0; j < n_available_real_regs; j++) {
693
694 # if 0
695 vex_printf("residual %d: %d %d\n", j, rreg_live_after[j],
696 rreg_dead_before[j]);
697 # endif
698 vassert( (rreg_live_after[j] == INVALID_INSTRNO
699 && rreg_dead_before[j] == INVALID_INSTRNO)
700 ||
701 (rreg_live_after[j] != INVALID_INSTRNO
702 && rreg_dead_before[j] != INVALID_INSTRNO)
703 );
704
705 if (rreg_live_after[j] == INVALID_INSTRNO)
706 continue;
707
708 ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
709 if (0)
710 vex_printf("FLUSH 2 (%d,%d)\n",
711 rreg_live_after[j], rreg_dead_before[j]);
712 rreg_lrs_la[rreg_lrs_used].rreg = available_real_regs[j];
713 rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]);
714 rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
715 rreg_lrs_used++;
716 }
717
718 /* Compute summary hints for choosing real regs. If a real reg is
719 involved in a hard live range, record that fact in the fixed
720 part of the running rreg_state. Later, when offered a choice between
721 rregs, it's better to choose one which is not marked as having
722 any HLRs, since ones with HLRs may need to be spilled around
723 their HLRs. Correctness of final assignment is unaffected by
724 this mechanism -- it is only an optimisation. */
725
726 for (j = 0; j < rreg_lrs_used; j++) {
727 rreg = rreg_lrs_la[j].rreg;
728 vassert(!hregIsVirtual(rreg));
729 /* rreg is involved in a HLR. Record this info in the array, if
730 there is space. */
731 for (k = 0; k < n_rregs; k++)
732 if (rreg_state[k].rreg == rreg)
733 break;
734 vassert(k < n_rregs); /* else rreg was not found in rreg_state?! */
735 rreg_state[k].has_hlrs = True;
736 }
737 if (0) {
738 for (j = 0; j < n_rregs; j++) {
739 if (!rreg_state[j].has_hlrs)
740 continue;
741 ppReg(rreg_state[j].rreg);
742 vex_printf(" hinted\n");
743 }
744 }
745
746 /* Finally, copy the _la variant into the _db variant and
747 sort both by their respective fields. */
748 rreg_lrs_db = LibVEX_Alloc(rreg_lrs_used * sizeof(RRegLR));
749 for (j = 0; j < rreg_lrs_used; j++)
750 rreg_lrs_db[j] = rreg_lrs_la[j];
751
752 sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/ );
753 sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );
754
755 /* And set up the cursors. */
756 rreg_lrs_la_next = 0;
757 rreg_lrs_db_next = 0;
758
759 for (j = 1; j < rreg_lrs_used; j++) {
760 vassert(rreg_lrs_la[j-1].live_after <= rreg_lrs_la[j].live_after);
761 vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
762 }
763
764 /* ------ end of FINALISE RREG LIVE RANGES ------ */
765
766 # if DEBUG_REGALLOC
767 for (j = 0; j < n_vregs; j++) {
768 vex_printf("vreg %d: la = %d, db = %d\n",
769 j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
770 }
771 # endif
772
773 # if DEBUG_REGALLOC
774 vex_printf("RRegLRs by LA:\n");
775 for (j = 0; j < rreg_lrs_used; j++) {
776 vex_printf(" ");
777 (*ppReg)(rreg_lrs_la[j].rreg);
778 vex_printf(" la = %d, db = %d\n",
779 rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
780 }
781 vex_printf("RRegLRs by DB:\n");
782 for (j = 0; j < rreg_lrs_used; j++) {
783 vex_printf(" ");
784 (*ppReg)(rreg_lrs_db[j].rreg);
785 vex_printf(" la = %d, db = %d\n",
786 rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
787 }
788 # endif
789
790 /* --------- Stage 3: allocate spill slots. --------- */
791
792 /* Each spill slot is 8 bytes long. For vregs which take more than
793 64 bits to spill (classes Flt64 and Vec128), we have to allocate
794 two consecutive spill slots. For 256 bit registers (class
795 Vec256), we have to allocate four consecutive spill slots.
796
797 For Vec128-class on PowerPC, the spill slot's actual address
798 must be 16-byte aligned. Since the spill slot's address is
799 computed as an offset from the guest state pointer, and since
800 the user of the generated code must set that pointer to a
801 32-aligned value, we have the residual obligation here of
802 choosing a 16-aligned spill slot offset for Vec128-class values.
803 Since each spill slot is 8 bytes long, that means for
804 Vec128-class values we must allocated a spill slot number which
805 is zero mod 2.
806
807 Similarly, for Vec256 calss on amd64, find a spill slot number
808 which is zero mod 4. This guarantees it will be 32 byte
809 aligned, which isn't actually necessary on amd64 (we use movUpd
810 etc to spill), but seems like good practice.
811
812 Do a rank-based allocation of vregs to spill slot numbers. We
813 put as few values as possible in spill slots, but nevertheless
814 need to have a spill slot available for all vregs, just in case.
815 */
816 /* max_ss_no = -1; */
817
818 for (j = 0; j < N_SPILL64S; j++)
819 ss_busy_until_before[j] = 0;
820
821 for (j = 0; j < n_vregs; j++) {
822
823 /* True iff this vreg is unused. In which case we also expect
824 that the reg_class field for it has not been set. */
825 if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
826 vassert(vreg_lrs[j].reg_class == HRcINVALID);
827 continue;
828 }
829
830 /* The spill slots are 64 bits in size. As per the comment on
831 definition of HRegClass in host_generic_regs.h, that means,
832 to spill a vreg of class Flt64 or Vec128, we'll need to find
833 two adjacent spill slots to use. For Vec256, we'll need to
834 find four adjacent slots to use. Note, this logic needs to
835 kept in sync with the size info on the definition of
836 HRegClass. */
837 switch (vreg_lrs[j].reg_class) {
838
839 case HRcVec128: case HRcFlt64:
840 /* Find two adjacent free slots in which between them
841 provide up to 128 bits in which to spill the vreg.
842 Since we are trying to find an even:odd pair, move
843 along in steps of 2 (slots). */
844 for (k = 0; k < N_SPILL64S-1; k += 2)
845 if (ss_busy_until_before[k+0] <= vreg_lrs[j].live_after
846 && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after)
847 break;
848 if (k >= N_SPILL64S-1) {
849 vpanic("LibVEX_N_SPILL_BYTES is too low. "
850 "Increase and recompile.");
851 }
852 if (0) vex_printf("16-byte spill offset in spill slot %d\n",
853 (Int)k);
854 ss_busy_until_before[k+0] = vreg_lrs[j].dead_before;
855 ss_busy_until_before[k+1] = vreg_lrs[j].dead_before;
856 break;
857
858 default:
859 /* The ordinary case -- just find a single spill slot. */
860 /* Find the lowest-numbered spill slot which is available
861 at the start point of this interval, and assign the
862 interval to it. */
863 for (k = 0; k < N_SPILL64S; k++)
864 if (ss_busy_until_before[k] <= vreg_lrs[j].live_after)
865 break;
866 if (k == N_SPILL64S) {
867 vpanic("LibVEX_N_SPILL_BYTES is too low. "
868 "Increase and recompile.");
869 }
870 ss_busy_until_before[k] = vreg_lrs[j].dead_before;
871 break;
872
873 } /* switch (vreg_lrs[j].reg_class) { */
874
875 /* This reflects LibVEX's hard-wired knowledge of the baseBlock
876 layout: the guest state, then two equal sized areas following
877 it for two sets of shadow state, and then the spill area. */
878 vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + k * 8);
879
880 /* Independent check that we've made a sane choice of slot */
881 sanity_check_spill_offset( &vreg_lrs[j] );
882 /* if (j > max_ss_no) */
883 /* max_ss_no = j; */
884 }
885
886 # if 0
887 vex_printf("\n\n");
888 for (j = 0; j < n_vregs; j++)
889 vex_printf("vreg %d --> spill offset %d\n",
890 j, vreg_lrs[j].spill_offset);
891 # endif
892
893 /* --------- Stage 4: establish rreg preferences --------- */
894
895 /* It may be advantageous to allocating certain vregs to specific
896 rregs, as a way of avoiding reg-reg moves later. Here we
897 establish which, if any, rreg each vreg would prefer to be in.
898 Note that this constrains the allocator -- ideally we end up
899 with as few as possible vregs expressing a preference.
900
901 This is an optimisation: if the .preferred_rreg field is never
902 set to anything different from INVALID_HREG, the allocator still
903 works. */
904
905 /* 30 Dec 04: removed this mechanism as it does not seem to
906 help. */
907
908 /* --------- Stage 5: process instructions --------- */
909
910 /* This is the main loop of the allocator. First, we need to
911 correctly set up our running state, which tracks the status of
912 each real register. */
913
914 /* ------ BEGIN: Process each insn in turn. ------ */
915
916 for (ii = 0; ii < instrs_in->arr_used; ii++) {
917
918 # if DEBUG_REGALLOC
919 vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
920 vex_printf("---- ");
921 (*ppInstr)(instrs_in->arr[ii], mode64);
922 vex_printf("\n\nInitial state:\n");
923 PRINT_STATE;
924 vex_printf("\n");
925 # endif
926
      /* ------------ Sanity checks ------------ */

      /* Sanity checks are expensive.  So they are done only once
         every 7 instructions, and just before the last
         instruction. */
      do_sanity_check
         = toBool(
              False /* Set to True for sanity checking of all insns. */
              || ii == instrs_in->arr_used-1
              || (ii > 0 && (ii % 7) == 0)
           );

      if (do_sanity_check) {

         /* Sanity check 1: all rregs with a hard live range crossing
            this insn must be marked as unavailable in the running
            state. */
         for (j = 0; j < rreg_lrs_used; j++) {
            if (rreg_lrs_la[j].live_after < ii
                && ii < rreg_lrs_la[j].dead_before) {
               /* ii is the middle of a hard live range for some real
                  reg.  Check it's marked as such in the running
                  state. */

               /* NOTE(review): this disabled debug code refers to
                  "rreg_lrs", whereas the live arrays are now
                  "rreg_lrs_la"/"rreg_lrs_db"; it is stale and would
                  need updating before being re-enabled. */
#              if 0
               vex_printf("considering la %d .. db %d reg = ",
                          rreg_lrs[j].live_after,
                          rreg_lrs[j].dead_before);
               (*ppReg)(rreg_lrs[j].rreg);
               vex_printf("\n");
#              endif

               /* find the state entry for this rreg */
               for (k = 0; k < n_rregs; k++)
                  if (rreg_state[k].rreg == rreg_lrs_la[j].rreg)
                     break;

               /* and assert that this rreg is marked as unavailable */
               vassert(rreg_state[k].disp == Unavail);
            }
         }

         /* Sanity check 2: conversely, all rregs marked as
            unavailable in the running rreg_state must have a
            corresponding hard live range entry in the rreg_lrs
            array. */
         /* NOTE(review): this loop bounds on n_available_real_regs
            where the other checks use n_rregs -- presumably these are
            equal; confirm against the declarations earlier in this
            function. */
         for (j = 0; j < n_available_real_regs; j++) {
            vassert(rreg_state[j].disp == Bound
                    || rreg_state[j].disp == Free
                    || rreg_state[j].disp == Unavail);
            if (rreg_state[j].disp != Unavail)
               continue;
            for (k = 0; k < rreg_lrs_used; k++)
               if (rreg_lrs_la[k].rreg == rreg_state[j].rreg
                   && rreg_lrs_la[k].live_after < ii
                   && ii < rreg_lrs_la[k].dead_before)
                  break;
            /* If this vassertion fails, we couldn't find a
               corresponding HLR. */
            vassert(k < rreg_lrs_used);
         }

         /* Sanity check 3: all vreg-rreg bindings must bind registers
            of the same class. */
         for (j = 0; j < n_rregs; j++) {
            if (rreg_state[j].disp != Bound) {
               /* An unbound rreg cannot claim to equal any spill
                  slot. */
               vassert(rreg_state[j].eq_spill_slot == False);
               continue;
            }
            vassert(hregClass(rreg_state[j].rreg)
                    == hregClass(rreg_state[j].vreg));
            vassert( hregIsVirtual(rreg_state[j].vreg));
            vassert(!hregIsVirtual(rreg_state[j].rreg));
         }

         /* Sanity check 4: the vreg_state and rreg_state
            mutually-redundant mappings are consistent.  If
            rreg_state[j].vreg points at some vreg_state entry then
            that vreg_state entry should point back at
            rreg_state[j]. */
         for (j = 0; j < n_rregs; j++) {
            if (rreg_state[j].disp != Bound)
               continue;
            k = hregNumber(rreg_state[j].vreg);
            vassert(IS_VALID_VREGNO(k));
            vassert(vreg_state[k] == j);
         }
         for (j = 0; j < n_vregs; j++) {
            k = vreg_state[j];
            if (k == INVALID_RREG_NO)
               continue;
            vassert(IS_VALID_RREGNO(k));
            vassert(rreg_state[k].disp == Bound);
            vassert(hregNumber(rreg_state[k].vreg) == j);
         }

      } /* if (do_sanity_check) */

      /* ------------ end of Sanity checks ------------ */
1024
1025 /* ------------ end of Sanity checks ------------ */
1026
      /* Do various optimisations pertaining to register coalescing
         and preferencing:
         MOV v <-> v coalescing (done here).
         MOV v <-> r coalescing (not yet, if ever)
      */
      /* If doing a reg-reg move between two vregs, and the src's live
         range ends here and the dst's live range starts here, bind
         the dst to the src's rreg, and that's all. */
      if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
         if (!hregIsVirtual(vregS)) goto cannot_coalesce;
         if (!hregIsVirtual(vregD)) goto cannot_coalesce;
         /* Check that *isMove is not telling us a bunch of lies ... */
         vassert(hregClass(vregS) == hregClass(vregD));
         k = hregNumber(vregS);
         m = hregNumber(vregD);
         vassert(IS_VALID_VREGNO(k));
         vassert(IS_VALID_VREGNO(m));
         /* Coalescing is only sound if the src's live range ends
            exactly at this insn and the dst's starts exactly here. */
         if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
         if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
#        if DEBUG_REGALLOC
         vex_printf("COALESCE ");
         (*ppReg)(vregS);
         vex_printf(" -> ");
         (*ppReg)(vregD);
         vex_printf("\n\n");
#        endif
         /* Find the state entry for vregS.  (Note: m is reused here
            as a search index; vregD's number is recomputed below when
            needed.) */
         for (m = 0; m < n_rregs; m++)
            if (rreg_state[m].disp == Bound && rreg_state[m].vreg == vregS)
               break;
         if (m == n_rregs)
            /* We failed to find a binding for vregS, which means it's
               currently not in a register.  So we can't do the
               coalescing.  Give up. */
            goto cannot_coalesce;

         /* Finally, we can do the coalescing.  It's trivial -- merely
            claim vregS's register for vregD. */
         rreg_state[m].vreg = vregD;
         vassert(IS_VALID_VREGNO(hregNumber(vregD)));
         vassert(IS_VALID_VREGNO(hregNumber(vregS)));
         vreg_state[hregNumber(vregD)] = toShort(m);
         vreg_state[hregNumber(vregS)] = INVALID_RREG_NO;

         /* This rreg has become associated with a different vreg and
            hence with a different spill slot.  Play safe. */
         rreg_state[m].eq_spill_slot = False;

         /* Move on to the next insn.  We skip the post-insn stuff for
            fixed registers, since this move should not interact with
            them in any way. */
         continue;
      }
     cannot_coalesce:
1081
      /* ------ Free up rregs bound to dead vregs ------ */

      /* Look for vregs whose live range has just ended, and
         mark the associated rreg as free. */

      for (j = 0; j < n_rregs; j++) {
         if (rreg_state[j].disp != Bound)
            continue;
         vreg = hregNumber(rreg_state[j].vreg);
         vassert(IS_VALID_VREGNO(vreg));
         if (vreg_lrs[vreg].dead_before <= ii) {
            /* The vreg died before this insn; sever the binding in
               both directions. */
            rreg_state[j].disp = Free;
            rreg_state[j].eq_spill_slot = False;
            m = hregNumber(rreg_state[j].vreg);
            vassert(IS_VALID_VREGNO(m));
            vreg_state[m] = INVALID_RREG_NO;
            /* NOTE(review): runtime "if (DEBUG_REGALLOC)" here,
               versus the "#if DEBUG_REGALLOC" used elsewhere in this
               function; harmless (compiler folds the constant) but
               inconsistent. */
            if (DEBUG_REGALLOC) {
               vex_printf("free up ");
               (*ppReg)(rreg_state[j].rreg);
               vex_printf("\n");
            }
         }
      }
1105
      /* ------ Pre-instruction actions for fixed rreg uses ------ */

      /* Now we have to deal with rregs which are about to be made
         live by this instruction -- in other words, are entering into
         one of their live ranges.  If any such rreg holds a vreg, we
         will have to free up the rreg.  The simplest solution which
         is correct is to spill the rreg.

         Note we could do better:
         * Could move it into some other free rreg, if one is available

         Do this efficiently, by incrementally stepping along an array
         of rreg HLRs that are known to be sorted by start point
         (their .live_after field).
      */
      while (True) {
         vassert(rreg_lrs_la_next >= 0);
         vassert(rreg_lrs_la_next <= rreg_lrs_used);
         if (rreg_lrs_la_next == rreg_lrs_used)
            break; /* no more real reg live ranges to consider */
         if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
            break; /* next live range does not yet start */
         vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
         /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
            Find the associated rreg_state entry. */
         /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
            Real register live ranges are guaranteed to be well-formed
            in that they start with a write to the register -- Stage 2
            rejects any code not satisfying this.  So the correct
            question to ask is whether
            rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
            whether the reg becomes live after this insn -- rather
            than before it. */
#        if DEBUG_REGALLOC
         vex_printf("need to free up rreg: ");
         (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
         vex_printf("\n\n");
#        endif
         for (k = 0; k < n_rregs; k++)
            if (rreg_state[k].rreg == rreg_lrs_la[rreg_lrs_la_next].rreg)
               break;
         /* If this fails, we don't have an entry for this rreg.
            Which we should. */
         vassert(IS_VALID_RREGNO(k));
         /* NOTE(review): .vreg is read before the Bound check; the
            value is only *used* inside the Bound branch, so this is
            harmless, but moving the read inside the branch would be
            tidier. */
         m = hregNumber(rreg_state[k].vreg);
         if (rreg_state[k].disp == Bound) {
            /* Yes, there is an associated vreg.  Spill it if it's
               still live. */
            vassert(IS_VALID_VREGNO(m));
            vreg_state[m] = INVALID_RREG_NO;
            if (vreg_lrs[m].dead_before > ii) {
               vassert(vreg_lrs[m].reg_class != HRcINVALID);
               /* Emit the spill store only if the register's value is
                  not already known to equal its spill slot. */
               if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
                  HInstr* spill1 = NULL;
                  HInstr* spill2 = NULL;
                  (*genSpill)( &spill1, &spill2, rreg_state[k].rreg,
                               vreg_lrs[m].spill_offset, mode64 );
                  vassert(spill1 || spill2); /* can't both be NULL */
                  if (spill1)
                     EMIT_INSTR(spill1);
                  if (spill2)
                     EMIT_INSTR(spill2);
               }
               rreg_state[k].eq_spill_slot = True;
            }
         }
         rreg_state[k].disp = Unavail;
         rreg_state[k].vreg = INVALID_HREG;
         rreg_state[k].eq_spill_slot = False;

         /* check for further rregs entering HLRs at this point */
         rreg_lrs_la_next++;
      }
1179
1180
#     if DEBUG_REGALLOC
      vex_printf("After pre-insn actions for fixed regs:\n");
      PRINT_STATE;
      vex_printf("\n");
#     endif


      /* ------ Deal with the current instruction. ------ */

      /* Finally we can begin the processing of this instruction
         itself.  The aim is to free up enough rregs for this insn.
         This may generate spill stores since we may have to evict
         some vregs currently in rregs.  Also generates spill loads.
         We also build up the final vreg->rreg mapping to be applied
         to the insn. */

      /* (mojibake repaired here: "&reg_usage" had been corrupted to
         the HTML entity for the registered-trademark sign) */
      (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );

      initHRegRemap(&remap);
1200
      /* ------------ BEGIN directReload optimisation ----------- */

      /* If the instruction reads exactly one vreg which is currently
         in a spill slot, and this is the last use of that vreg, see
         if we can convert the instruction into one that reads
         directly from the spill slot.  This is clearly only possible
         for x86 and amd64 targets, since ppc and arm are load-store
         architectures.  If successful, replace instrs_in->arr[ii]
         with this new instruction, and recompute its reg usage, so
         that the change is invisible to the standard-case handling
         that follows. */

      if (directReload && reg_usage.n_used <= 2) {
         Bool  debug_direct_reload = True && False;
         HReg  cand     = INVALID_HREG;
         /* NOTE(review): nreads is declared Bool but used as a
            counter (nreads++ below, then compared against 1).  This
            only works because Bool is an integral typedef; Int would
            be the honest type.  TODO confirm and tidy. */
         Bool  nreads   = 0;
         Short spilloff = 0;

         for (j = 0; j < reg_usage.n_used; j++) {

            vreg = reg_usage.hreg[j];

            /* only virtual registers can be spill-slot candidates */
            if (!hregIsVirtual(vreg))
               continue;

            if (reg_usage.mode[j] == HRmRead) {
               nreads++;
               m = hregNumber(vreg);
               vassert(IS_VALID_VREGNO(m));
               k = vreg_state[m];
               if (!IS_VALID_RREGNO(k)) {
                  /* ok, it is spilled.  Now, is this its last use? */
                  vassert(vreg_lrs[m].dead_before >= ii+1);
                  if (vreg_lrs[m].dead_before == ii+1
                      && cand == INVALID_HREG) {
                     spilloff = vreg_lrs[m].spill_offset;
                     cand = vreg;
                  }
               }
            }
         }

         if (nreads == 1 && cand != INVALID_HREG) {
            HInstr* reloaded;
            /* With two regs mentioned they must be distinct, else the
               "exactly one spilled read" reasoning above is unsound. */
            if (reg_usage.n_used == 2)
               vassert(reg_usage.hreg[0] != reg_usage.hreg[1]);

            reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
            if (debug_direct_reload && !reloaded) {
               vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
               ppInstr(instrs_in->arr[ii], mode64);
            }
            if (reloaded) {
               /* Update info about the insn, so it looks as if it had
                  been in this form all along. */
               instrs_in->arr[ii] = reloaded;
               /* (mojibake repaired: "&reg_usage") */
               (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
               /* NOTE(review): "!reloaded" is necessarily False
                  inside this branch, so the " --> " print below can
                  never fire; it presumably should test "reloaded".
                  Left unchanged since it is debug-only code. */
               if (debug_direct_reload && !reloaded) {
                  vex_printf(" --> ");
                  ppInstr(reloaded, mode64);
               }
            }

            if (debug_direct_reload && !reloaded)
               vex_printf("\n");
         }

      }

      /* ------------ END directReload optimisation ------------ */
1270
1271 /* for each reg mentioned in the insn ... */
1272 for (j = 0; j < reg_usage.n_used; j++) {
1273
1274 vreg = reg_usage.hreg[j];
1275
1276 /* only interested in virtual registers right now. */
1277 if (!hregIsVirtual(vreg))
1278 continue;
1279
1280 # if 0
1281 vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
1282 # endif
1283
1284 /* Now we're trying to find a rreg for "vreg". First of all,
1285 if it already has an rreg assigned, we don't need to do
1286 anything more. Search the current state to find out. */
1287 m = hregNumber(vreg);
1288 vassert(IS_VALID_VREGNO(m));
1289 k = vreg_state[m];
1290 if (IS_VALID_RREGNO(k)) {
1291 vassert(rreg_state[k].disp == Bound);
1292 addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
1293 /* If this rreg is written or modified, mark it as different
1294 from any spill slot value. */
1295 if (reg_usage.mode[j] != HRmRead)
1296 rreg_state[k].eq_spill_slot = False;
1297 continue;
1298 } else {
1299 vassert(k == INVALID_RREG_NO);
1300 }
1301
1302 /* No luck. The next thing to do is see if there is a
1303 currently free rreg available, of the correct class. If
1304 so, bag it. NOTE, we could improve this by selecting an
1305 rreg for which the next live-range event is as far ahead
1306 as possible. */
1307 k_suboptimal = -1;
1308 for (k = 0; k < n_rregs; k++) {
1309 if (rreg_state[k].disp != Free
1310 || hregClass(rreg_state[k].rreg) != hregClass(vreg))
1311 continue;
1312 if (rreg_state[k].has_hlrs) {
1313 /* Well, at least we can use k_suboptimal if we really
1314 have to. Keep on looking for a better candidate. */
1315 k_suboptimal = k;
1316 } else {
1317 /* Found a preferable reg. Use it. */
1318 k_suboptimal = -1;
1319 break;
1320 }
1321 }
1322 if (k_suboptimal >= 0)
1323 k = k_suboptimal;
1324
1325 if (k < n_rregs) {
1326 rreg_state[k].disp = Bound;
1327 rreg_state[k].vreg = vreg;
1328 m = hregNumber(vreg);
1329 vassert(IS_VALID_VREGNO(m));
1330 vreg_state[m] = toShort(k);
1331 addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
1332 /* Generate a reload if needed. This only creates needed
1333 reloads because the live range builder for vregs will
1334 guarantee that the first event for a vreg is a write.
1335 Hence, if this reference is not a write, it cannot be
1336 the first reference for this vreg, and so a reload is
1337 indeed needed. */
1338 if (reg_usage.mode[j] != HRmWrite) {
1339 vassert(vreg_lrs[m].reg_class != HRcINVALID);
1340 HInstr* reload1 = NULL;
1341 HInstr* reload2 = NULL;
1342 (*genReload)( &reload1, &reload2, rreg_state[k].rreg,
1343 vreg_lrs[m].spill_offset, mode64 );
1344 vassert(reload1 || reload2); /* can't both be NULL */
1345 if (reload1)
1346 EMIT_INSTR(reload1);
1347 if (reload2)
1348 EMIT_INSTR(reload2);
1349 /* This rreg is read or modified by the instruction.
1350 If it's merely read we can claim it now equals the
1351 spill slot, but not so if it is modified. */
1352 if (reg_usage.mode[j] == HRmRead) {
1353 rreg_state[k].eq_spill_slot = True;
1354 } else {
1355 vassert(reg_usage.mode[j] == HRmModify);
1356 rreg_state[k].eq_spill_slot = False;
1357 }
1358 } else {
1359 rreg_state[k].eq_spill_slot = False;
1360 }
1361
1362 continue;
1363 }
1364
1365 /* Well, now we have no option but to spill a vreg. It's
1366 important to make a good choice of vreg to spill, and of
1367 course we need to be careful not to spill a vreg which is
1368 needed by this insn. */
1369
1370 /* First, mark in the rreg_state, those rregs which are not spill
1371 candidates, due to holding a vreg mentioned by this
1372 instruction. Or being of the wrong class. */
1373 for (k = 0; k < n_rregs; k++) {
1374 rreg_state[k].is_spill_cand = False;
1375 if (rreg_state[k].disp != Bound)
1376 continue;
1377 if (hregClass(rreg_state[k].rreg) != hregClass(vreg))
1378 continue;
1379 rreg_state[k].is_spill_cand = True;
1380 for (m = 0; m < reg_usage.n_used; m++) {
1381 if (rreg_state[k].vreg == reg_usage.hreg[m]) {
1382 rreg_state[k].is_spill_cand = False;
1383 break;
1384 }
1385 }
1386 }
1387
1388 /* We can choose to spill any rreg satisfying
1389 rreg_state[r].is_spill_cand (so to speak). Choose r so that
1390 the next use of its associated vreg is as far ahead as
1391 possible, in the hope that this will minimise the number
1392 of consequent reloads required. */
1393 spillee
1394 = findMostDistantlyMentionedVReg (
1395 getRegUsage, instrs_in, ii+1, rreg_state, n_rregs, mode64 );
1396
1397 if (spillee == -1) {
1398 /* Hmmmmm. There don't appear to be any spill candidates.
1399 We're hosed. */
1400 vex_printf("reg_alloc: can't find a register in class: ");
1401 ppHRegClass(hregClass(vreg));
1402 vex_printf("\n");
1403 vpanic("reg_alloc: can't create a free register.");
1404 }
1405
1406 /* Right. So we're going to spill rreg_state[spillee]. */
1407 vassert(IS_VALID_RREGNO(spillee));
1408 vassert(rreg_state[spillee].disp == Bound);
1409 /* check it's the right class */
1410 vassert(hregClass(rreg_state[spillee].rreg) == hregClass(vreg));
1411 /* check we're not ejecting the vreg for which we are trying
1412 to free up a register. */
1413 vassert(rreg_state[spillee].vreg != vreg);
1414
1415 m = hregNumber(rreg_state[spillee].vreg);
1416 vassert(IS_VALID_VREGNO(m));
1417
1418 /* So here's the spill store. Assert that we're spilling a
1419 live vreg. */
1420 vassert(vreg_lrs[m].dead_before > ii);
1421 vassert(vreg_lrs[m].reg_class != HRcINVALID);
1422 if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
1423 HInstr* spill1 = NULL;
1424 HInstr* spill2 = NULL;
1425 (*genSpill)( &spill1, &spill2, rreg_state[spillee].rreg,
1426 vreg_lrs[m].spill_offset, mode64 );
1427 vassert(spill1 || spill2); /* can't both be NULL */
1428 if (spill1)
1429 EMIT_INSTR(spill1);
1430 if (spill2)
1431 EMIT_INSTR(spill2);
1432 }
1433
1434 /* Update the rreg_state to reflect the new assignment for this
1435 rreg. */
1436 rreg_state[spillee].vreg = vreg;
1437 vreg_state[m] = INVALID_RREG_NO;
1438
1439 rreg_state[spillee].eq_spill_slot = False; /* be safe */
1440
1441 m = hregNumber(vreg);
1442 vassert(IS_VALID_VREGNO(m));
1443 vreg_state[m] = toShort(spillee);
1444
1445 /* Now, if this vreg is being read or modified (as opposed to
1446 written), we have to generate a reload for it. */
1447 if (reg_usage.mode[j] != HRmWrite) {
1448 vassert(vreg_lrs[m].reg_class != HRcINVALID);
1449 HInstr* reload1 = NULL;
1450 HInstr* reload2 = NULL;
1451 (*genReload)( &reload1, &reload2, rreg_state[spillee].rreg,
1452 vreg_lrs[m].spill_offset, mode64 );
1453 vassert(reload1 || reload2); /* can't both be NULL */
1454 if (reload1)
1455 EMIT_INSTR(reload1);
1456 if (reload2)
1457 EMIT_INSTR(reload2);
1458 /* This rreg is read or modified by the instruction.
1459 If it's merely read we can claim it now equals the
1460 spill slot, but not so if it is modified. */
1461 if (reg_usage.mode[j] == HRmRead) {
1462 rreg_state[spillee].eq_spill_slot = True;
1463 } else {
1464 vassert(reg_usage.mode[j] == HRmModify);
1465 rreg_state[spillee].eq_spill_slot = False;
1466 }
1467 }
1468
1469 /* So after much twisting and turning, we have vreg mapped to
1470 rreg_state[spillee].rreg. Note that in the map. */
1471 addToHRegRemap(&remap, vreg, rreg_state[spillee].rreg);
1472
1473 } /* iterate over registers in this instruction. */
1474
1475 /* We've finished clowning around with registers in this instruction.
1476 Three results:
1477 - the running rreg_state[] has been updated
1478 - a suitable vreg->rreg mapping for this instruction has been
1479 constructed
1480 - spill and reload instructions may have been emitted.
1481
1482 The final step is to apply the mapping to the instruction,
1483 and emit that.
1484 */
1485
1486 /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
1487 (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
1488 EMIT_INSTR( instrs_in->arr[ii] );
1489
1490 # if DEBUG_REGALLOC
1491 vex_printf("After dealing with current insn:\n");
1492 PRINT_STATE;
1493 vex_printf("\n");
1494 # endif
1495
      /* ------ Post-instruction actions for fixed rreg uses ------ */

      /* Now we need to check for rregs exiting fixed live ranges
         after this instruction, and if so mark them as free.  This
         steps along rreg_lrs_db (sorted by .dead_before) in the same
         incremental fashion as the pre-insn loop above. */
      while (True) {
         vassert(rreg_lrs_db_next >= 0);
         vassert(rreg_lrs_db_next <= rreg_lrs_used);
         if (rreg_lrs_db_next == rreg_lrs_used)
            break; /* no more real reg live ranges to consider */
         if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
            break; /* next live range has not yet ended */
         vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
         /* rreg_lrs_db[rreg_lrs_db_next].rreg is exiting a hard live
            range.  Mark it as such in the main rreg_state array. */
         for (k = 0; k < n_rregs; k++)
            if (rreg_state[k].rreg == rreg_lrs_db[rreg_lrs_db_next].rreg)
               break;
         /* If this vassertion fails, we don't have an entry for
            this rreg.  Which we should. */
         vassert(k < n_rregs);
         vassert(rreg_state[k].disp == Unavail);
         rreg_state[k].disp = Free;
         rreg_state[k].vreg = INVALID_HREG;
         rreg_state[k].eq_spill_slot = False;

         /* check for further rregs leaving HLRs at this point */
         rreg_lrs_db_next++;
      }
1524
1525 # if DEBUG_REGALLOC
1526 vex_printf("After post-insn actions for fixed regs:\n");
1527 PRINT_STATE;
1528 vex_printf("\n");
1529 # endif
1530
1531 } /* iterate over insns */
1532
1533 /* ------ END: Process each insn in turn. ------ */
1534
1535 /* free(rreg_state); */
1536 /* free(rreg_lrs); */
1537 /* if (vreg_lrs) free(vreg_lrs); */
1538
1539 /* Paranoia */
1540 for (j = 0; j < n_rregs; j++)
1541 vassert(rreg_state[j].rreg == available_real_regs[j]);
1542
1543 vassert(rreg_lrs_la_next == rreg_lrs_used);
1544 vassert(rreg_lrs_db_next == rreg_lrs_used);
1545
1546 return instrs_out;
1547
1548 # undef INVALID_INSTRNO
1549 # undef EMIT_INSTR
1550 # undef PRINT_STATE
1551 }
1552
1553
1554
1555 /*---------------------------------------------------------------*/
1556 /*--- host_reg_alloc2.c ---*/
1557 /*---------------------------------------------------------------*/
1558