1
2 /*---------------------------------------------------------------*/
3 /*--- begin host_reg_alloc2.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex.h"
38
39 #include "main_util.h"
40 #include "host_generic_regs.h"
41
42 /* Set to 1 for lots of debugging output. */
43 #define DEBUG_REGALLOC 0
44
45
46 /* TODO 27 Oct 04:
47
48 Better consistency checking from what isMove tells us.
49
50 We can possibly do V-V coalescing even when the src is spilled,
51 providing we can arrange for the dst to have the same spill slot.
52
53 Note that state[].hreg is the same as the available real regs.
54
55 Generally rationalise data structures. */
56
57
58 /* Records information on virtual register live ranges. Computed once
59 and remains unchanged after that. */
60 typedef
61 struct {
62 /* Becomes live for the first time after this insn ... */
63 Short live_after;
64 /* Becomes dead for the last time before this insn ... */
65 Short dead_before;
66 /* The "home" spill slot, if needed. Never changes. */
67 Short spill_offset;
68 Short spill_size;
69 /* What kind of register this is. */
70 HRegClass reg_class;
71 }
72 VRegLR;
73
74
75 /* Records information on real-register live ranges. Computed once
76 and remains unchanged after that. */
77 typedef
78 struct {
79 HReg rreg;
80 /* Becomes live after this insn ... */
81 Short live_after;
82 /* Becomes dead before this insn ... */
83 Short dead_before;
84 }
85 RRegLR;
86
87
88 /* An array of the following structs (rreg_state) comprises the
89 running state of the allocator. It indicates what the current
90 disposition of each allocatable real register is. The array gets
91 updated as the allocator processes instructions. The identity of
92 the register is not recorded here, because the index of this
93 structure in doRegisterAllocation()'s |rreg_state| is the index
94 number of the register, and the register itself can be extracted
95 from the RRegUniverse supplied to doRegisterAllocation(). */
96 typedef
97 struct {
98 /* ------ FIELDS WHICH DO NOT CHANGE ------ */
99 /* Is this involved in any HLRs? (only an optimisation hint) */
100 Bool has_hlrs;
101 /* ------ FIELDS WHICH DO CHANGE ------ */
102 /* 6 May 07: rearranged fields below so the whole struct fits
103 into 16 bytes on both x86 and amd64. */
104 /* Used when .disp == Bound and we are looking for vregs to
105 spill. */
106 Bool is_spill_cand;
107 /* Optimisation: used when .disp == Bound. Indicates when the
108 rreg has the same value as the spill slot for the associated
109 vreg. Is safely left at False, and becomes True after a
110 spill store or reload for this rreg. */
111 Bool eq_spill_slot;
112 /* What's it's current disposition? */
113 enum { Free, /* available for use */
114 Unavail, /* in a real-reg live range */
115 Bound /* in use (holding value of some vreg) */
116 }
117 disp;
118 /* If .disp == Bound, what vreg is it bound to? */
119 HReg vreg;
120 }
121 RRegState;
122
123
/* The allocator also maintains a redundant array of indexes
   (vreg_state) from vreg numbers back to entries in rreg_state.  It
   is redundant because iff vreg_state[i] == j then
   hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
   point at each other.  The purpose of this is to speed up activities
   which involve looking for a particular vreg: there is no need to
   scan the rreg_state looking for it, just index directly into
   vreg_state.  The FAQ "does this vreg already have an associated
   rreg" is the main beneficiary.

   To indicate, in vreg_state[i], that a given vreg is not currently
   associated with any rreg, that entry can be set to INVALID_RREG_NO.

   Because the vreg_state entries are signed Shorts, the max number
   of vregs that can be handled by regalloc is 32767.
*/

#define INVALID_RREG_NO ((Short)(-1))

/* Range checks against the locals n_vregs/n_rregs of the function in
   which these are expanded. */
#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
145
146
147 /* Search forward from some given point in the incoming instruction
148 sequence. Point is to select a virtual register to spill, by
149 finding the vreg which is mentioned as far ahead as possible, in
150 the hope that this will minimise the number of consequent reloads.
151
152 Only do the search for vregs which are Bound in the running state,
153 and for which the .is_spill_cand field is set. This allows the
154 caller to arbitrarily restrict the set of spill candidates to be
155 considered.
156
157 To do this we don't actually need to see the incoming instruction
158 stream. Rather, what we need us the HRegUsage records for the
159 incoming instruction stream. Hence that is passed in.
160
161 Returns an index into the state array indicating the (v,r) pair to
162 spill, or -1 if none was found. */
163 static
findMostDistantlyMentionedVReg(HRegUsage * reg_usages_in,Int search_from_instr,Int num_instrs,RRegState * state,Int n_state)164 Int findMostDistantlyMentionedVReg (
165 HRegUsage* reg_usages_in,
166 Int search_from_instr,
167 Int num_instrs,
168 RRegState* state,
169 Int n_state
170 )
171 {
172 Int k, m;
173 Int furthest_k = -1;
174 Int furthest = -1;
175 vassert(search_from_instr >= 0);
176 for (k = 0; k < n_state; k++) {
177 if (!state[k].is_spill_cand)
178 continue;
179 vassert(state[k].disp == Bound);
180 for (m = search_from_instr; m < num_instrs; m++) {
181 if (HRegUsage__contains(®_usages_in[m], state[k].vreg))
182 break;
183 }
184 if (m > furthest) {
185 furthest = m;
186 furthest_k = k;
187 }
188 }
189 return furthest_k;
190 }
191
192
193 /* Check that this vreg has been assigned a sane spill offset. */
194 inline
sanity_check_spill_offset(VRegLR * vreg)195 static void sanity_check_spill_offset ( VRegLR* vreg )
196 {
197 switch (vreg->reg_class) {
198 case HRcVec128: case HRcFlt64:
199 vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
200 default:
201 vassert(0 == ((UShort)vreg->spill_offset % 8)); break;
202 }
203 }
204
205
206 /* Double the size of the real-reg live-range array, if needed. */
207 __attribute__((noinline))
ensureRRLRspace_SLOW(RRegLR ** info,Int * size,Int used)208 static void ensureRRLRspace_SLOW ( RRegLR** info, Int* size, Int used )
209 {
210 Int k;
211 RRegLR* arr2;
212 if (0)
213 vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size);
214 vassert(used == *size);
215 arr2 = LibVEX_Alloc_inline(2 * *size * sizeof(RRegLR));
216 for (k = 0; k < *size; k++)
217 arr2[k] = (*info)[k];
218 *size *= 2;
219 *info = arr2;
220 }
221 inline
ensureRRLRspace(RRegLR ** info,Int * size,Int used)222 static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
223 {
224 if (LIKELY(used < *size)) return;
225 ensureRRLRspace_SLOW(info, size, used);
226 }
227
228
229 /* Sort an array of RRegLR entries by either the .live_after or
230 .dead_before fields. This is performance-critical. */
sortRRLRarray(RRegLR * arr,Int size,Bool by_live_after)231 static void sortRRLRarray ( RRegLR* arr,
232 Int size, Bool by_live_after )
233 {
234 Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
235 9841, 29524, 88573, 265720,
236 797161, 2391484 };
237 Int lo = 0;
238 Int hi = size-1;
239 Int i, j, h, bigN, hp;
240 RRegLR v;
241
242 vassert(size >= 0);
243 if (size == 0)
244 return;
245
246 bigN = hi - lo + 1; if (bigN < 2) return;
247 hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
248
249 if (by_live_after) {
250
251 for ( ; hp >= 0; hp--) {
252 h = incs[hp];
253 for (i = lo + h; i <= hi; i++) {
254 v = arr[i];
255 j = i;
256 while (arr[j-h].live_after > v.live_after) {
257 arr[j] = arr[j-h];
258 j = j - h;
259 if (j <= (lo + h - 1)) break;
260 }
261 arr[j] = v;
262 }
263 }
264
265 } else {
266
267 for ( ; hp >= 0; hp--) {
268 h = incs[hp];
269 for (i = lo + h; i <= hi; i++) {
270 v = arr[i];
271 j = i;
272 while (arr[j-h].dead_before > v.dead_before) {
273 arr[j] = arr[j-h];
274 j = j - h;
275 if (j <= (lo + h - 1)) break;
276 }
277 arr[j] = v;
278 }
279 }
280
281 }
282 }
283
284
285 /* Compute the index of the highest and lowest 1 in a ULong,
286 respectively. Results are undefined if the argument is zero.
287 Don't pass it zero :) */
ULong__maxIndex(ULong w64)288 static inline UInt ULong__maxIndex ( ULong w64 ) {
289 return 63 - __builtin_clzll(w64);
290 }
291
ULong__minIndex(ULong w64)292 static inline UInt ULong__minIndex ( ULong w64 ) {
293 return __builtin_ctzll(w64);
294 }
295
296
297 /* Vectorised memset, copied from Valgrind's m_libcbase.c. */
local_memset(void * destV,Int c,SizeT sz)298 static void* local_memset ( void *destV, Int c, SizeT sz )
299 {
300 # define IS_4_ALIGNED(aaa_p) (0 == (((HWord)(aaa_p)) & ((HWord)0x3)))
301
302 UInt c4;
303 UChar* d = destV;
304 UChar uc = c;
305
306 while ((!IS_4_ALIGNED(d)) && sz >= 1) {
307 d[0] = uc;
308 d++;
309 sz--;
310 }
311 if (sz == 0)
312 return destV;
313 c4 = uc;
314 c4 |= (c4 << 8);
315 c4 |= (c4 << 16);
316 while (sz >= 16) {
317 ((UInt*)d)[0] = c4;
318 ((UInt*)d)[1] = c4;
319 ((UInt*)d)[2] = c4;
320 ((UInt*)d)[3] = c4;
321 d += 16;
322 sz -= 16;
323 }
324 while (sz >= 4) {
325 ((UInt*)d)[0] = c4;
326 d += 4;
327 sz -= 4;
328 }
329 while (sz >= 1) {
330 d[0] = c;
331 d++;
332 sz--;
333 }
334 return destV;
335
336 # undef IS_4_ALIGNED
337 }
338
339
340 /* A target-independent register allocator. Requires various
341 functions which it uses to deal abstractly with instructions and
342 registers, since it cannot have any target-specific knowledge.
343
344 Returns a new list of instructions, which, as a result of the
345 behaviour of mapRegs, will be in-place modifications of the
346 original instructions.
347
348 Requires that the incoming code has been generated using
349 vreg numbers 0, 1 .. n_vregs-1. Appearance of a vreg outside
350 that range is a checked run-time error.
351
352 Takes an expandable array of pointers to unallocated insns.
353 Returns an expandable array of pointers to allocated insns.
354 */
doRegisterAllocation(HInstrArray * instrs_in,const RRegUniverse * univ,Bool (* isMove)(const HInstr *,HReg *,HReg *),void (* getRegUsage)(HRegUsage *,const HInstr *,Bool),void (* mapRegs)(HRegRemap *,HInstr *,Bool),void (* genSpill)(HInstr **,HInstr **,HReg,Int,Bool),void (* genReload)(HInstr **,HInstr **,HReg,Int,Bool),HInstr * (* directReload)(HInstr *,HReg,Short),Int guest_sizeB,void (* ppInstr)(const HInstr *,Bool),void (* ppReg)(HReg),Bool mode64)355 HInstrArray* doRegisterAllocation (
356
357 /* Incoming virtual-registerised code. */
358 HInstrArray* instrs_in,
359
360 /* The real-register universe to use. This contains facts about
361 real registers, one of which is the set of registers available
362 for allocation. */
363 const RRegUniverse* univ,
364
365 /* Return True iff the given insn is a reg-reg move, in which
366 case also return the src and dst regs. */
367 Bool (*isMove) ( const HInstr*, HReg*, HReg* ),
368
369 /* Get info about register usage in this insn. */
370 void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ),
371
372 /* Apply a reg-reg mapping to an insn. */
373 void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),
374
375 /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
376 real reg to a spill slot byte offset. The two leading HInstr**
377 args are out parameters, through which the generated insns are
378 returned. Also (optionally) a 'directReload' function, which
379 attempts to replace a given instruction by one which reads
380 directly from a specified spill slot. May be NULL, in which
381 case the optimisation is not attempted. */
382 void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ),
383 void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
384 HInstr* (*directReload) ( HInstr*, HReg, Short ),
385 Int guest_sizeB,
386
387 /* For debug printing only. */
388 void (*ppInstr) ( const HInstr*, Bool ),
389 void (*ppReg) ( HReg ),
390
391 /* 32/64bit mode */
392 Bool mode64
393 )
394 {
395 # define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8)
396
397 const Bool eq_spill_opt = True;
398
399 /* Info on vregs and rregs. Computed once and remains
400 unchanged. */
401 Int n_vregs;
402 VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
403
   /* We keep two copies of the real-reg live range info, one sorted
      by .live_after and the other by .dead_before.  First the
      unsorted info is created in the _la variant and then copied into
      the _db variant.  Once that's done both of them are sorted.
      We also need two integer cursors which record the next
      location in the two arrays to consider. */
410 RRegLR* rreg_lrs_la;
411 RRegLR* rreg_lrs_db;
412 Int rreg_lrs_size;
413 Int rreg_lrs_used;
414 Int rreg_lrs_la_next;
415 Int rreg_lrs_db_next;
416
417 /* Info on register usage in the incoming instruction array.
418 Computed once and remains unchanged, more or less; updated
419 sometimes by the direct-reload optimisation. */
420 HRegUsage* reg_usage_arr; /* [0 .. instrs_in->arr_used-1] */
421
422 /* Used when constructing vreg_lrs (for allocating stack
423 slots). */
424 Short ss_busy_until_before[N_SPILL64S];
425
426 /* Used when constructing rreg_lrs. */
427 Int* rreg_live_after;
428 Int* rreg_dead_before;
429
430 /* Running state of the core allocation algorithm. */
431 RRegState* rreg_state; /* [0 .. n_rregs-1] */
432 Int n_rregs;
433
   /* .. and the redundant backward map */
   /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
      This implies n_rregs must be <= 32768. */
437 Short* vreg_state; /* [0 .. n_vregs-1] */
438
439 /* The vreg -> rreg map constructed and then applied to each
440 instr. */
441 HRegRemap remap;
442
443 /* The output array of instructions. */
444 HInstrArray* instrs_out;
445
446 /* Sanity checks are expensive. They are only done periodically,
447 not at each insn processed. */
448 Bool do_sanity_check;
449
450 vassert(0 == (guest_sizeB % LibVEX_GUEST_STATE_ALIGN));
451 vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN));
452 vassert(0 == (N_SPILL64S % 2));
453
454 /* The live range numbers are signed shorts, and so limiting the
455 number of insns to 15000 comfortably guards against them
456 overflowing 32k. */
457 vassert(instrs_in->arr_used <= 15000);
458
459 # define INVALID_INSTRNO (-2)
460
461 # define EMIT_INSTR(_instr) \
462 do { \
463 HInstr* _tmp = (_instr); \
464 if (DEBUG_REGALLOC) { \
465 vex_printf("** "); \
466 (*ppInstr)(_tmp, mode64); \
467 vex_printf("\n\n"); \
468 } \
469 addHInstr ( instrs_out, _tmp ); \
470 } while (0)
471
472 # define PRINT_STATE \
473 do { \
474 Int z, q; \
475 for (z = 0; z < n_rregs; z++) { \
476 vex_printf(" rreg_state[%2d] = ", z); \
477 (*ppReg)(univ->regs[z]); \
478 vex_printf(" \t"); \
479 switch (rreg_state[z].disp) { \
480 case Free: vex_printf("Free\n"); break; \
481 case Unavail: vex_printf("Unavail\n"); break; \
482 case Bound: vex_printf("BoundTo "); \
483 (*ppReg)(rreg_state[z].vreg); \
484 vex_printf("\n"); break; \
485 } \
486 } \
487 vex_printf("\n vreg_state[0 .. %d]:\n ", n_vregs-1); \
488 q = 0; \
489 for (z = 0; z < n_vregs; z++) { \
490 if (vreg_state[z] == INVALID_RREG_NO) \
491 continue; \
492 vex_printf("[%d] -> %d ", z, vreg_state[z]); \
493 q++; \
494 if (q > 0 && (q % 6) == 0) \
495 vex_printf("\n "); \
496 } \
497 vex_printf("\n"); \
498 } while (0)
499
500
501 /* --------- Stage 0: set up output array --------- */
502 /* --------- and allocate/initialise running state. --------- */
503
504 instrs_out = newHInstrArray();
505
506 /* ... and initialise running state. */
507 /* n_rregs is no more than a short name for n_available_real_regs. */
508 n_rregs = univ->allocable;
509 n_vregs = instrs_in->n_vregs;
510
511 /* If this is not so, vreg_state entries will overflow. */
512 vassert(n_vregs < 32767);
513
514 /* If this is not so, the universe we have is nonsensical. */
515 vassert(n_rregs > 0);
516
517 rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState));
518 vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(Short));
519
520 for (Int j = 0; j < n_rregs; j++) {
521 rreg_state[j].has_hlrs = False;
522 rreg_state[j].disp = Free;
523 rreg_state[j].vreg = INVALID_HREG;
524 rreg_state[j].is_spill_cand = False;
525 rreg_state[j].eq_spill_slot = False;
526 }
527
528 for (Int j = 0; j < n_vregs; j++)
529 vreg_state[j] = INVALID_RREG_NO;
530
531
532 /* --------- Stage 1: compute vreg live ranges. --------- */
533 /* --------- Stage 2: compute rreg live ranges. --------- */
534
535 /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */
536
537 /* This is relatively simple, because (1) we only seek the complete
538 end-to-end live range of each vreg, and are not interested in
539 any holes in it, and (2) the vregs are conveniently numbered 0
540 .. n_vregs-1, so we can just dump the results in a
541 pre-allocated array. */
542
543 vreg_lrs = NULL;
544 if (n_vregs > 0)
545 vreg_lrs = LibVEX_Alloc_inline(sizeof(VRegLR) * n_vregs);
546
547 for (Int j = 0; j < n_vregs; j++) {
548 vreg_lrs[j].live_after = INVALID_INSTRNO;
549 vreg_lrs[j].dead_before = INVALID_INSTRNO;
550 vreg_lrs[j].spill_offset = 0;
551 vreg_lrs[j].spill_size = 0;
552 vreg_lrs[j].reg_class = HRcINVALID;
553 }
554
555 /* An array to hold the reg-usage info for the incoming
556 instructions. */
557 reg_usage_arr
558 = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used-1);
559
560 /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
561
562 /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */
563
564 /* This is more complex than Stage 1, because we need to compute
565 exactly all the live ranges of all the allocatable real regs,
566 and we don't know in advance how many there will be. */
567
568 rreg_lrs_used = 0;
569 rreg_lrs_size = 4;
570 rreg_lrs_la = LibVEX_Alloc_inline(rreg_lrs_size * sizeof(RRegLR));
571 rreg_lrs_db = NULL; /* we'll create this later */
572
   /* We'll need to track live range start/end points separately for
      each rreg.  Sigh. */
575 vassert(n_rregs > 0);
576 rreg_live_after = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
577 rreg_dead_before = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
578
579 for (Int j = 0; j < n_rregs; j++) {
580 rreg_live_after[j] =
581 rreg_dead_before[j] = INVALID_INSTRNO;
582 }
583
584 /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
585
586 /* ------ start of ITERATE OVER INSNS ------ */
587
588 for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
589
590 (*getRegUsage)( ®_usage_arr[ii], instrs_in->arr[ii], mode64 );
591
592 if (0) {
593 vex_printf("\n%d stage1: ", ii);
594 (*ppInstr)(instrs_in->arr[ii], mode64);
595 vex_printf("\n");
596 ppHRegUsage(univ, ®_usage_arr[ii]);
597 }
598
599 /* ------ start of DEAL WITH VREG LIVE RANGES ------ */
600
601 /* for each virtual reg mentioned in the insn ... */
602 for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
603
604 HReg vreg = reg_usage_arr[ii].vRegs[j];
605 vassert(hregIsVirtual(vreg));
606
607 Int k = hregIndex(vreg);
608 if (k < 0 || k >= n_vregs) {
609 vex_printf("\n");
610 (*ppInstr)(instrs_in->arr[ii], mode64);
611 vex_printf("\n");
612 vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
613 vpanic("doRegisterAllocation: out-of-range vreg");
614 }
615
616 /* Take the opportunity to note its regclass. We'll need
617 that when allocating spill slots. */
618 if (vreg_lrs[k].reg_class == HRcINVALID) {
619 /* First mention of this vreg. */
620 vreg_lrs[k].reg_class = hregClass(vreg);
621 } else {
622 /* Seen it before, so check for consistency. */
623 vassert(vreg_lrs[k].reg_class == hregClass(vreg));
624 }
625
626 /* Now consider live ranges. */
627 switch (reg_usage_arr[ii].vMode[j]) {
628 case HRmRead:
629 if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
630 vex_printf("\n\nOFFENDING VREG = %d\n", k);
631 vpanic("doRegisterAllocation: "
632 "first event for vreg is Read");
633 }
634 vreg_lrs[k].dead_before = toShort(ii + 1);
635 break;
636 case HRmWrite:
637 if (vreg_lrs[k].live_after == INVALID_INSTRNO)
638 vreg_lrs[k].live_after = toShort(ii);
639 vreg_lrs[k].dead_before = toShort(ii + 1);
640 break;
641 case HRmModify:
642 if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
643 vex_printf("\n\nOFFENDING VREG = %d\n", k);
644 vpanic("doRegisterAllocation: "
645 "first event for vreg is Modify");
646 }
647 vreg_lrs[k].dead_before = toShort(ii + 1);
648 break;
649 default:
650 vpanic("doRegisterAllocation(1)");
651 } /* switch */
652
653 } /* iterate over virtual registers */
654
655 /* ------ end of DEAL WITH VREG LIVE RANGES ------ */
656
657 /* ------ start of DEAL WITH RREG LIVE RANGES ------ */
658
659 /* If this doesn't hold, the following iteration over real registers
660 will fail miserably. */
661 vassert(N_RREGUNIVERSE_REGS == 64);
662
663 const ULong rRead = reg_usage_arr[ii].rRead;
664 const ULong rWritten = reg_usage_arr[ii].rWritten;
665 const ULong rMentioned = rRead | rWritten;
666
667 UInt rReg_minIndex;
668 UInt rReg_maxIndex;
669 if (rMentioned == 0) {
670 /* There are no real register uses in this insn. Set
671 rReg_{min,max}Index so that the following loop doesn't iterate
672 at all, so as to avoid wasting time. */
673 rReg_minIndex = 1;
674 rReg_maxIndex = 0;
675 } else {
676 rReg_minIndex = ULong__minIndex(rMentioned);
677 rReg_maxIndex = ULong__maxIndex(rMentioned);
678 /* Don't bother to look at registers which are not available
679 to the allocator. We asserted above that n_rregs > 0, so
680 n_rregs-1 is safe. */
681 if (rReg_maxIndex >= n_rregs)
682 rReg_maxIndex = n_rregs-1;
683 }
684
685 /* for each allocator-available real reg mentioned in the insn ... */
686 /* Note. We are allocating only over the real regs available to
687 the allocator. Others, eg the stack or baseblock pointers,
688 are unavailable to allocation and so we never visit them.
689 Hence the iteration is cut off at n_rregs-1, since n_rregs ==
690 univ->allocable. */
691 for (Int j = rReg_minIndex; j <= rReg_maxIndex; j++) {
692
693 const ULong jMask = 1ULL << j;
694 if (LIKELY((rMentioned & jMask) == 0))
695 continue;
696
697 const Bool isR = (rRead & jMask) != 0;
698 const Bool isW = (rWritten & jMask) != 0;
699
700 /* Dummy initialisations of flush_la and flush_db to avoid
701 possible bogus uninit-var warnings from gcc. */
702 Int flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
703 Bool flush = False;
704
705 if (isW && !isR) {
706 flush_la = rreg_live_after[j];
707 flush_db = rreg_dead_before[j];
708 if (flush_la != INVALID_INSTRNO && flush_db != INVALID_INSTRNO)
709 flush = True;
710 rreg_live_after[j] = ii;
711 rreg_dead_before[j] = ii+1;
712 } else if (!isW && isR) {
713 if (rreg_live_after[j] == INVALID_INSTRNO) {
714 vex_printf("\nOFFENDING RREG = ");
715 (*ppReg)(univ->regs[j]);
716 vex_printf("\n");
717 vex_printf("\nOFFENDING instr = ");
718 (*ppInstr)(instrs_in->arr[ii], mode64);
719 vex_printf("\n");
720 vpanic("doRegisterAllocation: "
721 "first event for rreg is Read");
722 }
723 rreg_dead_before[j] = ii+1;
724 } else {
725 vassert(isR && isW);
726 if (rreg_live_after[j] == INVALID_INSTRNO) {
727 vex_printf("\nOFFENDING RREG = ");
728 (*ppReg)(univ->regs[j]);
729 vex_printf("\n");
730 vex_printf("\nOFFENDING instr = ");
731 (*ppInstr)(instrs_in->arr[ii], mode64);
732 vex_printf("\n");
733 vpanic("doRegisterAllocation: "
734 "first event for rreg is Modify");
735 }
736 rreg_dead_before[j] = ii+1;
737 }
738
739 if (flush) {
740 vassert(flush_la != INVALID_INSTRNO);
741 vassert(flush_db != INVALID_INSTRNO);
742 ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
743 if (0)
744 vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
745 rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j];
746 rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la);
747 rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
748 rreg_lrs_used++;
749 }
750
751 } /* iterate over rregs in the instr */
752
753 /* ------ end of DEAL WITH RREG LIVE RANGES ------ */
754
755 } /* iterate over insns */
756
757 /* ------ end of ITERATE OVER INSNS ------ */
758
759 /* ------ start of FINALISE RREG LIVE RANGES ------ */
760
761 /* Now finish up any live ranges left over. */
762 for (Int j = 0; j < n_rregs; j++) {
763
764 if (0) {
765 vex_printf("residual %d: %d %d\n", j, rreg_live_after[j],
766 rreg_dead_before[j]);
767 }
768 vassert( (rreg_live_after[j] == INVALID_INSTRNO
769 && rreg_dead_before[j] == INVALID_INSTRNO)
770 ||
771 (rreg_live_after[j] != INVALID_INSTRNO
772 && rreg_dead_before[j] != INVALID_INSTRNO)
773 );
774
775 if (rreg_live_after[j] == INVALID_INSTRNO)
776 continue;
777
778 ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
779 if (0)
780 vex_printf("FLUSH 2 (%d,%d)\n",
781 rreg_live_after[j], rreg_dead_before[j]);
782 rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j];
783 rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]);
784 rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
785 rreg_lrs_used++;
786 }
787
788 /* Compute summary hints for choosing real regs. If a real reg is
789 involved in a hard live range, record that fact in the fixed
790 part of the running rreg_state. Later, when offered a choice between
791 rregs, it's better to choose one which is not marked as having
792 any HLRs, since ones with HLRs may need to be spilled around
793 their HLRs. Correctness of final assignment is unaffected by
794 this mechanism -- it is only an optimisation. */
795
796 for (Int j = 0; j < rreg_lrs_used; j++) {
797 HReg rreg = rreg_lrs_la[j].rreg;
798 vassert(!hregIsVirtual(rreg));
799 /* rreg is involved in a HLR. Record this info in the array, if
800 there is space. */
801 UInt ix = hregIndex(rreg);
802 vassert(ix < n_rregs);
803 rreg_state[ix].has_hlrs = True;
804 }
805 if (0) {
806 for (Int j = 0; j < n_rregs; j++) {
807 if (!rreg_state[j].has_hlrs)
808 continue;
809 ppReg(univ->regs[j]);
810 vex_printf(" hinted\n");
811 }
812 }
813
814 /* Finally, copy the _la variant into the _db variant and
815 sort both by their respective fields. */
816 rreg_lrs_db = LibVEX_Alloc_inline(rreg_lrs_used * sizeof(RRegLR));
817 for (Int j = 0; j < rreg_lrs_used; j++)
818 rreg_lrs_db[j] = rreg_lrs_la[j];
819
820 sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/ );
821 sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );
822
823 /* And set up the cursors. */
824 rreg_lrs_la_next = 0;
825 rreg_lrs_db_next = 0;
826
827 for (Int j = 1; j < rreg_lrs_used; j++) {
828 vassert(rreg_lrs_la[j-1].live_after <= rreg_lrs_la[j].live_after);
829 vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
830 }
831
832 /* ------ end of FINALISE RREG LIVE RANGES ------ */
833
834 if (DEBUG_REGALLOC) {
835 for (Int j = 0; j < n_vregs; j++) {
836 vex_printf("vreg %d: la = %d, db = %d\n",
837 j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
838 }
839 }
840
841 if (DEBUG_REGALLOC) {
842 vex_printf("RRegLRs by LA:\n");
843 for (Int j = 0; j < rreg_lrs_used; j++) {
844 vex_printf(" ");
845 (*ppReg)(rreg_lrs_la[j].rreg);
846 vex_printf(" la = %d, db = %d\n",
847 rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
848 }
849 vex_printf("RRegLRs by DB:\n");
850 for (Int j = 0; j < rreg_lrs_used; j++) {
851 vex_printf(" ");
852 (*ppReg)(rreg_lrs_db[j].rreg);
853 vex_printf(" la = %d, db = %d\n",
854 rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
855 }
856 }
857
858 /* --------- Stage 3: allocate spill slots. --------- */
859
860 /* Each spill slot is 8 bytes long. For vregs which take more than
861 64 bits to spill (classes Flt64 and Vec128), we have to allocate
862 two consecutive spill slots. For 256 bit registers (class
863 Vec256), we have to allocate four consecutive spill slots.
864
865 For Vec128-class on PowerPC, the spill slot's actual address
866 must be 16-byte aligned. Since the spill slot's address is
867 computed as an offset from the guest state pointer, and since
868 the user of the generated code must set that pointer to a
869 32-aligned value, we have the residual obligation here of
870 choosing a 16-aligned spill slot offset for Vec128-class values.
871 Since each spill slot is 8 bytes long, that means for
872 Vec128-class values we must allocated a spill slot number which
873 is zero mod 2.
874
875 Similarly, for Vec256 class on amd64, find a spill slot number
876 which is zero mod 4. This guarantees it will be 32 byte
877 aligned, which isn't actually necessary on amd64 (we use movUpd
878 etc to spill), but seems like good practice.
879
880 Do a rank-based allocation of vregs to spill slot numbers. We
881 put as few values as possible in spill slots, but nevertheless
882 need to have a spill slot available for all vregs, just in case.
883 */
884 /* Int max_ss_no = -1; */
885
886 local_memset(ss_busy_until_before, 0, sizeof(ss_busy_until_before));
887
888 for (Int j = 0; j < n_vregs; j++) {
889
890 /* True iff this vreg is unused. In which case we also expect
891 that the reg_class field for it has not been set. */
892 if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
893 vassert(vreg_lrs[j].reg_class == HRcINVALID);
894 continue;
895 }
896
897 /* The spill slots are 64 bits in size. As per the comment on
898 definition of HRegClass in host_generic_regs.h, that means,
899 to spill a vreg of class Flt64 or Vec128, we'll need to find
900 two adjacent spill slots to use. For Vec256, we'll need to
901 find four adjacent slots to use. Note, this logic needs to
902 kept in sync with the size info on the definition of
903 HRegClass. */
904 Int ss_no = -1;
905 switch (vreg_lrs[j].reg_class) {
906
907 case HRcVec128: case HRcFlt64:
908 /* Find two adjacent free slots in which between them
909 provide up to 128 bits in which to spill the vreg.
910 Since we are trying to find an even:odd pair, move
911 along in steps of 2 (slots). */
912 for (ss_no = 0; ss_no < N_SPILL64S-1; ss_no += 2)
913 if (ss_busy_until_before[ss_no+0] <= vreg_lrs[j].live_after
914 && ss_busy_until_before[ss_no+1] <= vreg_lrs[j].live_after)
915 break;
916 if (ss_no >= N_SPILL64S-1) {
917 vpanic("LibVEX_N_SPILL_BYTES is too low. "
918 "Increase and recompile.");
919 }
920 ss_busy_until_before[ss_no+0] = vreg_lrs[j].dead_before;
921 ss_busy_until_before[ss_no+1] = vreg_lrs[j].dead_before;
922 break;
923
924 default:
925 /* The ordinary case -- just find a single spill slot. */
926 /* Find the lowest-numbered spill slot which is available
927 at the start point of this interval, and assign the
928 interval to it. */
929 for (ss_no = 0; ss_no < N_SPILL64S; ss_no++)
930 if (ss_busy_until_before[ss_no] <= vreg_lrs[j].live_after)
931 break;
932 if (ss_no == N_SPILL64S) {
933 vpanic("LibVEX_N_SPILL_BYTES is too low. "
934 "Increase and recompile.");
935 }
936 ss_busy_until_before[ss_no] = vreg_lrs[j].dead_before;
937 break;
938
939 } /* switch (vreg_lrs[j].reg_class) */
940
941 /* This reflects LibVEX's hard-wired knowledge of the baseBlock
942 layout: the guest state, then two equal sized areas following
943 it for two sets of shadow state, and then the spill area. */
944 vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + ss_no * 8);
945
946 /* Independent check that we've made a sane choice of slot */
947 sanity_check_spill_offset( &vreg_lrs[j] );
948 /* if (j > max_ss_no) */
949 /* max_ss_no = j; */
950 }
951
952 if (0) {
953 vex_printf("\n\n");
954 for (Int j = 0; j < n_vregs; j++)
955 vex_printf("vreg %d --> spill offset %d\n",
956 j, vreg_lrs[j].spill_offset);
957 }
958
959 /* --------- Stage 4: establish rreg preferences --------- */
960
961 /* It may be advantageous to allocating certain vregs to specific
962 rregs, as a way of avoiding reg-reg moves later. Here we
963 establish which, if any, rreg each vreg would prefer to be in.
964 Note that this constrains the allocator -- ideally we end up
965 with as few as possible vregs expressing a preference.
966
967 This is an optimisation: if the .preferred_rreg field is never
968 set to anything different from INVALID_HREG, the allocator still
969 works. */
970
971 /* 30 Dec 04: removed this mechanism as it does not seem to
972 help. */
973
974 /* --------- Stage 5: process instructions --------- */
975
976 /* This is the main loop of the allocator. First, we need to
977 correctly set up our running state, which tracks the status of
978 each real register. */
979
980 /* ------ BEGIN: Process each insn in turn. ------ */
981
982 for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
983
984 if (DEBUG_REGALLOC) {
985 vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
986 vex_printf("---- ");
987 (*ppInstr)(instrs_in->arr[ii], mode64);
988 vex_printf("\n\nInitial state:\n");
989 PRINT_STATE;
990 vex_printf("\n");
991 }
992
993 /* ------------ Sanity checks ------------ */
994
995 /* Sanity checks are expensive. So they are done only once
996 every 17 instructions, and just before the last
997 instruction. */
998 do_sanity_check
999 = toBool(
1000 False /* Set to True for sanity checking of all insns. */
1001 || ii == instrs_in->arr_used-1
1002 || (ii > 0 && (ii % 17) == 0)
1003 );
1004
1005 if (do_sanity_check) {
1006
1007 /* Sanity check 1: all rregs with a hard live range crossing
1008 this insn must be marked as unavailable in the running
1009 state. */
1010 for (Int j = 0; j < rreg_lrs_used; j++) {
1011 if (rreg_lrs_la[j].live_after < ii
1012 && ii < rreg_lrs_la[j].dead_before) {
1013 /* ii is the middle of a hard live range for some real
1014 reg. Check it's marked as such in the running
1015 state. */
1016 HReg reg = rreg_lrs_la[j].rreg;
1017
1018 if (0) {
1019 vex_printf("considering la %d .. db %d reg = ",
1020 rreg_lrs_la[j].live_after,
1021 rreg_lrs_la[j].dead_before);
1022 (*ppReg)(reg);
1023 vex_printf("\n");
1024 }
1025
1026 /* assert that this rreg is marked as unavailable */
1027 vassert(!hregIsVirtual(reg));
1028 vassert(rreg_state[hregIndex(reg)].disp == Unavail);
1029 }
1030 }
1031
1032 /* Sanity check 2: conversely, all rregs marked as
1033 unavailable in the running rreg_state must have a
1034 corresponding hard live range entry in the rreg_lrs
1035 array. */
1036 for (Int j = 0; j < n_rregs; j++) {
1037 vassert(rreg_state[j].disp == Bound
1038 || rreg_state[j].disp == Free
1039 || rreg_state[j].disp == Unavail);
1040 if (rreg_state[j].disp != Unavail)
1041 continue;
1042 Int k;
1043 for (k = 0; k < rreg_lrs_used; k++) {
1044 HReg reg = rreg_lrs_la[k].rreg;
1045 vassert(!hregIsVirtual(reg));
1046 if (hregIndex(reg) == j
1047 && rreg_lrs_la[k].live_after < ii
1048 && ii < rreg_lrs_la[k].dead_before)
1049 break;
1050 }
1051 /* If this vassertion fails, we couldn't find a
1052 corresponding HLR. */
1053 vassert(k < rreg_lrs_used);
1054 }
1055
1056 /* Sanity check 3: all vreg-rreg bindings must bind registers
1057 of the same class. */
1058 for (Int j = 0; j < n_rregs; j++) {
1059 if (rreg_state[j].disp != Bound) {
1060 vassert(rreg_state[j].eq_spill_slot == False);
1061 continue;
1062 }
1063 vassert(hregClass(univ->regs[j])
1064 == hregClass(rreg_state[j].vreg));
1065 vassert( hregIsVirtual(rreg_state[j].vreg));
1066 }
1067
1068 /* Sanity check 4: the vreg_state and rreg_state
1069 mutually-redundant mappings are consistent. If
1070 rreg_state[j].vreg points at some vreg_state entry then
1071 that vreg_state entry should point back at
1072 rreg_state[j]. */
1073 for (Int j = 0; j < n_rregs; j++) {
1074 if (rreg_state[j].disp != Bound)
1075 continue;
1076 Int k = hregIndex(rreg_state[j].vreg);
1077 vassert(IS_VALID_VREGNO(k));
1078 vassert(vreg_state[k] == j);
1079 }
1080 for (Int j = 0; j < n_vregs; j++) {
1081 Int k = vreg_state[j];
1082 if (k == INVALID_RREG_NO)
1083 continue;
1084 vassert(IS_VALID_RREGNO(k));
1085 vassert(rreg_state[k].disp == Bound);
1086 vassert(hregIndex(rreg_state[k].vreg) == j);
1087 }
1088
1089 } /* if (do_sanity_check) */
1090
1091 /* ------------ end of Sanity checks ------------ */
1092
1093 /* Do various optimisations pertaining to register coalescing
1094 and preferencing:
1095 MOV v <-> v coalescing (done here).
1096 MOV v <-> r coalescing (not yet, if ever)
1097 */
1098 /* If doing a reg-reg move between two vregs, and the src's live
1099 range ends here and the dst's live range starts here, bind
1100 the dst to the src's rreg, and that's all. */
1101 HReg vregS = INVALID_HREG;
1102 HReg vregD = INVALID_HREG;
1103 if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
1104 if (!hregIsVirtual(vregS)) goto cannot_coalesce;
1105 if (!hregIsVirtual(vregD)) goto cannot_coalesce;
1106 /* Check that *isMove is not telling us a bunch of lies ... */
1107 vassert(hregClass(vregS) == hregClass(vregD));
1108 Int k = hregIndex(vregS);
1109 Int m = hregIndex(vregD);
1110 vassert(IS_VALID_VREGNO(k));
1111 vassert(IS_VALID_VREGNO(m));
1112 if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
1113 if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
1114 if (DEBUG_REGALLOC) {
1115 vex_printf("COALESCE ");
1116 (*ppReg)(vregS);
1117 vex_printf(" -> ");
1118 (*ppReg)(vregD);
1119 vex_printf("\n\n");
1120 }
1121 /* Find the state entry for vregS. */
1122 Int n = vreg_state[k]; /* k is the index of vregS */
1123 if (n == INVALID_RREG_NO) {
1124 /* vregS is not currently in a real register. So we can't
1125 do the coalescing. Give up. */
1126 goto cannot_coalesce;
1127 }
1128 vassert(IS_VALID_RREGNO(n));
1129
1130 /* Finally, we can do the coalescing. It's trivial -- merely
1131 claim vregS's register for vregD. */
1132 rreg_state[n].vreg = vregD;
1133 vassert(IS_VALID_VREGNO(hregIndex(vregD)));
1134 vassert(IS_VALID_VREGNO(hregIndex(vregS)));
1135 vreg_state[hregIndex(vregD)] = toShort(n);
1136 vreg_state[hregIndex(vregS)] = INVALID_RREG_NO;
1137
1138 /* This rreg has become associated with a different vreg and
1139 hence with a different spill slot. Play safe. */
1140 rreg_state[n].eq_spill_slot = False;
1141
1142 /* Move on to the next insn. We skip the post-insn stuff for
1143 fixed registers, since this move should not interact with
1144 them in any way. */
1145 continue;
1146 }
1147 cannot_coalesce:
1148
1149 /* ------ Free up rregs bound to dead vregs ------ */
1150
1151 /* Look for vregs whose live range has just ended, and
1152 mark the associated rreg as free. */
1153
1154 for (Int j = 0; j < n_rregs; j++) {
1155 if (rreg_state[j].disp != Bound)
1156 continue;
1157 UInt vregno = hregIndex(rreg_state[j].vreg);
1158 vassert(IS_VALID_VREGNO(vregno));
1159 if (vreg_lrs[vregno].dead_before <= ii) {
1160 rreg_state[j].disp = Free;
1161 rreg_state[j].eq_spill_slot = False;
1162 Int m = hregIndex(rreg_state[j].vreg);
1163 vassert(IS_VALID_VREGNO(m));
1164 vreg_state[m] = INVALID_RREG_NO;
1165 if (DEBUG_REGALLOC) {
1166 vex_printf("free up ");
1167 (*ppReg)(univ->regs[j]);
1168 vex_printf("\n");
1169 }
1170 }
1171 }
1172
1173 /* ------ Pre-instruction actions for fixed rreg uses ------ */
1174
1175 /* Now we have to deal with rregs which are about to be made
1176 live by this instruction -- in other words, are entering into
1177 one of their live ranges. If any such rreg holds a vreg, we
1178 will have to free up the rreg. The simplest solution which
1179 is correct is to spill the rreg.
1180
1181 Note we could do better:
1182 * Could move it into some other free rreg, if one is available
1183
1184 Do this efficiently, by incrementally stepping along an array
1185 of rreg HLRs that are known to be sorted by start point
1186 (their .live_after field).
1187 */
1188 while (True) {
1189 vassert(rreg_lrs_la_next >= 0);
1190 vassert(rreg_lrs_la_next <= rreg_lrs_used);
1191 if (rreg_lrs_la_next == rreg_lrs_used)
1192 break; /* no more real reg live ranges to consider */
1193 if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
1194 break; /* next live range does not yet start */
1195 vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
1196 /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
1197 Find the associated rreg_state entry. */
1198 /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
1199 Real register live ranges are guaranteed to be well-formed
1200 in that they start with a write to the register -- Stage 2
1201 rejects any code not satisfying this. So the correct
1202 question to ask is whether
1203 rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
1204 whether the reg becomes live after this insn -- rather
1205 than before it. */
1206 if (DEBUG_REGALLOC) {
1207 vex_printf("need to free up rreg: ");
1208 (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
1209 vex_printf("\n\n");
1210 }
1211 Int k = hregIndex(rreg_lrs_la[rreg_lrs_la_next].rreg);
1212
1213 /* If this fails, we don't have an entry for this rreg.
1214 Which we should. */
1215 vassert(IS_VALID_RREGNO(k));
1216 Int m = hregIndex(rreg_state[k].vreg);
1217 if (rreg_state[k].disp == Bound) {
1218 /* Yes, there is an associated vreg. Spill it if it's
1219 still live. */
1220 vassert(IS_VALID_VREGNO(m));
1221 vreg_state[m] = INVALID_RREG_NO;
1222 if (vreg_lrs[m].dead_before > ii) {
1223 vassert(vreg_lrs[m].reg_class != HRcINVALID);
1224 if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
1225 HInstr* spill1 = NULL;
1226 HInstr* spill2 = NULL;
1227 (*genSpill)( &spill1, &spill2, univ->regs[k],
1228 vreg_lrs[m].spill_offset, mode64 );
1229 vassert(spill1 || spill2); /* can't both be NULL */
1230 if (spill1)
1231 EMIT_INSTR(spill1);
1232 if (spill2)
1233 EMIT_INSTR(spill2);
1234 }
1235 rreg_state[k].eq_spill_slot = True;
1236 }
1237 }
1238 rreg_state[k].disp = Unavail;
1239 rreg_state[k].vreg = INVALID_HREG;
1240 rreg_state[k].eq_spill_slot = False;
1241
1242 /* check for further rregs entering HLRs at this point */
1243 rreg_lrs_la_next++;
1244 }
1245
1246 if (DEBUG_REGALLOC) {
1247 vex_printf("After pre-insn actions for fixed regs:\n");
1248 PRINT_STATE;
1249 vex_printf("\n");
1250 }
1251
1252 /* ------ Deal with the current instruction. ------ */
1253
1254 /* Finally we can begin the processing of this instruction
1255 itself. The aim is to free up enough rregs for this insn.
1256 This may generate spill stores since we may have to evict
1257 some vregs currently in rregs. Also generates spill loads.
1258 We also build up the final vreg->rreg mapping to be applied
1259 to the insn. */
1260
1261 initHRegRemap(&remap);
1262
1263 /* ------------ BEGIN directReload optimisation ----------- */
1264
1265 /* If the instruction reads exactly one vreg which is currently
1266 in a spill slot, and this is last use of that vreg, see if we
1267 can convert the instruction into one that reads directly from
1268 the spill slot. This is clearly only possible for x86 and
1269 amd64 targets, since ppc and arm are load-store
1270 architectures. If successful, replace instrs_in->arr[ii]
1271 with this new instruction, and recompute its reg usage, so
1272 that the change is invisible to the standard-case handling
1273 that follows. */
1274
1275 if (directReload && reg_usage_arr[ii].n_vRegs <= 2) {
1276 Bool debug_direct_reload = False;
1277 HReg cand = INVALID_HREG;
1278 Bool nreads = 0;
1279 Short spilloff = 0;
1280
1281 for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
1282
1283 HReg vreg = reg_usage_arr[ii].vRegs[j];
1284 vassert(hregIsVirtual(vreg));
1285
1286 if (reg_usage_arr[ii].vMode[j] == HRmRead) {
1287 nreads++;
1288 Int m = hregIndex(vreg);
1289 vassert(IS_VALID_VREGNO(m));
1290 Int k = vreg_state[m];
1291 if (!IS_VALID_RREGNO(k)) {
1292 /* ok, it is spilled. Now, is this its last use? */
1293 vassert(vreg_lrs[m].dead_before >= ii+1);
1294 if (vreg_lrs[m].dead_before == ii+1
1295 && hregIsInvalid(cand)) {
1296 spilloff = vreg_lrs[m].spill_offset;
1297 cand = vreg;
1298 }
1299 }
1300 }
1301 }
1302
1303 if (nreads == 1 && ! hregIsInvalid(cand)) {
1304 HInstr* reloaded;
1305 if (reg_usage_arr[ii].n_vRegs == 2)
1306 vassert(! sameHReg(reg_usage_arr[ii].vRegs[0],
1307 reg_usage_arr[ii].vRegs[1]));
1308
1309 reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
1310 if (debug_direct_reload && !reloaded) {
1311 vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
1312 ppInstr(instrs_in->arr[ii], mode64);
1313 }
1314 if (reloaded) {
1315 /* Update info about the insn, so it looks as if it had
1316 been in this form all along. */
1317 instrs_in->arr[ii] = reloaded;
            (*getRegUsage)( &reg_usage_arr[ii], instrs_in->arr[ii], mode64 );
1319 if (debug_direct_reload && !reloaded) {
1320 vex_printf(" --> ");
1321 ppInstr(reloaded, mode64);
1322 }
1323 }
1324
1325 if (debug_direct_reload && !reloaded)
1326 vex_printf("\n");
1327 }
1328
1329 }
1330
1331 /* ------------ END directReload optimisation ------------ */
1332
1333 /* for each virtual reg mentioned in the insn ... */
1334 for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
1335
1336 HReg vreg = reg_usage_arr[ii].vRegs[j];
1337 vassert(hregIsVirtual(vreg));
1338
1339 if (0) {
1340 vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
1341 }
1342
1343 /* Now we're trying to find a rreg for "vreg". First of all,
1344 if it already has an rreg assigned, we don't need to do
1345 anything more. Inspect the current state to find out. */
1346 Int m = hregIndex(vreg);
1347 vassert(IS_VALID_VREGNO(m));
1348 Int n = vreg_state[m];
1349 if (IS_VALID_RREGNO(n)) {
1350 vassert(rreg_state[n].disp == Bound);
1351 addToHRegRemap(&remap, vreg, univ->regs[n]);
1352 /* If this rreg is written or modified, mark it as different
1353 from any spill slot value. */
1354 if (reg_usage_arr[ii].vMode[j] != HRmRead)
1355 rreg_state[n].eq_spill_slot = False;
1356 continue;
1357 } else {
1358 vassert(n == INVALID_RREG_NO);
1359 }
1360
1361 /* No luck. The next thing to do is see if there is a
1362 currently free rreg available, of the correct class. If
1363 so, bag it. NOTE, we could improve this by selecting an
1364 rreg for which the next live-range event is as far ahead
1365 as possible. */
1366 Int k_suboptimal = -1;
1367 Int k;
1368 for (k = 0; k < n_rregs; k++) {
1369 if (rreg_state[k].disp != Free
1370 || hregClass(univ->regs[k]) != hregClass(vreg))
1371 continue;
1372 if (rreg_state[k].has_hlrs) {
1373 /* Well, at least we can use k_suboptimal if we really
1374 have to. Keep on looking for a better candidate. */
1375 k_suboptimal = k;
1376 } else {
1377 /* Found a preferable reg. Use it. */
1378 k_suboptimal = -1;
1379 break;
1380 }
1381 }
1382 if (k_suboptimal >= 0)
1383 k = k_suboptimal;
1384
1385 if (k < n_rregs) {
1386 rreg_state[k].disp = Bound;
1387 rreg_state[k].vreg = vreg;
1388 Int p = hregIndex(vreg);
1389 vassert(IS_VALID_VREGNO(p));
1390 vreg_state[p] = toShort(k);
1391 addToHRegRemap(&remap, vreg, univ->regs[k]);
1392 /* Generate a reload if needed. This only creates needed
1393 reloads because the live range builder for vregs will
1394 guarantee that the first event for a vreg is a write.
1395 Hence, if this reference is not a write, it cannot be
1396 the first reference for this vreg, and so a reload is
1397 indeed needed. */
1398 if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
1399 vassert(vreg_lrs[p].reg_class != HRcINVALID);
1400 HInstr* reload1 = NULL;
1401 HInstr* reload2 = NULL;
1402 (*genReload)( &reload1, &reload2, univ->regs[k],
1403 vreg_lrs[p].spill_offset, mode64 );
1404 vassert(reload1 || reload2); /* can't both be NULL */
1405 if (reload1)
1406 EMIT_INSTR(reload1);
1407 if (reload2)
1408 EMIT_INSTR(reload2);
1409 /* This rreg is read or modified by the instruction.
1410 If it's merely read we can claim it now equals the
1411 spill slot, but not so if it is modified. */
1412 if (reg_usage_arr[ii].vMode[j] == HRmRead) {
1413 rreg_state[k].eq_spill_slot = True;
1414 } else {
1415 vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
1416 rreg_state[k].eq_spill_slot = False;
1417 }
1418 } else {
1419 rreg_state[k].eq_spill_slot = False;
1420 }
1421
1422 continue;
1423 }
1424
1425 /* Well, now we have no option but to spill a vreg. It's
1426 important to make a good choice of vreg to spill, and of
1427 course we need to be careful not to spill a vreg which is
1428 needed by this insn. */
1429
1430 /* First, mark in the rreg_state, those rregs which are not spill
1431 candidates, due to holding a vreg mentioned by this
1432 instruction. Or being of the wrong class. */
1433 for (k = 0; k < n_rregs; k++) {
1434 rreg_state[k].is_spill_cand = False;
1435 if (rreg_state[k].disp != Bound)
1436 continue;
1437 if (hregClass(univ->regs[k]) != hregClass(vreg))
1438 continue;
1439 rreg_state[k].is_spill_cand = True;
1440 /* Note, the following loop visits only the virtual regs
1441 mentioned by the instruction. */
1442 for (m = 0; m < reg_usage_arr[ii].n_vRegs; m++) {
1443 if (sameHReg(rreg_state[k].vreg, reg_usage_arr[ii].vRegs[m])) {
1444 rreg_state[k].is_spill_cand = False;
1445 break;
1446 }
1447 }
1448 }
1449
1450 /* We can choose to spill any rreg satisfying
1451 rreg_state[r].is_spill_cand (so to speak). Choose r so that
1452 the next use of its associated vreg is as far ahead as
1453 possible, in the hope that this will minimise the number
1454 of consequent reloads required. */
1455 Int spillee
1456 = findMostDistantlyMentionedVReg (
1457 reg_usage_arr, ii+1, instrs_in->arr_used, rreg_state, n_rregs );
1458
1459 if (spillee == -1) {
1460 /* Hmmmmm. There don't appear to be any spill candidates.
1461 We're hosed. */
1462 vex_printf("reg_alloc: can't find a register in class: ");
1463 ppHRegClass(hregClass(vreg));
1464 vex_printf("\n");
1465 vpanic("reg_alloc: can't create a free register.");
1466 }
1467
1468 /* Right. So we're going to spill rreg_state[spillee]. */
1469 vassert(IS_VALID_RREGNO(spillee));
1470 vassert(rreg_state[spillee].disp == Bound);
1471 /* check it's the right class */
1472 vassert(hregClass(univ->regs[spillee]) == hregClass(vreg));
1473 /* check we're not ejecting the vreg for which we are trying
1474 to free up a register. */
1475 vassert(! sameHReg(rreg_state[spillee].vreg, vreg));
1476
1477 m = hregIndex(rreg_state[spillee].vreg);
1478 vassert(IS_VALID_VREGNO(m));
1479
1480 /* So here's the spill store. Assert that we're spilling a
1481 live vreg. */
1482 vassert(vreg_lrs[m].dead_before > ii);
1483 vassert(vreg_lrs[m].reg_class != HRcINVALID);
1484 if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
1485 HInstr* spill1 = NULL;
1486 HInstr* spill2 = NULL;
1487 (*genSpill)( &spill1, &spill2, univ->regs[spillee],
1488 vreg_lrs[m].spill_offset, mode64 );
1489 vassert(spill1 || spill2); /* can't both be NULL */
1490 if (spill1)
1491 EMIT_INSTR(spill1);
1492 if (spill2)
1493 EMIT_INSTR(spill2);
1494 }
1495
1496 /* Update the rreg_state to reflect the new assignment for this
1497 rreg. */
1498 rreg_state[spillee].vreg = vreg;
1499 vreg_state[m] = INVALID_RREG_NO;
1500
1501 rreg_state[spillee].eq_spill_slot = False; /* be safe */
1502
1503 m = hregIndex(vreg);
1504 vassert(IS_VALID_VREGNO(m));
1505 vreg_state[m] = toShort(spillee);
1506
1507 /* Now, if this vreg is being read or modified (as opposed to
1508 written), we have to generate a reload for it. */
1509 if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
1510 vassert(vreg_lrs[m].reg_class != HRcINVALID);
1511 HInstr* reload1 = NULL;
1512 HInstr* reload2 = NULL;
1513 (*genReload)( &reload1, &reload2, univ->regs[spillee],
1514 vreg_lrs[m].spill_offset, mode64 );
1515 vassert(reload1 || reload2); /* can't both be NULL */
1516 if (reload1)
1517 EMIT_INSTR(reload1);
1518 if (reload2)
1519 EMIT_INSTR(reload2);
1520 /* This rreg is read or modified by the instruction.
1521 If it's merely read we can claim it now equals the
1522 spill slot, but not so if it is modified. */
1523 if (reg_usage_arr[ii].vMode[j] == HRmRead) {
1524 rreg_state[spillee].eq_spill_slot = True;
1525 } else {
1526 vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
1527 rreg_state[spillee].eq_spill_slot = False;
1528 }
1529 }
1530
1531 /* So after much twisting and turning, we have vreg mapped to
1532 rreg_state[spillee].rreg. Note that in the map. */
1533 addToHRegRemap(&remap, vreg, univ->regs[spillee]);
1534
1535 } /* iterate over virtual registers in this instruction. */
1536
1537 /* We've finished clowning around with registers in this instruction.
1538 Three results:
1539 - the running rreg_state[] has been updated
1540 - a suitable vreg->rreg mapping for this instruction has been
1541 constructed
1542 - spill and reload instructions may have been emitted.
1543
1544 The final step is to apply the mapping to the instruction,
1545 and emit that.
1546 */
1547
1548 /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
1549 (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
1550 EMIT_INSTR( instrs_in->arr[ii] );
1551
1552 if (DEBUG_REGALLOC) {
1553 vex_printf("After dealing with current insn:\n");
1554 PRINT_STATE;
1555 vex_printf("\n");
1556 }
1557
1558 /* ------ Post-instruction actions for fixed rreg uses ------ */
1559
1560 /* Now we need to check for rregs exiting fixed live ranges
1561 after this instruction, and if so mark them as free. */
1562 while (True) {
1563 vassert(rreg_lrs_db_next >= 0);
1564 vassert(rreg_lrs_db_next <= rreg_lrs_used);
1565 if (rreg_lrs_db_next == rreg_lrs_used)
1566 break; /* no more real reg live ranges to consider */
         if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
            break; /* next live range does not yet end */
         vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
         /* rreg_lrs_db[rreg_lrs_db_next].rreg is exiting a hard live
            range.  Mark it as such in the main rreg_state array. */
1572 HReg reg = rreg_lrs_db[rreg_lrs_db_next].rreg;
1573 vassert(!hregIsVirtual(reg));
1574 Int k = hregIndex(reg);
1575 vassert(IS_VALID_RREGNO(k));
1576 vassert(rreg_state[k].disp == Unavail);
1577 rreg_state[k].disp = Free;
1578 rreg_state[k].vreg = INVALID_HREG;
1579 rreg_state[k].eq_spill_slot = False;
1580
1581 /* check for further rregs leaving HLRs at this point */
1582 rreg_lrs_db_next++;
1583 }
1584
1585 if (DEBUG_REGALLOC) {
1586 vex_printf("After post-insn actions for fixed regs:\n");
1587 PRINT_STATE;
1588 vex_printf("\n");
1589 }
1590
1591 } /* iterate over insns */
1592
1593 /* ------ END: Process each insn in turn. ------ */
1594
1595 /* free(rreg_state); */
1596 /* free(rreg_lrs); */
1597 /* if (vreg_lrs) free(vreg_lrs); */
1598
1599 /* Paranoia */
1600 vassert(rreg_lrs_la_next == rreg_lrs_used);
1601 vassert(rreg_lrs_db_next == rreg_lrs_used);
1602
1603 return instrs_out;
1604
1605 # undef INVALID_INSTRNO
1606 # undef EMIT_INSTR
1607 # undef PRINT_STATE
1608 }
1609
1610
1611
1612 /*---------------------------------------------------------------*/
1613 /*--- host_reg_alloc2.c ---*/
1614 /*---------------------------------------------------------------*/
1615