1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #define RA_DEBUG 0
28
29 #if RA_DEBUG
30 #define RA_DUMP(q) do { q } while (0)
31 #else
32 #define RA_DUMP(q)
33 #endif
34
35 #include <cstring>
36
37 #include "sb_bc.h"
38 #include "sb_shader.h"
39 #include "sb_pass.h"
40
41 namespace r600_sb {
42
43 class regbits {
44 typedef uint32_t basetype;
45 static const unsigned bt_bytes = sizeof(basetype);
46 static const unsigned bt_index_shift = 5;
47 static const unsigned bt_index_mask = (1u << bt_index_shift) - 1;
48 static const unsigned bt_bits = bt_bytes << 3;
49 static const unsigned size = MAX_GPR * 4 / bt_bits;
50
51 basetype dta[size];
52
53 unsigned num_temps;
54
55 public:
56
regbits(unsigned num_temps)57 regbits(unsigned num_temps) : dta(), num_temps(num_temps) {}
regbits(unsigned num_temps,unsigned value)58 regbits(unsigned num_temps, unsigned value) : num_temps(num_temps)
59 { set_all(value); }
60
regbits(shader & sh,val_set & vs)61 regbits(shader &sh, val_set &vs) : num_temps(sh.get_ctx().alu_temp_gprs)
62 { set_all(1); from_val_set(sh, vs); }
63
64 void set_all(unsigned val);
65 void from_val_set(shader &sh, val_set &vs);
66
67 void set(unsigned index);
68 void clear(unsigned index);
69 bool get(unsigned index);
70
71 void set(unsigned index, unsigned val);
72
73 sel_chan find_free_bit();
74 sel_chan find_free_chans(unsigned mask);
75 sel_chan find_free_chan_by_mask(unsigned mask);
76 sel_chan find_free_array(unsigned size, unsigned mask);
77
78 void dump();
79 };
80
81 // =======================================
82
dump()83 void regbits::dump() {
84 for (unsigned i = 0; i < size * bt_bits; ++i) {
85
86 if (!(i & 31))
87 sblog << "\n";
88
89 if (!(i & 3)) {
90 sblog.print_w(i / 4, 7);
91 sblog << " ";
92 }
93
94 sblog << (get(i) ? 1 : 0);
95 }
96 }
97
98
set_all(unsigned v)99 void regbits::set_all(unsigned v) {
100 memset(&dta, v ? 0xFF : 0x00, size * bt_bytes);
101 }
102
from_val_set(shader & sh,val_set & vs)103 void regbits::from_val_set(shader &sh, val_set& vs) {
104 val_set &s = vs;
105 unsigned g;
106 for (val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) {
107 value *v = *I;
108 if (v->is_any_gpr()) {
109 g = v->get_final_gpr();
110 if (!g)
111 continue;
112 } else
113 continue;
114
115 assert(g);
116 --g;
117 assert(g < 512);
118 clear(g);
119 }
120 }
121
set(unsigned index)122 void regbits::set(unsigned index) {
123 unsigned ih = index >> bt_index_shift;
124 unsigned il = index & bt_index_mask;
125 dta[ih] |= ((basetype)1u << il);
126 }
127
clear(unsigned index)128 void regbits::clear(unsigned index) {
129 unsigned ih = index >> bt_index_shift;
130 unsigned il = index & bt_index_mask;
131 assert(ih < size);
132 dta[ih] &= ~((basetype)1u << il);
133 }
134
get(unsigned index)135 bool regbits::get(unsigned index) {
136 unsigned ih = index >> bt_index_shift;
137 unsigned il = index & bt_index_mask;
138 return dta[ih] & ((basetype)1u << il);
139 }
140
set(unsigned index,unsigned val)141 void regbits::set(unsigned index, unsigned val) {
142 unsigned ih = index >> bt_index_shift;
143 unsigned il = index & bt_index_mask;
144 basetype bm = 1u << il;
145 dta[ih] = (dta[ih] & ~bm) | (val << il);
146 }
147
148 // free register for ra means the bit is set
find_free_bit()149 sel_chan regbits::find_free_bit() {
150 unsigned elt = 0;
151 unsigned bit = 0;
152
153 while (elt < size && !dta[elt])
154 ++elt;
155
156 if (elt >= size)
157 return 0;
158
159 bit = __builtin_ctz(dta[elt]) + (elt << bt_index_shift);
160
161 assert(bit < ((MAX_GPR - num_temps) << 2));
162
163 return bit + 1;
164 }
165
166 // find free gpr component to use as indirectly addressable array
find_free_array(unsigned length,unsigned mask)167 sel_chan regbits::find_free_array(unsigned length, unsigned mask) {
168 unsigned cc[4] = {};
169
170 // FIXME optimize this. though hopefully we won't have a lot of arrays
171 for (unsigned a = 0; a < MAX_GPR - num_temps; ++a) {
172 for(unsigned c = 0; c < MAX_CHAN; ++c) {
173 if (mask & (1 << c)) {
174 if (get((a << 2) | c)) {
175 if (++cc[c] == length)
176 return sel_chan(a - length + 1, c);
177 } else {
178 cc[c] = 0;
179 }
180 }
181 }
182 }
183 return 0;
184 }
185
find_free_chans(unsigned mask)186 sel_chan regbits::find_free_chans(unsigned mask) {
187 unsigned elt = 0;
188 unsigned bit = 0;
189
190 assert (!(mask & ~0xF));
191 basetype cd = dta[elt];
192
193 do {
194 if (!cd) {
195 if (++elt < size) {
196 cd = dta[elt];
197 bit = 0;
198 continue;
199 } else
200 return 0;
201 }
202
203 unsigned p = __builtin_ctz(cd) & ~(basetype)3u;
204
205 assert (p <= bt_bits - bit);
206 bit += p;
207 cd >>= p;
208
209 if ((cd & mask) == mask) {
210 return ((elt << bt_index_shift) | bit) + 1;
211 }
212
213 bit += 4;
214 cd >>= 4;
215
216 } while (1);
217
218 return 0;
219 }
220
find_free_chan_by_mask(unsigned mask)221 sel_chan regbits::find_free_chan_by_mask(unsigned mask) {
222 unsigned elt = 0;
223 unsigned bit = 0;
224
225 assert (!(mask & ~0xF));
226 basetype cd = dta[elt];
227
228 do {
229 if (!cd) {
230 if (++elt < size) {
231 cd = dta[elt];
232 bit = 0;
233 continue;
234 } else
235 return 0;
236 }
237
238 unsigned p = __builtin_ctz(cd) & ~(basetype)3u;
239
240 assert (p <= bt_bits - bit);
241 bit += p;
242 cd >>= p;
243
244 if (cd & mask) {
245 unsigned nb = __builtin_ctz(cd & mask);
246 unsigned ofs = ((elt << bt_index_shift) | bit);
247 return nb + ofs + 1;
248 }
249
250 bit += 4;
251 cd >>= 4;
252
253 } while (1);
254
255 return 0;
256 }
257
258 // ================================
259
alloc_arrays()260 void ra_init::alloc_arrays() {
261
262 gpr_array_vec &ga = sh.arrays();
263
264 for(gpr_array_vec::iterator I = ga.begin(), E = ga.end(); I != E; ++I) {
265 gpr_array *a = *I;
266
267 RA_DUMP(
268 sblog << "array [" << a->array_size << "] at " << a->base_gpr << "\n";
269 sblog << "\n";
270 );
271
272 // skip preallocated arrays (e.g. with preloaded inputs)
273 if (a->gpr) {
274 RA_DUMP( sblog << " FIXED at " << a->gpr << "\n"; );
275 continue;
276 }
277
278 bool dead = a->is_dead();
279
280 if (dead) {
281 RA_DUMP( sblog << " DEAD\n"; );
282 continue;
283 }
284
285 val_set &s = a->interferences;
286
287
288 for (val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) {
289 value *v = *I;
290 if (v->array == a)
291 s.remove_val(v);
292 }
293
294 RA_DUMP(
295 sblog << " interf: ";
296 dump::dump_set(sh, s);
297 sblog << "\n";
298 );
299
300 regbits rb(sh, s);
301
302 sel_chan base = rb.find_free_array(a->array_size,
303 (1 << a->base_gpr.chan()));
304
305 RA_DUMP( sblog << " found base: " << base << "\n"; );
306
307 a->gpr = base;
308 }
309 }
310
311
run()312 int ra_init::run() {
313
314 alloc_arrays();
315
316 ra_node(sh.root);
317 return 0;
318 }
319
ra_node(container_node * c)320 void ra_init::ra_node(container_node* c) {
321
322 for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
323 node *n = *I;
324 if (n->type == NT_OP) {
325 process_op(n);
326 }
327 if (n->is_container() && !n->is_alu_packed()) {
328 ra_node(static_cast<container_node*>(n));
329 }
330 }
331 }
332
process_op(node * n)333 void ra_init::process_op(node* n) {
334
335 bool copy = n->is_copy_mov();
336
337 RA_DUMP(
338 sblog << "ra_init: process_op : ";
339 dump::dump_op(n);
340 sblog << "\n";
341 );
342
343 if (n->is_alu_packed()) {
344 for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) {
345 value *v = *I;
346 if (v && v->is_sgpr() && v->constraint &&
347 v->constraint->kind == CK_PACKED_BS) {
348 color_bs_constraint(v->constraint);
349 break;
350 }
351 }
352 }
353
354 if (n->is_fetch_inst() || n->is_cf_inst()) {
355 for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) {
356 value *v = *I;
357 if (v && v->is_sgpr())
358 color(v);
359 }
360 }
361
362 for (vvec::iterator I = n->dst.begin(), E = n->dst.end(); I != E; ++I) {
363 value *v = *I;
364 if (!v)
365 continue;
366 if (v->is_sgpr()) {
367 if (!v->gpr) {
368 if (copy && !v->constraint) {
369 value *s = *(n->src.begin() + (I - n->dst.begin()));
370 assert(s);
371 if (s->is_sgpr()) {
372 assign_color(v, s->gpr);
373 }
374 } else
375 color(v);
376 }
377 }
378 }
379 }
380
color_bs_constraint(ra_constraint * c)381 void ra_init::color_bs_constraint(ra_constraint* c) {
382 vvec &vv = c->values;
383 assert(vv.size() <= 8);
384
385 RA_DUMP(
386 sblog << "color_bs_constraint: ";
387 dump::dump_vec(vv);
388 sblog << "\n";
389 );
390
391 regbits rb(ctx.alu_temp_gprs);
392
393 unsigned chan_count[4] = {};
394 unsigned allowed_chans = 0x0F;
395
396 for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
397 value *v = *I;
398
399 if (!v || v->is_dead())
400 continue;
401
402 sel_chan gpr = v->get_final_gpr();
403
404 val_set interf;
405
406 if (v->chunk)
407 sh.coal.get_chunk_interferences(v->chunk, interf);
408 else
409 interf = v->interferences;
410
411 RA_DUMP(
412 sblog << " processing " << *v << " interferences : ";
413 dump::dump_set(sh, interf);
414 sblog << "\n";
415 );
416
417 if (gpr) {
418 unsigned chan = gpr.chan();
419 if (chan_count[chan] < 3) {
420 ++chan_count[chan];
421 continue;
422 } else {
423 v->flags &= ~VLF_FIXED;
424 allowed_chans &= ~(1 << chan);
425 assert(allowed_chans);
426 }
427 }
428
429 v->gpr = 0;
430
431 gpr = 1;
432 rb.set_all(1);
433
434
435 rb.from_val_set(sh, interf);
436
437 RA_DUMP(
438 sblog << " regbits : ";
439 rb.dump();
440 sblog << "\n";
441 );
442
443 while (allowed_chans && gpr.sel() < sh.num_nontemp_gpr()) {
444
445 while (rb.get(gpr - 1) == 0)
446 gpr = gpr + 1;
447
448 RA_DUMP(
449 sblog << " trying " << gpr << "\n";
450 );
451
452 unsigned chan = gpr.chan();
453 if (chan_count[chan] < 3) {
454 ++chan_count[chan];
455
456 if (v->chunk) {
457 vvec::iterator F = std::find(v->chunk->values.begin(),
458 v->chunk->values.end(),
459 v);
460 v->chunk->values.erase(F);
461 v->chunk = NULL;
462 }
463
464 assign_color(v, gpr);
465 break;
466 } else {
467 allowed_chans &= ~(1 << chan);
468 }
469 gpr = gpr + 1;
470 }
471
472 if (!gpr) {
473 sblog << "color_bs_constraint: failed...\n";
474 assert(!"coloring failed");
475 }
476 }
477 }
478
color(value * v)479 void ra_init::color(value* v) {
480
481 if (v->constraint && v->constraint->kind == CK_PACKED_BS) {
482 color_bs_constraint(v->constraint);
483 return;
484 }
485
486 if (v->chunk && v->chunk->is_fixed())
487 return;
488
489 RA_DUMP(
490 sblog << "coloring ";
491 dump::dump_val(v);
492 sblog << " interferences ";
493 dump::dump_set(sh, v->interferences);
494 sblog << "\n";
495 );
496
497 if (v->is_reg_pinned()) {
498 assert(v->is_chan_pinned());
499 assign_color(v, v->pin_gpr);
500 return;
501 }
502
503 regbits rb(sh, v->interferences);
504 sel_chan c;
505
506 if (v->is_chan_pinned()) {
507 RA_DUMP( sblog << "chan_pinned = " << v->pin_gpr.chan() << " "; );
508 unsigned mask = 1 << v->pin_gpr.chan();
509 c = rb.find_free_chans(mask) + v->pin_gpr.chan();
510 } else {
511 unsigned cm = get_preferable_chan_mask();
512 RA_DUMP( sblog << "pref chan mask: " << cm << "\n"; );
513 c = rb.find_free_chan_by_mask(cm);
514 }
515
516 assert(c && c.sel() < 128 - ctx.alu_temp_gprs && "color failed");
517 assign_color(v, c);
518 }
519
assign_color(value * v,sel_chan c)520 void ra_init::assign_color(value* v, sel_chan c) {
521 add_prev_chan(c.chan());
522 v->gpr = c;
523 RA_DUMP(
524 sblog << "colored ";
525 dump::dump_val(v);
526 sblog << " to " << c << "\n";
527 );
528 }
529
530 // ===================================================
531
run()532 int ra_split::run() {
533 split(sh.root);
534 return 0;
535 }
536
split_phi_src(container_node * loc,container_node * c,unsigned id,bool loop)537 void ra_split::split_phi_src(container_node *loc, container_node *c,
538 unsigned id, bool loop) {
539 for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
540 node *p = *I;
541 value* &v = p->src[id], *d = p->dst[0];
542 assert(v);
543
544 if (!d->is_sgpr() || v->is_undef())
545 continue;
546
547 value *t = sh.create_temp_value();
548 alu_node* n = sh.create_copy_mov(t, v);
549 if (loop)
550 n->flags |= NF_DONT_MOVE;
551 if (loop && id == 0)
552 loc->insert_before(n);
553 else
554 loc->push_back(n);
555 v = t;
556
557 sh.coal.add_edge(v, d, coalescer::phi_cost);
558 }
559 }
560
split_phi_dst(node * loc,container_node * c,bool loop)561 void ra_split::split_phi_dst(node* loc, container_node *c, bool loop) {
562 for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
563 node *p = *I;
564 value* &v = p->dst[0];
565 assert(v);
566
567 if (!v->is_sgpr())
568 continue;
569
570 value *t = sh.create_temp_value();
571 node *cp = sh.create_copy_mov(v, t);
572 if (loop) {
573 cp->flags |= NF_DONT_MOVE;
574 static_cast<container_node*>(loc)->push_front(cp);
575 } else
576 loc->insert_after(cp);
577 v = t;
578 }
579 }
580
581
init_phi_constraints(container_node * c)582 void ra_split::init_phi_constraints(container_node *c) {
583 for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
584 node *p = *I;
585 ra_constraint *cc = sh.coal.create_constraint(CK_PHI);
586 cc->values.push_back(p->dst[0]);
587
588 for (vvec::iterator I = p->src.begin(), E = p->src.end(); I != E; ++I) {
589 value *v = *I;
590 if (v->is_sgpr())
591 cc->values.push_back(v);
592 }
593
594 cc->update_values();
595 }
596 }
597
split(container_node * n)598 void ra_split::split(container_node* n) {
599
600 if (n->type == NT_DEPART) {
601 depart_node *d = static_cast<depart_node*>(n);
602 if (d->target->phi)
603 split_phi_src(d, d->target->phi, d->dep_id, false);
604 } else if (n->type == NT_REPEAT) {
605 repeat_node *r = static_cast<repeat_node*>(n);
606 if (r->target->loop_phi)
607 split_phi_src(r, r->target->loop_phi, r->rep_id, true);
608 } else if (n->type == NT_REGION) {
609 region_node *r = static_cast<region_node*>(n);
610 if (r->phi) {
611 split_phi_dst(r, r->phi, false);
612 }
613 if (r->loop_phi) {
614 split_phi_dst(r->get_entry_code_location(), r->loop_phi,
615 true);
616 split_phi_src(r, r->loop_phi, 0, true);
617 }
618 }
619
620 for (node_riterator N, I = n->rbegin(), E = n->rend(); I != E; I = N) {
621 N = I;
622 ++N;
623 node *o = *I;
624 if (o->type == NT_OP) {
625 split_op(o);
626 } else if (o->is_container()) {
627 split(static_cast<container_node*>(o));
628 }
629 }
630
631 if (n->type == NT_REGION) {
632 region_node *r = static_cast<region_node*>(n);
633 if (r->phi)
634 init_phi_constraints(r->phi);
635 if (r->loop_phi)
636 init_phi_constraints(r->loop_phi);
637 }
638 }
639
split_op(node * n)640 void ra_split::split_op(node* n) {
641 switch(n->subtype) {
642 case NST_ALU_PACKED_INST:
643 split_alu_packed(static_cast<alu_packed_node*>(n));
644 break;
645 case NST_FETCH_INST:
646 case NST_CF_INST:
647 split_vector_inst(n);
648 default:
649 break;
650 }
651 }
652
split_packed_ins(alu_packed_node * n)653 void ra_split::split_packed_ins(alu_packed_node *n) {
654 vvec vv = n->src;
655 vvec sv, dv;
656
657 for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
658
659 value *&v = *I;
660
661 if (v && v->is_any_gpr() && !v->is_undef()) {
662
663 vvec::iterator F = std::find(sv.begin(), sv.end(), v);
664 value *t;
665
666 if (F != sv.end()) {
667 t = *(dv.begin() + (F - sv.begin()));
668 } else {
669 t = sh.create_temp_value();
670 sv.push_back(v);
671 dv.push_back(t);
672 }
673 v = t;
674 }
675 }
676
677 unsigned cnt = sv.size();
678
679 if (cnt > 0) {
680 n->src = vv;
681 for (vvec::iterator SI = sv.begin(), DI = dv.begin(), SE = sv.end();
682 SI != SE; ++SI, ++DI) {
683 n->insert_before(sh.create_copy_mov(*DI, *SI));
684 }
685
686 ra_constraint *c = sh.coal.create_constraint(CK_PACKED_BS);
687 c->values = dv;
688 c->update_values();
689 }
690 }
691
692 // TODO handle other packed ops for cayman
split_alu_packed(alu_packed_node * n)693 void ra_split::split_alu_packed(alu_packed_node* n) {
694 switch (n->op()) {
695 case ALU_OP2_DOT4:
696 case ALU_OP2_DOT4_IEEE:
697 case ALU_OP2_CUBE:
698 split_packed_ins(n);
699 break;
700 default:
701 break;
702 }
703 }
704
split_vec(vvec & vv,vvec & v1,vvec & v2,bool allow_swz)705 void ra_split::split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz) {
706 unsigned ch = 0;
707 for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I, ++ch) {
708
709 value* &o = *I;
710
711 if (o) {
712
713 assert(!o->is_dead());
714
715 if (o->is_undef() || o->is_geometry_emit() || o->is_scratch())
716 continue;
717
718 if (allow_swz && o->is_float_0_or_1())
719 continue;
720
721 value *t;
722 vvec::iterator F =
723 allow_swz ? std::find(v2.begin(), v2.end(), o) : v2.end();
724
725 if (F != v2.end()) {
726 t = *(v1.begin() + (F - v2.begin()));
727 } else {
728 t = sh.create_temp_value();
729
730 if (!allow_swz) {
731 t->flags |= VLF_PIN_CHAN;
732 t->pin_gpr = sel_chan(0, ch);
733 }
734
735 v2.push_back(o);
736 v1.push_back(t);
737 }
738 o = t;
739 }
740 }
741 }
742
split_vector_inst(node * n)743 void ra_split::split_vector_inst(node* n) {
744 ra_constraint *c;
745
746 bool call_fs = n->is_cf_op(CF_OP_CALL_FS);
747 bool no_src_swizzle = n->is_cf_inst() && (n->cf_op_flags() & CF_MEM);
748
749 no_src_swizzle |= n->is_fetch_op(FETCH_OP_VFETCH) ||
750 n->is_fetch_op(FETCH_OP_SEMFETCH);
751
752 no_src_swizzle |= n->is_fetch_inst() && (n->fetch_op_flags() & FF_GDS);
753
754 if (!n->src.empty() && !call_fs) {
755
756 // we may have more than one source vector -
757 // fetch instructions with FF_USEGRAD have gradient values in
758 // src vectors 1 (src[4-7] and 2 (src[8-11])
759
760 unsigned nvec = n->src.size() >> 2;
761 assert(nvec << 2 <= n->src.size());
762
763 for (unsigned nv = 0; nv < nvec; ++nv) {
764 vvec sv, tv, nsrc(4);
765 unsigned arg_start = nv << 2;
766
767 std::copy(n->src.begin() + arg_start,
768 n->src.begin() + arg_start + 4,
769 nsrc.begin());
770
771 split_vec(nsrc, tv, sv, !no_src_swizzle);
772
773 unsigned cnt = sv.size();
774
775 if (no_src_swizzle || cnt) {
776
777 std::copy(nsrc.begin(), nsrc.end(), n->src.begin() + arg_start);
778
779 for(unsigned i = 0, s = tv.size(); i < s; ++i) {
780 n->insert_before(sh.create_copy_mov(tv[i], sv[i]));
781 }
782
783 c = sh.coal.create_constraint(CK_SAME_REG);
784 c->values = tv;
785 c->update_values();
786 }
787 }
788 }
789
790 if (!n->dst.empty()) {
791 vvec sv, tv, ndst = n->dst;
792
793 split_vec(ndst, tv, sv, true);
794
795 if (sv.size()) {
796 n->dst = ndst;
797
798 node *lp = n;
799 for(unsigned i = 0, s = tv.size(); i < s; ++i) {
800 lp->insert_after(sh.create_copy_mov(sv[i], tv[i]));
801 lp = lp->next;
802 }
803
804 if (call_fs) {
805 for (unsigned i = 0, cnt = tv.size(); i < cnt; ++i) {
806 value *v = tv[i];
807 value *s = sv[i];
808 if (!v)
809 continue;
810
811 v->flags |= VLF_PIN_REG | VLF_PIN_CHAN;
812 s->flags &= ~(VLF_PIN_REG | VLF_PIN_CHAN);
813 sel_chan sel;
814
815 if (s->is_rel()) {
816 assert(s->rel->is_const());
817 sel = sel_chan(s->select.sel() +
818 s->rel->get_const_value().u,
819 s->select.chan());
820 } else
821 sel = s->select;
822
823 v->gpr = v->pin_gpr = sel;
824 v->fix();
825 }
826 } else {
827 c = sh.coal.create_constraint(CK_SAME_REG);
828 c->values = tv;
829 c->update_values();
830 }
831 }
832 }
833 }
834
add_prev_chan(unsigned chan)835 void ra_init::add_prev_chan(unsigned chan) {
836 prev_chans = (prev_chans << 4) | (1 << chan);
837 }
838
get_preferable_chan_mask()839 unsigned ra_init::get_preferable_chan_mask() {
840 unsigned i, used_chans = 0;
841 unsigned chans = prev_chans;
842
843 for (i = 0; i < ra_tune; ++i) {
844 used_chans |= chans;
845 chans >>= 4;
846 }
847
848 return (~used_chans) & 0xF;
849 }
850
851 } // namespace r600_sb
852