• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
27 #define RA_DEBUG 0
28 
29 #if RA_DEBUG
30 #define RA_DUMP(q) do { q } while (0)
31 #else
32 #define RA_DUMP(q)
33 #endif
34 
35 #include <cstring>
36 
37 #include "sb_bc.h"
38 #include "sb_shader.h"
39 #include "sb_pass.h"
40 
41 namespace r600_sb {
42 
43 class regbits {
44 	typedef uint32_t basetype;
45 	static const unsigned bt_bytes = sizeof(basetype);
46 	static const unsigned bt_index_shift = 5;
47 	static const unsigned bt_index_mask = (1u << bt_index_shift) - 1;
48 	static const unsigned bt_bits = bt_bytes << 3;
49 	static const unsigned size = MAX_GPR * 4 / bt_bits;
50 
51 	basetype dta[size];
52 
53 	unsigned num_temps;
54 
55 public:
56 
regbits(unsigned num_temps)57 	regbits(unsigned num_temps) : dta(), num_temps(num_temps) {}
regbits(unsigned num_temps,unsigned value)58 	regbits(unsigned num_temps, unsigned value)	: num_temps(num_temps)
59 	{ set_all(value); }
60 
regbits(shader & sh,val_set & vs)61 	regbits(shader &sh, val_set &vs) : num_temps(sh.get_ctx().alu_temp_gprs)
62 	{ set_all(1); from_val_set(sh, vs); }
63 
64 	void set_all(unsigned val);
65 	void from_val_set(shader &sh, val_set &vs);
66 
67 	void set(unsigned index);
68 	void clear(unsigned index);
69 	bool get(unsigned index);
70 
71 	void set(unsigned index, unsigned val);
72 
73 	sel_chan find_free_bit();
74 	sel_chan find_free_chans(unsigned mask);
75 	sel_chan find_free_chan_by_mask(unsigned mask);
76 	sel_chan find_free_array(unsigned size, unsigned mask);
77 
78 	void dump();
79 };
80 
81 // =======================================
82 
dump()83 void regbits::dump() {
84 	for (unsigned i = 0; i < size * bt_bits; ++i) {
85 
86 		if (!(i & 31))
87 			sblog << "\n";
88 
89 		if (!(i & 3)) {
90 			sblog.print_w(i / 4, 7);
91 			sblog << " ";
92 		}
93 
94 		sblog << (get(i) ? 1 : 0);
95 	}
96 }
97 
98 
set_all(unsigned v)99 void regbits::set_all(unsigned v) {
100 	memset(&dta, v ? 0xFF : 0x00, size * bt_bytes);
101 }
102 
from_val_set(shader & sh,val_set & vs)103 void regbits::from_val_set(shader &sh, val_set& vs) {
104 	val_set &s = vs;
105 	unsigned g;
106 	for (val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) {
107 		value *v = *I;
108 		if (v->is_any_gpr()) {
109 			g = v->get_final_gpr();
110 			if (!g)
111 				continue;
112 		} else
113 			continue;
114 
115 		assert(g);
116 		--g;
117 		assert(g < 512);
118 		clear(g);
119 	}
120 }
121 
set(unsigned index)122 void regbits::set(unsigned index) {
123 	unsigned ih = index >> bt_index_shift;
124 	unsigned il = index & bt_index_mask;
125 	dta[ih] |= ((basetype)1u << il);
126 }
127 
clear(unsigned index)128 void regbits::clear(unsigned index) {
129 	unsigned ih = index >> bt_index_shift;
130 	unsigned il = index & bt_index_mask;
131 	assert(ih < size);
132 	dta[ih] &= ~((basetype)1u << il);
133 }
134 
get(unsigned index)135 bool regbits::get(unsigned index) {
136 	unsigned ih = index >> bt_index_shift;
137 	unsigned il = index & bt_index_mask;
138 	return dta[ih] & ((basetype)1u << il);
139 }
140 
set(unsigned index,unsigned val)141 void regbits::set(unsigned index, unsigned val) {
142 	unsigned ih = index >> bt_index_shift;
143 	unsigned il = index & bt_index_mask;
144 	basetype bm = 1u << il;
145 	dta[ih] = (dta[ih] & ~bm) | (val << il);
146 }
147 
148 // free register for ra means the bit is set
find_free_bit()149 sel_chan regbits::find_free_bit() {
150 	unsigned elt = 0;
151 	unsigned bit = 0;
152 
153 	while (elt < size && !dta[elt])
154 		++elt;
155 
156 	if (elt >= size)
157 		return 0;
158 
159 	bit = __builtin_ctz(dta[elt]) + (elt << bt_index_shift);
160 
161 	assert(bit < ((MAX_GPR - num_temps) << 2));
162 
163 	return bit + 1;
164 }
165 
166 // find free gpr component to use as indirectly addressable array
find_free_array(unsigned length,unsigned mask)167 sel_chan regbits::find_free_array(unsigned length, unsigned mask) {
168 	unsigned cc[4] = {};
169 
170 	// FIXME optimize this. though hopefully we won't have a lot of arrays
171 	for (unsigned a = 0; a < MAX_GPR - num_temps; ++a) {
172 		for(unsigned c = 0; c < MAX_CHAN; ++c) {
173 			if (mask & (1 << c)) {
174 				if (get((a << 2) | c)) {
175 					if (++cc[c] == length)
176 						return sel_chan(a - length + 1, c);
177 				} else {
178 					cc[c] = 0;
179 				}
180 			}
181 		}
182 	}
183 	return 0;
184 }
185 
find_free_chans(unsigned mask)186 sel_chan regbits::find_free_chans(unsigned mask) {
187 	unsigned elt = 0;
188 	unsigned bit = 0;
189 
190 	assert (!(mask & ~0xF));
191 	basetype cd = dta[elt];
192 
193 	do {
194 		if (!cd) {
195 			if (++elt < size) {
196 				cd = dta[elt];
197 				bit = 0;
198 				continue;
199 			} else
200 				return 0;
201 		}
202 
203 		unsigned p = __builtin_ctz(cd) & ~(basetype)3u;
204 
205 		assert (p <= bt_bits - bit);
206 		bit += p;
207 		cd >>= p;
208 
209 		if ((cd & mask) == mask) {
210 			return ((elt << bt_index_shift) | bit) + 1;
211 		}
212 
213 		bit += 4;
214 		cd >>= 4;
215 
216 	} while (1);
217 
218 	return 0;
219 }
220 
find_free_chan_by_mask(unsigned mask)221 sel_chan regbits::find_free_chan_by_mask(unsigned mask) {
222 	unsigned elt = 0;
223 	unsigned bit = 0;
224 
225 	assert (!(mask & ~0xF));
226 	basetype cd = dta[elt];
227 
228 	do {
229 		if (!cd) {
230 			if (++elt < size) {
231 				cd = dta[elt];
232 				bit = 0;
233 				continue;
234 			} else
235 				return 0;
236 		}
237 
238 		unsigned p = __builtin_ctz(cd) & ~(basetype)3u;
239 
240 		assert (p <= bt_bits - bit);
241 		bit += p;
242 		cd >>= p;
243 
244 		if (cd & mask) {
245 			unsigned nb = __builtin_ctz(cd & mask);
246 			unsigned ofs = ((elt << bt_index_shift) | bit);
247 			return nb + ofs + 1;
248 		}
249 
250 		bit += 4;
251 		cd >>= 4;
252 
253 	} while (1);
254 
255 	return 0;
256 }
257 
258 // ================================
259 
alloc_arrays()260 void ra_init::alloc_arrays() {
261 
262 	gpr_array_vec &ga = sh.arrays();
263 
264 	for(gpr_array_vec::iterator I = ga.begin(), E = ga.end(); I != E; ++I) {
265 		gpr_array *a = *I;
266 
267 		RA_DUMP(
268 			sblog << "array [" << a->array_size << "] at " << a->base_gpr << "\n";
269 			sblog << "\n";
270 		);
271 
272 		// skip preallocated arrays (e.g. with preloaded inputs)
273 		if (a->gpr) {
274 			RA_DUMP( sblog << "   FIXED at " << a->gpr << "\n"; );
275 			continue;
276 		}
277 
278 		bool dead = a->is_dead();
279 
280 		if (dead) {
281 			RA_DUMP( sblog << "   DEAD\n"; );
282 			continue;
283 		}
284 
285 		val_set &s = a->interferences;
286 
287 
288 		for (val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) {
289 			value *v = *I;
290 			if (v->array == a)
291 				s.remove_val(v);
292 		}
293 
294 		RA_DUMP(
295 			sblog << "  interf: ";
296 			dump::dump_set(sh, s);
297 			sblog << "\n";
298 		);
299 
300 		regbits rb(sh, s);
301 
302 		sel_chan base = rb.find_free_array(a->array_size,
303 		                                   (1 << a->base_gpr.chan()));
304 
305 		RA_DUMP( sblog << "  found base: " << base << "\n"; );
306 
307 		a->gpr = base;
308 	}
309 }
310 
311 
run()312 int ra_init::run() {
313 
314 	alloc_arrays();
315 
316 	return ra_node(sh.root) ? 0 : 1;
317 }
318 
ra_node(container_node * c)319 bool ra_init::ra_node(container_node* c) {
320 
321 	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
322 		node *n = *I;
323 		if (n->type == NT_OP) {
324 			if (!process_op(n))
325                            return false;
326 		}
327 		if (n->is_container() && !n->is_alu_packed()) {
328 			if (!ra_node(static_cast<container_node*>(n)))
329                            return false;
330 		}
331 	}
332         return true;
333 }
334 
process_op(node * n)335 bool ra_init::process_op(node* n) {
336 
337 	bool copy = n->is_copy_mov();
338 
339 	RA_DUMP(
340 		sblog << "ra_init: process_op : ";
341 		dump::dump_op(n);
342 		sblog << "\n";
343 	);
344 
345 	if (n->is_alu_packed()) {
346 		for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) {
347 			value *v = *I;
348 			if (v && v->is_sgpr() && v->constraint &&
349 					v->constraint->kind == CK_PACKED_BS) {
350 				color_bs_constraint(v->constraint);
351 				break;
352 			}
353 		}
354 	}
355 
356 	if (n->is_fetch_inst() || n->is_cf_inst()) {
357 		for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) {
358 			value *v = *I;
359 			if (v && v->is_sgpr())
360 				if (!color(v))
361                                        return false;
362 		}
363 	}
364 
365 	for (vvec::iterator I = n->dst.begin(), E = n->dst.end(); I != E; ++I) {
366 		value *v = *I;
367 		if (!v)
368 			continue;
369 		if (v->is_sgpr()) {
370 			if (!v->gpr) {
371 				if (copy && !v->constraint) {
372 					value *s = *(n->src.begin() + (I - n->dst.begin()));
373 					assert(s);
374 					if (s->is_sgpr()) {
375 						assign_color(v, s->gpr);
376 					}
377 				} else
378                                    if (!color(v))
379                                           return false;
380 			}
381 		}
382 	}
383         return true;
384 }
385 
color_bs_constraint(ra_constraint * c)386 void ra_init::color_bs_constraint(ra_constraint* c) {
387 	vvec &vv = c->values;
388 	assert(vv.size() <= 8);
389 
390 	RA_DUMP(
391 		sblog << "color_bs_constraint: ";
392 		dump::dump_vec(vv);
393 		sblog << "\n";
394 	);
395 
396 	regbits rb(ctx.alu_temp_gprs);
397 
398 	unsigned chan_count[4] = {};
399 	unsigned allowed_chans = 0x0F;
400 
401 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
402 		value *v = *I;
403 
404 		if (!v || v->is_dead())
405 			continue;
406 
407 		sel_chan gpr = v->get_final_gpr();
408 
409 		val_set interf;
410 
411 		if (v->chunk)
412 			sh.coal.get_chunk_interferences(v->chunk, interf);
413 		else
414 			interf = v->interferences;
415 
416 		RA_DUMP(
417 			sblog << "   processing " << *v << "  interferences : ";
418 			dump::dump_set(sh, interf);
419 			sblog << "\n";
420 		);
421 
422 		if (gpr) {
423 			unsigned chan = gpr.chan();
424 			if (chan_count[chan] < 3) {
425 				++chan_count[chan];
426 				continue;
427 			} else {
428 				v->flags &= ~VLF_FIXED;
429 				allowed_chans &= ~(1 << chan);
430 				assert(allowed_chans);
431 			}
432 		}
433 
434 		v->gpr = 0;
435 
436 		gpr = 1;
437 		rb.set_all(1);
438 
439 
440 		rb.from_val_set(sh, interf);
441 
442 		RA_DUMP(
443 			sblog << "   regbits : ";
444 			rb.dump();
445 			sblog << "\n";
446 		);
447 
448 		while (allowed_chans && gpr.sel() < sh.num_nontemp_gpr()) {
449 
450 			while (rb.get(gpr - 1) == 0)
451 				gpr = gpr + 1;
452 
453 			RA_DUMP(
454 				sblog << "    trying " << gpr << "\n";
455 			);
456 
457 			unsigned chan = gpr.chan();
458 			if (chan_count[chan] < 3) {
459 				++chan_count[chan];
460 
461 				if (v->chunk) {
462 					vvec::iterator F = std::find(v->chunk->values.begin(),
463 					                             v->chunk->values.end(),
464 					                             v);
465 					v->chunk->values.erase(F);
466 					v->chunk = NULL;
467 				}
468 
469 				assign_color(v, gpr);
470 				break;
471 			} else {
472 				allowed_chans &= ~(1 << chan);
473 			}
474 			gpr = gpr + 1;
475 		}
476 
477 		if (!gpr) {
478 			sblog << "color_bs_constraint: failed...\n";
479 			assert(!"coloring failed");
480 		}
481 	}
482 }
483 
color(value * v)484 bool ra_init::color(value* v) {
485 
486 	if (v->constraint && v->constraint->kind == CK_PACKED_BS) {
487 		color_bs_constraint(v->constraint);
488 		return true;
489 	}
490 
491 	if (v->chunk && v->chunk->is_fixed())
492 		return true;
493 
494 	RA_DUMP(
495 		sblog << "coloring ";
496 		dump::dump_val(v);
497 		sblog << "   interferences ";
498 		dump::dump_set(sh, v->interferences);
499 		sblog << "\n";
500 	);
501 
502 	if (v->is_reg_pinned()) {
503 		assert(v->is_chan_pinned());
504 		assign_color(v, v->pin_gpr);
505 		return true;
506 	}
507 
508 	regbits rb(sh, v->interferences);
509 	sel_chan c;
510 
511 	if (v->is_chan_pinned()) {
512 		unsigned mask = 1 << v->pin_gpr.chan();
513 		c = rb.find_free_chans(mask) + v->pin_gpr.chan();
514 	} else {
515 		unsigned cm = get_preferable_chan_mask();
516 		c = rb.find_free_chan_by_mask(cm);
517 	}
518 
519         if (!c || c.sel() >= 128 - ctx.alu_temp_gprs)
520            return false;
521 	assign_color(v, c);
522         return true;
523 }
524 
assign_color(value * v,sel_chan c)525 void ra_init::assign_color(value* v, sel_chan c) {
526 	add_prev_chan(c.chan());
527 	v->gpr = c;
528 	RA_DUMP(
529 		sblog << "colored ";
530 		dump::dump_val(v);
531 		sblog << " to " << c << "\n";
532 	);
533 }
534 
535 // ===================================================
536 
run()537 int ra_split::run() {
538 	split(sh.root);
539 	return 0;
540 }
541 
split_phi_src(container_node * loc,container_node * c,unsigned id,bool loop)542 void ra_split::split_phi_src(container_node *loc, container_node *c,
543                              unsigned id, bool loop) {
544 	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
545 		node *p = *I;
546 		value* &v = p->src[id], *d = p->dst[0];
547 		assert(v);
548 
549 		if (!d->is_sgpr() || v->is_undef())
550 			continue;
551 
552 		value *t = sh.create_temp_value();
553 		alu_node* n = sh.create_copy_mov(t, v);
554 		if (loop)
555 			n->flags |= NF_DONT_MOVE;
556 		if (loop && id == 0)
557 			loc->insert_before(n);
558 		else
559 			loc->push_back(n);
560 		v = t;
561 
562 		sh.coal.add_edge(v, d, coalescer::phi_cost);
563 	}
564 }
565 
split_phi_dst(node * loc,container_node * c,bool loop)566 void ra_split::split_phi_dst(node* loc, container_node *c, bool loop) {
567 	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
568 		node *p = *I;
569 		value* &v = p->dst[0];
570 		assert(v);
571 
572 		if (!v->is_sgpr())
573 			continue;
574 
575 		value *t = sh.create_temp_value();
576 		node *cp = sh.create_copy_mov(v, t);
577 		if (loop) {
578 			cp->flags |= NF_DONT_MOVE;
579 			static_cast<container_node*>(loc)->push_front(cp);
580 		} else
581 			loc->insert_after(cp);
582 		v = t;
583 	}
584 }
585 
586 
init_phi_constraints(container_node * c)587 void ra_split::init_phi_constraints(container_node *c) {
588 	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
589 		node *p = *I;
590 		ra_constraint *cc = sh.coal.create_constraint(CK_PHI);
591 		cc->values.push_back(p->dst[0]);
592 
593 		for (vvec::iterator I = p->src.begin(), E = p->src.end(); I != E; ++I) {
594 			value *v = *I;
595 			if (v->is_sgpr())
596 				cc->values.push_back(v);
597 		}
598 
599 		cc->update_values();
600 	}
601 }
602 
split(container_node * n)603 void ra_split::split(container_node* n) {
604 
605 	if (n->type == NT_DEPART) {
606 		depart_node *d = static_cast<depart_node*>(n);
607 		if (d->target->phi)
608 			split_phi_src(d, d->target->phi, d->dep_id, false);
609 	} else if (n->type == NT_REPEAT) {
610 		repeat_node *r = static_cast<repeat_node*>(n);
611 		if (r->target->loop_phi)
612 			split_phi_src(r, r->target->loop_phi, r->rep_id, true);
613 	} else if (n->type == NT_REGION) {
614 		region_node *r = static_cast<region_node*>(n);
615 		if (r->phi) {
616 			split_phi_dst(r, r->phi, false);
617 		}
618 		if (r->loop_phi) {
619 			split_phi_dst(r->get_entry_code_location(), r->loop_phi,
620 					true);
621 			split_phi_src(r, r->loop_phi, 0, true);
622 		}
623 	}
624 
625 	for (node_riterator N, I = n->rbegin(), E = n->rend(); I != E; I = N) {
626 		N = I;
627 		++N;
628 		node *o = *I;
629 		if (o->type == NT_OP) {
630 			split_op(o);
631 		} else if (o->is_container()) {
632 			split(static_cast<container_node*>(o));
633 		}
634 	}
635 
636 	if (n->type == NT_REGION) {
637 		region_node *r = static_cast<region_node*>(n);
638 		if (r->phi)
639 			init_phi_constraints(r->phi);
640 		if (r->loop_phi)
641 			init_phi_constraints(r->loop_phi);
642 	}
643 }
644 
split_op(node * n)645 void ra_split::split_op(node* n) {
646 	switch(n->subtype) {
647 		case NST_ALU_PACKED_INST:
648 			split_alu_packed(static_cast<alu_packed_node*>(n));
649 			break;
650 		case NST_FETCH_INST:
651 		case NST_CF_INST:
652 			split_vector_inst(n);
653 		default:
654 			break;
655 	}
656 }
657 
split_packed_ins(alu_packed_node * n)658 void ra_split::split_packed_ins(alu_packed_node *n) {
659 	vvec vv = n->src;
660 	vvec sv, dv;
661 
662 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
663 
664 		value *&v = *I;
665 
666 		if (v && v->is_any_gpr() && !v->is_undef()) {
667 
668 			vvec::iterator F = std::find(sv.begin(), sv.end(), v);
669 			value *t;
670 
671 			if (F != sv.end()) {
672 				t = *(dv.begin() + (F - sv.begin()));
673 			} else {
674 				t = sh.create_temp_value();
675 				sv.push_back(v);
676 				dv.push_back(t);
677 			}
678 			v = t;
679 		}
680 	}
681 
682 	unsigned cnt = sv.size();
683 
684 	if (cnt > 0) {
685 		n->src = vv;
686 		for (vvec::iterator SI = sv.begin(), DI = dv.begin(), SE = sv.end();
687 				SI != SE; ++SI, ++DI) {
688 			n->insert_before(sh.create_copy_mov(*DI, *SI));
689 		}
690 
691 		ra_constraint *c = sh.coal.create_constraint(CK_PACKED_BS);
692 		c->values = dv;
693 		c->update_values();
694 	}
695 }
696 
697 // TODO handle other packed ops for cayman
split_alu_packed(alu_packed_node * n)698 void ra_split::split_alu_packed(alu_packed_node* n) {
699 	switch (n->op()) {
700 		case ALU_OP2_DOT4:
701 		case ALU_OP2_DOT4_IEEE:
702 		case ALU_OP2_CUBE:
703 			split_packed_ins(n);
704 			break;
705 		default:
706 			break;
707 	}
708 }
709 
split_vec(vvec & vv,vvec & v1,vvec & v2,bool allow_swz)710 void ra_split::split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz) {
711 	unsigned ch = 0;
712 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I, ++ch) {
713 
714 		value* &o = *I;
715 
716 		if (o) {
717 
718 			assert(!o->is_dead());
719 
720 			if (o->is_undef() || o->is_geometry_emit() || o->is_scratch())
721 				continue;
722 
723 			if (allow_swz && o->is_float_0_or_1())
724 				continue;
725 
726 			value *t;
727 			vvec::iterator F =
728 					allow_swz ? std::find(v2.begin(), v2.end(), o) : v2.end();
729 
730 			if (F != v2.end()) {
731 				t = *(v1.begin() + (F - v2.begin()));
732 			} else {
733 				t = sh.create_temp_value();
734 
735 				if (!allow_swz) {
736 					t->flags |= VLF_PIN_CHAN;
737 					t->pin_gpr = sel_chan(0, ch);
738 				}
739 
740 				v2.push_back(o);
741 				v1.push_back(t);
742 			}
743 			o = t;
744 		}
745 	}
746 }
747 
split_vector_inst(node * n)748 void ra_split::split_vector_inst(node* n) {
749 	ra_constraint *c;
750 
751 	bool call_fs = n->is_cf_op(CF_OP_CALL_FS);
752 	bool no_src_swizzle = n->is_cf_inst() && (n->cf_op_flags() & CF_MEM);
753 
754 	no_src_swizzle |= n->is_fetch_op(FETCH_OP_VFETCH) ||
755 			n->is_fetch_op(FETCH_OP_SEMFETCH);
756 
757 	no_src_swizzle |= n->is_fetch_inst() && (n->fetch_op_flags() & FF_GDS);
758 
759 	if (!n->src.empty() && !call_fs) {
760 
761 		// we may have more than one source vector -
762 		// fetch instructions with FF_USEGRAD have gradient values in
763 		// src vectors 1 (src[4-7] and 2 (src[8-11])
764 
765 		unsigned nvec = n->src.size() >> 2;
766 		assert(nvec << 2 <= n->src.size());
767 
768 		for (unsigned nv = 0; nv < nvec; ++nv) {
769 			vvec sv, tv, nsrc(4);
770 			unsigned arg_start = nv << 2;
771 
772 			std::copy(n->src.begin() + arg_start,
773 			          n->src.begin() + arg_start + 4,
774 			          nsrc.begin());
775 
776 			split_vec(nsrc, tv, sv, !no_src_swizzle);
777 
778 			unsigned cnt = sv.size();
779 
780 			if (no_src_swizzle || cnt) {
781 
782 				std::copy(nsrc.begin(), nsrc.end(), n->src.begin() + arg_start);
783 
784 				for(unsigned i = 0, s = tv.size(); i < s; ++i) {
785 					n->insert_before(sh.create_copy_mov(tv[i], sv[i]));
786 				}
787 
788 				c = sh.coal.create_constraint(CK_SAME_REG);
789 				c->values = tv;
790 				c->update_values();
791 			}
792 		}
793 	}
794 
795 	if (!n->dst.empty()) {
796 		vvec sv, tv, ndst = n->dst;
797 
798 		split_vec(ndst, tv, sv, true);
799 
800 		if (sv.size()) {
801 			n->dst = ndst;
802 
803 			node *lp = n;
804 			for(unsigned i = 0, s = tv.size(); i < s; ++i) {
805 				lp->insert_after(sh.create_copy_mov(sv[i], tv[i]));
806 				lp = lp->next;
807 			}
808 
809 			if (call_fs) {
810 				for (unsigned i = 0, cnt = tv.size(); i < cnt; ++i) {
811 					value *v = tv[i];
812 					value *s = sv[i];
813 					if (!v)
814 						continue;
815 
816 					v->flags |= VLF_PIN_REG | VLF_PIN_CHAN;
817 					s->flags &= ~(VLF_PIN_REG | VLF_PIN_CHAN);
818 					sel_chan sel;
819 
820 					if (s->is_rel()) {
821 						assert(s->rel->is_const());
822 						sel = sel_chan(s->select.sel() +
823 										 s->rel->get_const_value().u,
824 						             s->select.chan());
825 					} else
826 						sel = s->select;
827 
828 					v->gpr = v->pin_gpr = sel;
829 					v->fix();
830 				}
831 			} else {
832 				c = sh.coal.create_constraint(CK_SAME_REG);
833 				c->values = tv;
834 				c->update_values();
835 			}
836 		}
837 	}
838 }
839 
add_prev_chan(unsigned chan)840 void ra_init::add_prev_chan(unsigned chan) {
841 	prev_chans = (prev_chans << 4) | (1 << chan);
842 }
843 
get_preferable_chan_mask()844 unsigned ra_init::get_preferable_chan_mask() {
845 	unsigned i, used_chans = 0;
846 	unsigned chans = prev_chans;
847 
848 	for (i = 0; i < ra_tune; ++i) {
849 		used_chans |= chans;
850 		chans >>= 4;
851 	}
852 
853 	return (~used_chans) & 0xF;
854 }
855 
856 } // namespace r600_sb
857