• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
// Set to a non-zero value to compile the scheduler's trace output in.
#define PSC_DEBUG 0

// PSC_DUMP(a) expands to its argument wrapped in do { } while (0) when
// PSC_DEBUG is enabled and to nothing otherwise, so the statements passed to
// it are not compiled at all in normal builds.
#if PSC_DEBUG
#define PSC_DUMP(a) do { a } while (0)
#else
#define PSC_DUMP(a)
#endif
34 
35 #include "sb_bc.h"
36 #include "sb_shader.h"
37 #include "sb_pass.h"
38 #include "sb_sched.h"
39 #include "eg_sq.h" // V_SQ_CF_INDEX_NONE/0/1
40 
41 namespace r600_sb {
42 
rp_kcache_tracker(shader & sh)43 rp_kcache_tracker::rp_kcache_tracker(shader &sh) : rp(), uc(),
44 		// FIXME: for now we'll use "two const pairs" limit for r600, same as
45 		// for other chips, otherwise additional check in alu_group_tracker is
46 		// required to make sure that all 4 consts in the group fit into 2
47 		// kcache sets
48 		sel_count(2) {}
49 
try_reserve(sel_chan r)50 bool rp_kcache_tracker::try_reserve(sel_chan r) {
51 	unsigned sel = kc_sel(r);
52 
53 	for (unsigned i = 0; i < sel_count; ++i) {
54 		if (rp[i] == 0) {
55 			rp[i] = sel;
56 			++uc[i];
57 			return true;
58 		}
59 		if (rp[i] == sel) {
60 			++uc[i];
61 			return true;
62 		}
63 	}
64 	return false;
65 }
66 
try_reserve(node * n)67 bool rp_kcache_tracker::try_reserve(node* n) {
68 	bool need_unreserve = false;
69 	vvec::iterator I(n->src.begin()), E(n->src.end());
70 
71 	for (; I != E; ++I) {
72 		value *v = *I;
73 		if (v->is_kcache()) {
74 			if (!try_reserve(v->select))
75 				break;
76 			else
77 				need_unreserve = true;
78 		}
79 	}
80 	if (I == E)
81 		return true;
82 
83 	if (need_unreserve && I != n->src.begin()) {
84 		do {
85 			--I;
86 			value *v =*I;
87 			if (v->is_kcache())
88 				unreserve(v->select);
89 		} while (I != n->src.begin());
90 	}
91 	return false;
92 }
93 
94 inline
unreserve(node * n)95 void rp_kcache_tracker::unreserve(node* n) {
96 	vvec::iterator I(n->src.begin()), E(n->src.end());
97 	for (; I != E; ++I) {
98 		value *v = *I;
99 		if (v->is_kcache())
100 			unreserve(v->select);
101 	}
102 }
103 
unreserve(sel_chan r)104 void rp_kcache_tracker::unreserve(sel_chan r) {
105 	unsigned sel = kc_sel(r);
106 
107 	for (unsigned i = 0; i < sel_count; ++i)
108 		if (rp[i] == sel) {
109 			if (--uc[i] == 0)
110 				rp[i] = 0;
111 			return;
112 		}
113 	assert(0);
114 	return;
115 }
116 
try_reserve(alu_node * n)117 bool literal_tracker::try_reserve(alu_node* n) {
118 	bool need_unreserve = false;
119 
120 	vvec::iterator I(n->src.begin()), E(n->src.end());
121 
122 	for (; I != E; ++I) {
123 		value *v = *I;
124 		if (v->is_literal()) {
125 			if (!try_reserve(v->literal_value))
126 				break;
127 			else
128 				need_unreserve = true;
129 		}
130 	}
131 	if (I == E)
132 		return true;
133 
134 	if (need_unreserve && I != n->src.begin()) {
135 		do {
136 			--I;
137 			value *v =*I;
138 			if (v->is_literal())
139 				unreserve(v->literal_value);
140 		} while (I != n->src.begin());
141 	}
142 	return false;
143 }
144 
unreserve(alu_node * n)145 void literal_tracker::unreserve(alu_node* n) {
146 	unsigned nsrc = n->bc.op_ptr->src_count, i;
147 
148 	for (i = 0; i < nsrc; ++i) {
149 		value *v = n->src[i];
150 		if (v->is_literal())
151 			unreserve(v->literal_value);
152 	}
153 }
154 
try_reserve(literal l)155 bool literal_tracker::try_reserve(literal l) {
156 
157 	PSC_DUMP( sblog << "literal reserve " << l.u << "  " << l.f << "\n"; );
158 
159 	for (unsigned i = 0; i < MAX_ALU_LITERALS; ++i) {
160 		if (lt[i] == 0) {
161 			lt[i] = l;
162 			++uc[i];
163 			PSC_DUMP( sblog << "  reserved new uc = " << uc[i] << "\n"; );
164 			return true;
165 		} else if (lt[i] == l) {
166 			++uc[i];
167 			PSC_DUMP( sblog << "  reserved uc = " << uc[i] << "\n"; );
168 			return true;
169 		}
170 	}
171 	PSC_DUMP( sblog << "  failed to reserve literal\n"; );
172 	return false;
173 }
174 
unreserve(literal l)175 void literal_tracker::unreserve(literal l) {
176 
177 	PSC_DUMP( sblog << "literal unreserve " << l.u << "  " << l.f << "\n"; );
178 
179 	for (unsigned i = 0; i < MAX_ALU_LITERALS; ++i) {
180 		if (lt[i] == l) {
181 			if (--uc[i] == 0)
182 				lt[i] = 0;
183 			return;
184 		}
185 	}
186 	assert(0);
187 	return;
188 }
189 
bs_cycle_vector(unsigned bs,unsigned src)190 static inline unsigned bs_cycle_vector(unsigned bs, unsigned src) {
191 	static const unsigned swz[VEC_NUM][3] = {
192 		{0, 1, 2}, {0, 2, 1}, {1, 2, 0}, {1, 0, 2}, {2, 0, 1}, {2, 1, 0}
193 	};
194 	assert(bs < VEC_NUM && src < 3);
195 	return swz[bs][src];
196 }
197 
bs_cycle_scalar(unsigned bs,unsigned src)198 static inline unsigned bs_cycle_scalar(unsigned bs, unsigned src) {
199 	static const unsigned swz[SCL_NUM][3] = {
200 		{2, 1, 0}, {1, 2, 2}, {2, 1, 2}, {2, 2, 1}
201 	};
202 
203 	if (bs >= SCL_NUM || src >= 3) {
204 		// this prevents gcc warning "array subscript is above array bounds"
205 		// AFAICS we should never hit this path
206 		abort();
207 	}
208 	return swz[bs][src];
209 }
210 
bs_cycle(bool trans,unsigned bs,unsigned src)211 static inline unsigned bs_cycle(bool trans, unsigned bs, unsigned src) {
212 	return trans ? bs_cycle_scalar(bs, src) : bs_cycle_vector(bs, src);
213 }
214 
215 inline
try_reserve(unsigned cycle,unsigned sel,unsigned chan)216 bool rp_gpr_tracker::try_reserve(unsigned cycle, unsigned sel, unsigned chan) {
217 	++sel;
218 	if (rp[cycle][chan] == 0) {
219 		rp[cycle][chan] = sel;
220 		++uc[cycle][chan];
221 		return true;
222 	} else if (rp[cycle][chan] == sel) {
223 		++uc[cycle][chan];
224 		return true;
225 	}
226 	return false;
227 }
228 
229 inline
unreserve(alu_node * n)230 void rp_gpr_tracker::unreserve(alu_node* n) {
231 	unsigned nsrc = n->bc.op_ptr->src_count, i;
232 	unsigned trans = n->bc.slot == SLOT_TRANS;
233 	unsigned bs = n->bc.bank_swizzle;
234 	unsigned opt = !trans
235 			&& n->bc.src[0].sel == n->bc.src[1].sel
236 			&& n->bc.src[0].chan == n->bc.src[1].chan;
237 
238 	for (i = 0; i < nsrc; ++i) {
239 		value *v = n->src[i];
240 		if (v->is_readonly() || v->is_undef())
241 			continue;
242 		if (i == 1 && opt)
243 			continue;
244 		unsigned cycle = bs_cycle(trans, bs, i);
245 		unreserve(cycle, n->bc.src[i].sel, n->bc.src[i].chan);
246 	}
247 }
248 
249 inline
unreserve(unsigned cycle,unsigned sel,unsigned chan)250 void rp_gpr_tracker::unreserve(unsigned cycle, unsigned sel, unsigned chan) {
251 	++sel;
252 	assert(rp[cycle][chan] == sel && uc[cycle][chan]);
253 	if (--uc[cycle][chan] == 0)
254 		rp[cycle][chan] = 0;
255 }
256 
257 inline
try_reserve(alu_node * n)258 bool rp_gpr_tracker::try_reserve(alu_node* n) {
259 	unsigned nsrc = n->bc.op_ptr->src_count, i;
260 	unsigned trans = n->bc.slot == SLOT_TRANS;
261 	unsigned bs = n->bc.bank_swizzle;
262 	unsigned opt = !trans && nsrc >= 2 &&
263 			n->src[0] == n->src[1];
264 
265 	bool need_unreserve = false;
266 	unsigned const_count = 0, min_gpr_cycle = 3;
267 
268 	for (i = 0; i < nsrc; ++i) {
269 		value *v = n->src[i];
270 		if (v->is_readonly() || v->is_undef()) {
271 			const_count++;
272 			if (trans && const_count == 3)
273 				break;
274 		} else {
275 			if (i == 1 && opt)
276 				continue;
277 
278 			unsigned cycle = bs_cycle(trans, bs, i);
279 
280 			if (trans && cycle < min_gpr_cycle)
281 				min_gpr_cycle = cycle;
282 
283 			if (const_count && cycle < const_count && trans)
284 				break;
285 
286 			if (!try_reserve(cycle, n->bc.src[i].sel, n->bc.src[i].chan))
287 				break;
288 			else
289 				need_unreserve = true;
290 		}
291 	}
292 
293 	if ((i == nsrc) && (min_gpr_cycle + 1 > const_count))
294 		return true;
295 
296 	if (need_unreserve && i--) {
297 		do {
298 			value *v = n->src[i];
299 			if (!v->is_readonly() && !v->is_undef()) {
300 			if (i == 1 && opt)
301 				continue;
302 			unreserve(bs_cycle(trans, bs, i), n->bc.src[i].sel,
303 			          n->bc.src[i].chan);
304 			}
305 		} while (i--);
306 	}
307 	return false;
308 }
309 
alu_group_tracker(shader & sh)310 alu_group_tracker::alu_group_tracker(shader &sh)
311 	: sh(sh), kc(sh),
312 	  gpr(), lt(), slots(),
313 	  max_slots(sh.get_ctx().is_cayman() ? 4 : 5),
314 	  has_mova(), uses_ar(), has_predset(), has_kill(),
315 	  updates_exec_mask(), consumes_lds_oqa(), produces_lds_oqa(), chan_count(), interp_param(), next_id() {
316 
317 	available_slots = sh.get_ctx().has_trans ? 0x1F : 0x0F;
318 }
319 
320 inline
get_value_id(value * v)321 sel_chan alu_group_tracker::get_value_id(value* v) {
322 	unsigned &id = vmap[v];
323 	if (!id)
324 		id = ++next_id;
325 	return sel_chan(id, v->get_final_chan());
326 }
327 
328 inline
assign_slot(unsigned slot,alu_node * n)329 void alu_group_tracker::assign_slot(unsigned slot, alu_node* n) {
330 	update_flags(n);
331 	slots[slot] = n;
332 	available_slots &= ~(1 << slot);
333 
334 	unsigned param = n->interp_param();
335 
336 	if (param) {
337 		assert(!interp_param || interp_param == param);
338 		interp_param = param;
339 	}
340 }
341 
342 
discard_all_slots(container_node & removed_nodes)343 void alu_group_tracker::discard_all_slots(container_node &removed_nodes) {
344 	PSC_DUMP( sblog << "agt::discard_all_slots\n"; );
345 	discard_slots(~available_slots & ((1 << max_slots) - 1), removed_nodes);
346 }
347 
discard_slots(unsigned slot_mask,container_node & removed_nodes)348 void alu_group_tracker::discard_slots(unsigned slot_mask,
349                                     container_node &removed_nodes) {
350 
351 	PSC_DUMP(
352 		sblog << "discard_slots : packed_ops : "
353 			<< (unsigned)packed_ops.size() << "\n";
354 	);
355 
356 	for (node_vec::iterator N, I = packed_ops.begin();
357 			I != packed_ops.end(); I = N) {
358 		N = I; ++N;
359 
360 		alu_packed_node *n = static_cast<alu_packed_node*>(*I);
361 		unsigned pslots = n->get_slot_mask();
362 
363 		PSC_DUMP(
364 			sblog << "discard_slots : packed slot_mask : " << pslots << "\n";
365 		);
366 
367 		if (pslots & slot_mask) {
368 
369 			PSC_DUMP(
370 				sblog << "discard_slots : discarding packed...\n";
371 			);
372 
373 			removed_nodes.push_back(n);
374 			slot_mask &= ~pslots;
375 			N = packed_ops.erase(I);
376 			available_slots |= pslots;
377 			for (unsigned k = 0; k < max_slots; ++k) {
378 				if (pslots & (1 << k))
379 					slots[k] = NULL;
380 			}
381 		}
382 	}
383 
384 	for (unsigned slot = 0; slot < max_slots; ++slot) {
385 		unsigned slot_bit = 1 << slot;
386 
387 		if (slot_mask & slot_bit) {
388 			assert(!(available_slots & slot_bit));
389 			assert(slots[slot]);
390 
391 			assert(!(slots[slot]->bc.slot_flags & AF_4SLOT));
392 
393 			PSC_DUMP(
394 				sblog << "discarding slot " << slot << " : ";
395 				dump::dump_op(slots[slot]);
396 				sblog << "\n";
397 			);
398 
399 			removed_nodes.push_back(slots[slot]);
400 			slots[slot] = NULL;
401 			available_slots |= slot_bit;
402 		}
403 	}
404 
405 	alu_node *t = slots[4];
406 	if (t && (t->bc.slot_flags & AF_V)) {
407 		unsigned chan = t->bc.dst_chan;
408 		if (!slots[chan]) {
409 			PSC_DUMP(
410 				sblog << "moving ";
411 				dump::dump_op(t);
412 				sblog << " from trans slot to free slot " << chan << "\n";
413 			);
414 
415 			slots[chan] = t;
416 			slots[4] = NULL;
417 			t->bc.slot = chan;
418 		}
419 	}
420 
421 	reinit();
422 }
423 
emit()424 alu_group_node* alu_group_tracker::emit() {
425 
426 	alu_group_node *g = sh.create_alu_group();
427 
428 	lt.init_group_literals(g);
429 
430 	for (unsigned i = 0; i < max_slots; ++i) {
431 		alu_node *n = slots[i];
432 		if (n) {
433 			g->push_back(n);
434 		}
435 	}
436 	return g;
437 }
438 
try_reserve(alu_node * n)439 bool alu_group_tracker::try_reserve(alu_node* n) {
440 	unsigned nsrc = n->bc.op_ptr->src_count;
441 	unsigned slot = n->bc.slot;
442 	bool trans = slot == 4;
443 
444 	if (slots[slot])
445 		return false;
446 
447 	unsigned flags = n->bc.op_ptr->flags;
448 
449 	unsigned param = n->interp_param();
450 
451 	if (param && interp_param && interp_param != param)
452 		return false;
453 
454 	if ((flags & AF_KILL) && has_predset)
455 		return false;
456 	if ((flags & AF_ANY_PRED) && (has_kill || has_predset))
457 		return false;
458 	if ((flags & AF_MOVA) && (has_mova || uses_ar))
459 		return false;
460 
461 	if (n->uses_ar() && has_mova)
462 		return false;
463 
464 	if (consumes_lds_oqa)
465 		return false;
466 	if (n->consumes_lds_oq() && available_slots != (sh.get_ctx().has_trans ? 0x1F : 0x0F))
467 		return false;
468 	for (unsigned i = 0; i < nsrc; ++i) {
469 
470 		unsigned last_id = next_id;
471 
472 		value *v = n->src[i];
473 		if (!v->is_any_gpr() && !v->is_rel())
474 			continue;
475 		sel_chan vid = get_value_id(n->src[i]);
476 
477 		if (vid > last_id && chan_count[vid.chan()] == 3) {
478 			return false;
479 		}
480 
481 		n->bc.src[i].sel = vid.sel();
482 		n->bc.src[i].chan = vid.chan();
483 	}
484 
485 	if (!lt.try_reserve(n))
486 		return false;
487 
488 	if (!kc.try_reserve(n)) {
489 		lt.unreserve(n);
490 		return false;
491 	}
492 
493 	unsigned fbs = n->forced_bank_swizzle();
494 
495 	n->bc.bank_swizzle = 0;
496 
497 	if (!trans && fbs)
498 		n->bc.bank_swizzle = VEC_210;
499 
500 	if (gpr.try_reserve(n)) {
501 		assign_slot(slot, n);
502 		return true;
503 	}
504 
505 	if (!fbs) {
506 		unsigned swz_num = trans ? SCL_NUM : VEC_NUM;
507 		for (unsigned bs = 0; bs < swz_num; ++bs) {
508 			n->bc.bank_swizzle = bs;
509 			if (gpr.try_reserve(n)) {
510 				assign_slot(slot, n);
511 				return true;
512 			}
513 		}
514 	}
515 
516 	gpr.reset();
517 
518 	slots[slot] = n;
519 	unsigned forced_swz_slots = 0;
520 	int first_slot = ~0, first_nf = ~0, last_slot = ~0;
521 	unsigned save_bs[5];
522 
523 	for (unsigned i = 0; i < max_slots; ++i) {
524 		alu_node *a = slots[i];
525 		if (a) {
526 			if (first_slot == ~0)
527 				first_slot = i;
528 			last_slot = i;
529 			save_bs[i] = a->bc.bank_swizzle;
530 			if (a->forced_bank_swizzle()) {
531 				assert(i != SLOT_TRANS);
532 				forced_swz_slots |= (1 << i);
533 				a->bc.bank_swizzle = VEC_210;
534 				if (!gpr.try_reserve(a))
535 					assert(!"internal reservation error");
536 			} else {
537 				if (first_nf == ~0)
538 					first_nf = i;
539 
540 				a->bc.bank_swizzle = 0;
541 			}
542 		}
543 	}
544 
545 	if (first_nf == ~0) {
546 		assign_slot(slot, n);
547 		return true;
548 	}
549 
550 	assert(first_slot != ~0 && last_slot != ~0);
551 
552 	// silence "array subscript is above array bounds" with gcc 4.8
553 	if (last_slot >= 5)
554 		abort();
555 
556 	int i = first_nf;
557 	alu_node *a = slots[i];
558 	bool backtrack = false;
559 
560 	while (1) {
561 
562 		PSC_DUMP(
563 			sblog << " bs: trying s" << i << " bs:" << a->bc.bank_swizzle
564 				<< " bt:" << backtrack << "\n";
565 		);
566 
567 		if (!backtrack && gpr.try_reserve(a)) {
568 			PSC_DUMP(
569 				sblog << " bs: reserved s" << i << " bs:" << a->bc.bank_swizzle
570 					<< "\n";
571 			);
572 
573 			while ((++i <= last_slot) && !slots[i]);
574 			if (i <= last_slot)
575 				a = slots[i];
576 			else
577 				break;
578 		} else {
579 			bool itrans = i == SLOT_TRANS;
580 			unsigned max_swz = itrans ? SCL_221 : VEC_210;
581 
582 			if (a->bc.bank_swizzle < max_swz) {
583 				++a->bc.bank_swizzle;
584 
585 				PSC_DUMP(
586 					sblog << " bs: inc s" << i << " bs:" << a->bc.bank_swizzle
587 						<< "\n";
588 				);
589 
590 			} else {
591 
592 				a->bc.bank_swizzle = 0;
593 				while ((--i >= first_nf) && !slots[i]);
594 				if (i < first_nf)
595 					break;
596 				a = slots[i];
597 				PSC_DUMP(
598 					sblog << " bs: unreserve s" << i << " bs:" << a->bc.bank_swizzle
599 						<< "\n";
600 				);
601 				gpr.unreserve(a);
602 				backtrack = true;
603 
604 				continue;
605 			}
606 		}
607 		backtrack = false;
608 	}
609 
610 	if (i == last_slot + 1) {
611 		assign_slot(slot, n);
612 		return true;
613 	}
614 
615 	// reservation failed, restore previous state
616 	slots[slot] = NULL;
617 	gpr.reset();
618 	for (unsigned i = 0; i < max_slots; ++i) {
619 		alu_node *a = slots[i];
620 		if (a) {
621 			a->bc.bank_swizzle = save_bs[i];
622 			bool b = gpr.try_reserve(a);
623 			assert(b);
624 		}
625 	}
626 
627 	kc.unreserve(n);
628 	lt.unreserve(n);
629 	return false;
630 }
631 
try_reserve(alu_packed_node * p)632 bool alu_group_tracker::try_reserve(alu_packed_node* p) {
633 	bool need_unreserve = false;
634 	node_iterator I(p->begin()), E(p->end());
635 
636 	for (; I != E; ++I) {
637 		alu_node *n = static_cast<alu_node*>(*I);
638 		if (!try_reserve(n))
639 			break;
640 		else
641 			need_unreserve = true;
642 	}
643 
644 	if (I == E)  {
645 		packed_ops.push_back(p);
646 		return true;
647 	}
648 
649 	if (need_unreserve) {
650 		while (--I != E) {
651 			alu_node *n = static_cast<alu_node*>(*I);
652 			slots[n->bc.slot] = NULL;
653 		}
654 		reinit();
655 	}
656 	return false;
657 }
658 
reinit()659 void alu_group_tracker::reinit() {
660 	alu_node * s[5];
661 	memcpy(s, slots, sizeof(slots));
662 
663 	reset(true);
664 
665 	for (int i = max_slots - 1; i >= 0; --i) {
666 		if (s[i] && !try_reserve(s[i])) {
667 			sblog << "alu_group_tracker: reinit error on slot " << i <<  "\n";
668 			for (unsigned i = 0; i < max_slots; ++i) {
669 				sblog << "  slot " << i << " : ";
670 				if (s[i])
671 					dump::dump_op(s[i]);
672 
673 				sblog << "\n";
674 			}
675 			assert(!"alu_group_tracker: reinit error");
676 		}
677 	}
678 }
679 
reset(bool keep_packed)680 void alu_group_tracker::reset(bool keep_packed) {
681 	kc.reset();
682 	gpr.reset();
683 	lt.reset();
684 	memset(slots, 0, sizeof(slots));
685 	vmap.clear();
686 	next_id = 0;
687 	produces_lds_oqa = 0;
688 	consumes_lds_oqa = 0;
689 	has_mova = false;
690 	uses_ar = false;
691 	has_predset = false;
692 	has_kill = false;
693 	updates_exec_mask = false;
694 	available_slots = sh.get_ctx().has_trans ? 0x1F : 0x0F;
695 	interp_param = 0;
696 
697 	chan_count[0] = 0;
698 	chan_count[1] = 0;
699 	chan_count[2] = 0;
700 	chan_count[3] = 0;
701 
702 	if (!keep_packed)
703 		packed_ops.clear();
704 }
705 
update_flags(alu_node * n)706 void alu_group_tracker::update_flags(alu_node* n) {
707 	unsigned flags = n->bc.op_ptr->flags;
708 	has_kill |= (flags & AF_KILL);
709 	has_mova |= (flags & AF_MOVA);
710 	has_predset |= (flags & AF_ANY_PRED);
711 	uses_ar |= n->uses_ar();
712 	consumes_lds_oqa |= n->consumes_lds_oq();
713 	produces_lds_oqa |= n->produces_lds_oq();
714 	if (flags & AF_ANY_PRED) {
715 		if (n->dst[2] != NULL)
716 			updates_exec_mask = true;
717 	}
718 }
719 
run()720 int post_scheduler::run() {
721 	return run_on(sh.root) ? 0 : 1;
722 }
723 
run_on(container_node * n)724 bool post_scheduler::run_on(container_node* n) {
725 	int r = true;
726 	for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
727 		if (I->is_container()) {
728 			if (I->subtype == NST_BB) {
729 				bb_node* bb = static_cast<bb_node*>(*I);
730 				r = schedule_bb(bb);
731 			} else {
732 				r = run_on(static_cast<container_node*>(*I));
733 			}
734 			if (!r)
735 				break;
736 		}
737 	}
738 	return r;
739 }
740 
init_uc_val(container_node * c,value * v)741 void post_scheduler::init_uc_val(container_node *c, value *v) {
742 	node *d = v->any_def();
743 	if (d && d->parent == c)
744 		++ucm[d];
745 }
746 
init_uc_vec(container_node * c,vvec & vv,bool src)747 void post_scheduler::init_uc_vec(container_node *c, vvec &vv, bool src) {
748 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
749 		value *v = *I;
750 		if (!v || v->is_readonly())
751 			continue;
752 
753 		if (v->is_rel()) {
754 			init_uc_val(c, v->rel);
755 			init_uc_vec(c, v->muse, true);
756 		} if (src) {
757 			init_uc_val(c, v);
758 		}
759 	}
760 }
761 
init_ucm(container_node * c,node * n)762 unsigned post_scheduler::init_ucm(container_node *c, node *n) {
763 	init_uc_vec(c, n->src, true);
764 	init_uc_vec(c, n->dst, false);
765 
766 	uc_map::iterator F = ucm.find(n);
767 	return F == ucm.end() ? 0 : F->second;
768 }
769 
schedule_bb(bb_node * bb)770 bool post_scheduler::schedule_bb(bb_node* bb) {
771 	PSC_DUMP(
772 		sblog << "scheduling BB " << bb->id << "\n";
773 		if (!pending.empty())
774 			dump::dump_op_list(&pending);
775 	);
776 
777 	assert(pending.empty());
778 	assert(bb_pending.empty());
779 	assert(ready.empty());
780 
781 	bb_pending.append_from(bb);
782 	cur_bb = bb;
783 
784 	node *n;
785 
786 	while ((n = bb_pending.back())) {
787 
788 		PSC_DUMP(
789 			sblog << "post_sched_bb ";
790 			dump::dump_op(n);
791 			sblog << "\n";
792 		);
793 
794 		// May require emitting ALU ops to load index registers
795 		if (n->is_fetch_clause()) {
796 			n->remove();
797 			process_fetch(static_cast<container_node *>(n));
798 			continue;
799 		}
800 
801 		if (n->is_alu_clause()) {
802 			n->remove();
803 			bool r = process_alu(static_cast<container_node*>(n));
804 			if (r)
805 				continue;
806 			return false;
807 		}
808 
809 		n->remove();
810 		bb->push_front(n);
811 	}
812 
813 	this->cur_bb = NULL;
814 	return true;
815 }
816 
init_regmap()817 void post_scheduler::init_regmap() {
818 
819 	regmap.clear();
820 
821 	PSC_DUMP(
822 		sblog << "init_regmap: live: ";
823 		dump::dump_set(sh, live);
824 		sblog << "\n";
825 	);
826 
827 	for (val_set::iterator I = live.begin(sh), E = live.end(sh); I != E; ++I) {
828 		value *v = *I;
829 		assert(v);
830 		if (!v->is_sgpr() || !v->is_prealloc())
831 			continue;
832 
833 		sel_chan r = v->gpr;
834 
835 		PSC_DUMP(
836 			sblog << "init_regmap:  " << r << " <= ";
837 			dump::dump_val(v);
838 			sblog << "\n";
839 		);
840 
841 		assert(r);
842 		regmap[r] = v;
843 	}
844 }
845 
create_set_idx(shader & sh,unsigned ar_idx)846 static alu_node *create_set_idx(shader &sh, unsigned ar_idx) {
847 	alu_node *a = sh.create_alu();
848 
849 	assert(ar_idx == V_SQ_CF_INDEX_0 || ar_idx == V_SQ_CF_INDEX_1);
850 	if (ar_idx == V_SQ_CF_INDEX_0)
851 		a->bc.set_op(ALU_OP0_SET_CF_IDX0);
852 	else
853 		a->bc.set_op(ALU_OP0_SET_CF_IDX1);
854 	a->bc.slot = SLOT_X;
855 	a->dst.resize(1); // Dummy needed for recolor
856 
857 	PSC_DUMP(
858 		sblog << "created IDX load: ";
859 		dump::dump_op(a);
860 		sblog << "\n";
861 	);
862 
863 	return a;
864 }
865 
load_index_register(value * v,unsigned ar_idx)866 void post_scheduler::load_index_register(value *v, unsigned ar_idx)
867 {
868 	alu.reset();
869 
870 	if (!sh.get_ctx().is_cayman()) {
871 		// Evergreen has to first load address register, then use CF_SET_IDX0/1
872 		alu_group_tracker &rt = alu.grp();
873 		alu_node *set_idx = create_set_idx(sh, ar_idx);
874 		if (!rt.try_reserve(set_idx)) {
875 			sblog << "can't emit SET_CF_IDX";
876 			dump::dump_op(set_idx);
877 			sblog << "\n";
878 		}
879 		process_group();
880 
881 		if (!alu.check_clause_limits()) {
882 			// Can't happen since clause only contains MOVA/CF_SET_IDX0/1
883 		}
884 		alu.emit_group();
885 	}
886 
887 	alu_group_tracker &rt = alu.grp();
888 	alu_node *a = alu.create_ar_load(v, ar_idx == V_SQ_CF_INDEX_1 ? SEL_Z : SEL_Y);
889 
890 	if (!rt.try_reserve(a)) {
891 		sblog << "can't emit AR load : ";
892 		dump::dump_op(a);
893 		sblog << "\n";
894 	}
895 
896 	process_group();
897 
898 	if (!alu.check_clause_limits()) {
899 		// Can't happen since clause only contains MOVA/CF_SET_IDX0/1
900 	}
901 
902 	alu.emit_group();
903 	alu.emit_clause(cur_bb);
904 }
905 
process_fetch(container_node * c)906 void post_scheduler::process_fetch(container_node *c) {
907 	if (c->empty())
908 		return;
909 
910 	for (node_iterator N, I = c->begin(), E = c->end(); I != E; I = N) {
911 		N = I;
912 		++N;
913 
914 		node *n = *I;
915 
916 		fetch_node *f = static_cast<fetch_node*>(n);
917 
918 		PSC_DUMP(
919 			sblog << "process_tex ";
920 			dump::dump_op(n);
921 			sblog << "  ";
922 		);
923 
924 		// TODO: If same values used can avoid reloading index register
925 		if (f->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE ||
926 			f->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) {
927 			unsigned index_mode = f->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE ?
928 				f->bc.sampler_index_mode : f->bc.resource_index_mode;
929 
930 			// Currently require prior opt passes to use one TEX per indexed op
931 			assert(f->parent->count() == 1);
932 
933 			value *v = f->src.back(); // Last src is index offset
934 			assert(v);
935 
936 			cur_bb->push_front(c);
937 
938 			load_index_register(v, index_mode);
939 			f->src.pop_back(); // Don't need index value any more
940 
941 			return;
942 		}
943 	}
944 
945 	cur_bb->push_front(c);
946 }
947 
process_alu(container_node * c)948 bool post_scheduler::process_alu(container_node *c) {
949 
950 	if (c->empty())
951 		return true;
952 
953 	ucm.clear();
954 	alu.reset();
955 
956 	live = c->live_after;
957 
958 	init_globals(c->live_after, true);
959 	init_globals(c->live_before, true);
960 
961 	init_regmap();
962 
963 	update_local_interferences();
964 
965 	for (node_riterator N, I = c->rbegin(), E = c->rend(); I != E; I = N) {
966 		N = I;
967 		++N;
968 
969 		node *n = *I;
970 		unsigned uc = init_ucm(c, n);
971 
972 		PSC_DUMP(
973 			sblog << "process_alu uc=" << uc << "  ";
974 			dump::dump_op(n);
975 			sblog << "  ";
976 		);
977 
978 		if (uc) {
979 			n->remove();
980 
981 			pending.push_back(n);
982 			PSC_DUMP( sblog << "pending\n"; );
983 		} else {
984 			release_op(n);
985 		}
986 	}
987 
988 	return schedule_alu(c);
989 }
990 
update_local_interferences()991 void post_scheduler::update_local_interferences() {
992 
993 	PSC_DUMP(
994 		sblog << "update_local_interferences : ";
995 		dump::dump_set(sh, live);
996 		sblog << "\n";
997 	);
998 
999 
1000 	for (val_set::iterator I = live.begin(sh), E = live.end(sh); I != E; ++I) {
1001 		value *v = *I;
1002 		if (v->is_prealloc())
1003 			continue;
1004 
1005 		v->interferences.add_set(live);
1006 	}
1007 }
1008 
update_live_src_vec(vvec & vv,val_set * born,bool src)1009 void post_scheduler::update_live_src_vec(vvec &vv, val_set *born, bool src) {
1010 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
1011 		value *v = *I;
1012 
1013 		if (!v)
1014 			continue;
1015 
1016 		if (src && v->is_any_gpr()) {
1017 			if (live.add_val(v)) {
1018 				if (!v->is_prealloc()) {
1019 					if (!cleared_interf.contains(v)) {
1020 						PSC_DUMP(
1021 							sblog << "clearing interferences for " << *v << "\n";
1022 						);
1023 						v->interferences.clear();
1024 						cleared_interf.add_val(v);
1025 					}
1026 				}
1027 				if (born)
1028 					born->add_val(v);
1029 			}
1030 		} else if (v->is_rel()) {
1031 			if (!v->rel->is_any_gpr())
1032 				live.add_val(v->rel);
1033 			update_live_src_vec(v->muse, born, true);
1034 		}
1035 	}
1036 }
1037 
update_live_dst_vec(vvec & vv)1038 void post_scheduler::update_live_dst_vec(vvec &vv) {
1039 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
1040 		value *v = *I;
1041 		if (!v)
1042 			continue;
1043 
1044 		if (v->is_rel()) {
1045 			update_live_dst_vec(v->mdef);
1046 		} else if (v->is_any_gpr()) {
1047 			if (!live.remove_val(v)) {
1048 				PSC_DUMP(
1049 						sblog << "failed to remove ";
1050 				dump::dump_val(v);
1051 				sblog << " from live : ";
1052 				dump::dump_set(sh, live);
1053 				sblog << "\n";
1054 				);
1055 			}
1056 		}
1057 	}
1058 }
1059 
update_live(node * n,val_set * born)1060 void post_scheduler::update_live(node *n, val_set *born) {
1061 	update_live_dst_vec(n->dst);
1062 	update_live_src_vec(n->src, born, true);
1063 	update_live_src_vec(n->dst, born, false);
1064 }
1065 
process_group()1066 void post_scheduler::process_group() {
1067 	alu_group_tracker &rt = alu.grp();
1068 
1069 	val_set vals_born;
1070 
1071 	recolor_locals();
1072 
1073 	PSC_DUMP(
1074 		sblog << "process_group: live_before : ";
1075 		dump::dump_set(sh, live);
1076 		sblog << "\n";
1077 	);
1078 
1079 	for (unsigned s = 0; s < ctx.num_slots; ++s) {
1080 		alu_node *n = rt.slot(s);
1081 		if (!n)
1082 			continue;
1083 
1084 		update_live(n, &vals_born);
1085 	}
1086 
1087 	PSC_DUMP(
1088 		sblog << "process_group: live_after : ";
1089 		dump::dump_set(sh, live);
1090 		sblog << "\n";
1091 	);
1092 
1093 	update_local_interferences();
1094 
1095 	for (unsigned i = 0; i < 5; ++i) {
1096 		node *n = rt.slot(i);
1097 		if (n && !n->is_mova()) {
1098 			release_src_values(n);
1099 		}
1100 	}
1101 }
1102 
init_globals(val_set & s,bool prealloc)1103 void post_scheduler::init_globals(val_set &s, bool prealloc) {
1104 
1105 	PSC_DUMP(
1106 		sblog << "init_globals: ";
1107 		dump::dump_set(sh, s);
1108 		sblog << "\n";
1109 	);
1110 
1111 	for (val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) {
1112 		value *v = *I;
1113 		if (v->is_sgpr() && !v->is_global()) {
1114 			v->set_global();
1115 
1116 			if (prealloc && v->is_fixed()) {
1117 				v->set_prealloc();
1118 			}
1119 		}
1120 	}
1121 }
1122 
emit_index_registers()1123 void post_scheduler::emit_index_registers() {
1124 	for (unsigned i = 0; i < 2; i++) {
1125 		if (alu.current_idx[i]) {
1126 			regmap = prev_regmap;
1127 			alu.discard_current_group();
1128 
1129 			load_index_register(alu.current_idx[i], KC_INDEX_0 + i);
1130 			alu.current_idx[i] = NULL;
1131 		}
1132 	}
1133 }
1134 
emit_clause()1135 void post_scheduler::emit_clause() {
1136 
1137 	if (alu.current_ar) {
1138 		emit_load_ar();
1139 		process_group();
1140 		if (!alu.check_clause_limits()) {
1141 			// Can't happen since clause only contains MOVA/CF_SET_IDX0/1
1142 		}
1143 		alu.emit_group();
1144 	}
1145 
1146 	if (!alu.is_empty()) {
1147 		alu.emit_clause(cur_bb);
1148 	}
1149 
1150 	emit_index_registers();
1151 }
1152 
schedule_alu(container_node * c)1153 bool post_scheduler::schedule_alu(container_node *c) {
1154 
1155 	assert(!ready.empty() || !ready_copies.empty());
1156 
1157 	/* This number is rather arbitrary, important is that the scheduler has
1158 	 * more than one try to create an instruction group
1159 	 */
1160 	int improving = 10;
1161 	int last_pending = pending.count();
1162 	while (improving > 0) {
1163 		prev_regmap = regmap;
1164 		if (!prepare_alu_group()) {
1165 
1166 			int new_pending = pending.count();
1167 			if ((new_pending < last_pending) || (last_pending == 0))
1168 				improving = 10;
1169 			else
1170 				--improving;
1171 
1172 			last_pending = new_pending;
1173 
1174 			if (alu.current_idx[0] || alu.current_idx[1]) {
1175 				regmap = prev_regmap;
1176 				emit_clause();
1177 				init_globals(live, false);
1178 
1179 				continue;
1180 			}
1181 
1182 			if (alu.current_ar) {
1183 				emit_load_ar();
1184 				continue;
1185 			} else
1186 				break;
1187 		}
1188 
1189 		if (!alu.check_clause_limits()) {
1190 			regmap = prev_regmap;
1191 			emit_clause();
1192 			init_globals(live, false);
1193 
1194 			continue;
1195 		}
1196 
1197 		process_group();
1198 		alu.emit_group();
1199 	};
1200 
1201 	if (!alu.is_empty()) {
1202 		emit_clause();
1203 	}
1204 
1205 	if (!ready.empty()) {
1206 		sblog << "##post_scheduler: unscheduled ready instructions :";
1207 		dump::dump_op_list(&ready);
1208 	}
1209 
1210 	if (!pending.empty()) {
1211 		sblog << "##post_scheduler: unscheduled pending instructions :";
1212 		dump::dump_op_list(&pending);
1213 	}
1214 	return pending.empty() && ready.empty() && improving != 0;
1215 }
1216 
add_interferences(value * v,sb_bitset & rb,val_set & vs)1217 void post_scheduler::add_interferences(value *v, sb_bitset &rb, val_set &vs) {
1218 	unsigned chan = v->gpr.chan();
1219 
1220 	for (val_set::iterator I = vs.begin(sh), E = vs.end(sh);
1221 			I != E; ++I) {
1222 		value *vi = *I;
1223 		sel_chan gpr = vi->get_final_gpr();
1224 
1225 		if (vi->is_any_gpr() && gpr && vi != v &&
1226 				(!v->chunk || v->chunk != vi->chunk) &&
1227 				vi->is_fixed() && gpr.chan() == chan) {
1228 
1229 			unsigned r = gpr.sel();
1230 
1231 			PSC_DUMP(
1232 				sblog << "\tadd_interferences: " << *vi << "\n";
1233 			);
1234 
1235 			if (rb.size() <= r)
1236 				rb.resize(r + 32);
1237 			rb.set(r);
1238 		}
1239 	}
1240 }
1241 
set_color_local_val(value * v,sel_chan color)1242 void post_scheduler::set_color_local_val(value *v, sel_chan color) {
1243 	v->gpr = color;
1244 
1245 	PSC_DUMP(
1246 		sblog << "     recolored: ";
1247 		dump::dump_val(v);
1248 		sblog << "\n";
1249 	);
1250 }
1251 
set_color_local(value * v,sel_chan color)1252 void post_scheduler::set_color_local(value *v, sel_chan color) {
1253 	if (v->chunk) {
1254 		vvec &vv = v->chunk->values;
1255 		for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
1256 			value *v2 =*I;
1257 			set_color_local_val(v2, color);
1258 		}
1259 		v->chunk->fix();
1260 	} else {
1261 		set_color_local_val(v, color);
1262 		v->fix();
1263 	}
1264 }
1265 
recolor_local(value * v)1266 bool post_scheduler::recolor_local(value *v) {
1267 
1268 	sb_bitset rb;
1269 
1270 	assert(v->is_sgpr());
1271 	assert(!v->is_prealloc());
1272 	assert(v->gpr);
1273 
1274 	unsigned chan = v->gpr.chan();
1275 
1276 	PSC_DUMP(
1277 		sblog << "recolor_local: ";
1278 		dump::dump_val(v);
1279 		sblog << "   interferences: ";
1280 		dump::dump_set(sh, v->interferences);
1281 		sblog << "\n";
1282 		if (v->chunk) {
1283 			sblog << "     in chunk: ";
1284 			coalescer::dump_chunk(v->chunk);
1285 			sblog << "\n";
1286 		}
1287 	);
1288 
1289 	if (v->chunk) {
1290 		for (vvec::iterator I = v->chunk->values.begin(),
1291 				E = v->chunk->values.end(); I != E; ++I) {
1292 			value *v2 = *I;
1293 
1294 			PSC_DUMP( sblog << "   add_interferences for " << *v2 << " :\n"; );
1295 
1296 			add_interferences(v, rb, v2->interferences);
1297 		}
1298 	} else {
1299 		add_interferences(v, rb, v->interferences);
1300 	}
1301 
1302 	PSC_DUMP(
1303 		unsigned sz = rb.size();
1304 		sblog << "registers bits: " << sz;
1305 		for (unsigned r = 0; r < sz; ++r) {
1306 			if ((r & 7) == 0)
1307 				sblog << "\n  " << r << "   ";
1308 			sblog << (rb.get(r) ? 1 : 0);
1309 		}
1310 	);
1311 
1312 	bool no_temp_gprs = v->is_global();
1313 	unsigned rs, re, pass = no_temp_gprs ? 1 : 0;
1314 
1315 	while (pass < 2) {
1316 
1317 		if (pass == 0) {
1318 			rs = sh.first_temp_gpr();
1319 			re = MAX_GPR;
1320 		} else {
1321 			rs = 0;
1322 			re = sh.num_nontemp_gpr();
1323 		}
1324 
1325 		for (unsigned reg = rs; reg < re; ++reg) {
1326 			if (reg >= rb.size() || !rb.get(reg)) {
1327 				// color found
1328 				set_color_local(v, sel_chan(reg, chan));
1329 				return true;
1330 			}
1331 		}
1332 		++pass;
1333 	}
1334 
1335 	assert(!"recolor_local failed");
1336 	return true;
1337 }
1338 
emit_load_ar()1339 void post_scheduler::emit_load_ar() {
1340 
1341 	regmap = prev_regmap;
1342 	alu.discard_current_group();
1343 
1344 	alu_group_tracker &rt = alu.grp();
1345 	alu_node *a = alu.create_ar_load(alu.current_ar, SEL_X);
1346 
1347 	if (!rt.try_reserve(a)) {
1348 		sblog << "can't emit AR load : ";
1349 		dump::dump_op(a);
1350 		sblog << "\n";
1351 	}
1352 
1353 	alu.current_ar = 0;
1354 }
1355 
unmap_dst_val(value * d)1356 bool post_scheduler::unmap_dst_val(value *d) {
1357 
1358 	if (d == alu.current_ar) {
1359 		emit_load_ar();
1360 		return false;
1361 	}
1362 
1363 	if (d->is_prealloc()) {
1364 		sel_chan gpr = d->get_final_gpr();
1365 		rv_map::iterator F = regmap.find(gpr);
1366 		value *c = NULL;
1367 		if (F != regmap.end())
1368 			c = F->second;
1369 
1370 		if (c && c!=d && (!c->chunk || c->chunk != d->chunk)) {
1371 			PSC_DUMP(
1372 				sblog << "dst value conflict : ";
1373 				dump::dump_val(d);
1374 				sblog << "   regmap contains ";
1375 				dump::dump_val(c);
1376 				sblog << "\n";
1377 			);
1378 			assert(!"scheduler error");
1379 			return false;
1380 		} else if (c) {
1381 			regmap.erase(F);
1382 		}
1383 	}
1384 	return true;
1385 }
1386 
unmap_dst(alu_node * n)1387 bool post_scheduler::unmap_dst(alu_node *n) {
1388 	value *d = n->dst.empty() ? NULL : n->dst[0];
1389 
1390 	if (!d)
1391 		return true;
1392 
1393 	if (!d->is_rel()) {
1394 		if (d && d->is_any_reg()) {
1395 
1396 			if (d->is_AR()) {
1397 				if (alu.current_ar != d) {
1398 					sblog << "loading wrong ar value\n";
1399 					assert(0);
1400 				} else {
1401 					alu.current_ar = NULL;
1402 				}
1403 
1404 			} else if (d->is_any_gpr()) {
1405 				if (!unmap_dst_val(d))
1406 					return false;
1407 			}
1408 		}
1409 	} else {
1410 		for (vvec::iterator I = d->mdef.begin(), E = d->mdef.end();
1411 				I != E; ++I) {
1412 			d = *I;
1413 			if (!d)
1414 				continue;
1415 
1416 			assert(d->is_any_gpr());
1417 
1418 			if (!unmap_dst_val(d))
1419 				return false;
1420 		}
1421 	}
1422 	return true;
1423 }
1424 
map_src_val(value * v)1425 bool post_scheduler::map_src_val(value *v) {
1426 
1427 	if (!v->is_prealloc())
1428 		return true;
1429 
1430 	sel_chan gpr = v->get_final_gpr();
1431 	rv_map::iterator F = regmap.find(gpr);
1432 	value *c = NULL;
1433 	if (F != regmap.end()) {
1434 		c = F->second;
1435 		if (!v->v_equal(c)) {
1436 			PSC_DUMP(
1437 				sblog << "can't map src value ";
1438 				dump::dump_val(v);
1439 				sblog << ", regmap contains ";
1440 				dump::dump_val(c);
1441 				sblog << "\n";
1442 			);
1443 			return false;
1444 		}
1445 	} else {
1446 		regmap.insert(std::make_pair(gpr, v));
1447 	}
1448 	return true;
1449 }
1450 
map_src_vec(vvec & vv,bool src)1451 bool post_scheduler::map_src_vec(vvec &vv, bool src) {
1452 	if (src) {
1453 		// Handle possible UBO indexing
1454 		bool ubo_indexing[2] = { false, false };
1455 		for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
1456 			value *v = *I;
1457 			if (!v)
1458 				continue;
1459 
1460 			if (v->is_kcache()) {
1461 				unsigned index_mode = v->select.kcache_index_mode();
1462 				if (index_mode == KC_INDEX_0 || index_mode == KC_INDEX_1) {
1463 					ubo_indexing[index_mode - KC_INDEX_0] = true;
1464 				}
1465 			}
1466 		}
1467 
1468 		// idx values stored at end of src vec, see bc_parser::prepare_alu_group
1469 		for (unsigned i = 2; i != 0; i--) {
1470 			if (ubo_indexing[i-1]) {
1471 				// TODO: skip adding value to kcache reservation somehow, causes
1472 				// unnecessary group breaks and cache line locks
1473 				value *v = vv.back();
1474 				if (alu.current_idx[i-1] && alu.current_idx[i-1] != v) {
1475 					PSC_DUMP(
1476 						sblog << "IDX" << i-1 << " already set to " <<
1477 						*alu.current_idx[i-1] << ", trying to set " << *v << "\n";
1478 					);
1479 					return false;
1480 				}
1481 
1482 				alu.current_idx[i-1] = v;
1483 				PSC_DUMP(sblog << "IDX" << i-1 << " set to " << *v << "\n";);
1484 			}
1485 		}
1486 	}
1487 
1488 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
1489 		value *v = *I;
1490 		if (!v)
1491 			continue;
1492 
1493 		if ((!v->is_any_gpr() || !v->is_fixed()) && !v->is_rel())
1494 			continue;
1495 
1496 		if (v->is_rel()) {
1497 			value *rel = v->rel;
1498 			assert(rel);
1499 
1500 			if (!rel->is_const()) {
1501 				if (!map_src_vec(v->muse, true))
1502 					return false;
1503 
1504 				if (rel != alu.current_ar) {
1505 					if (alu.current_ar) {
1506 						PSC_DUMP(
1507 							sblog << "  current_AR is " << *alu.current_ar
1508 								<< "  trying to use " << *rel << "\n";
1509 						);
1510 						return false;
1511 					}
1512 
1513 					alu.current_ar = rel;
1514 
1515 					PSC_DUMP(
1516 						sblog << "  new current_AR assigned: " << *alu.current_ar
1517 							<< "\n";
1518 					);
1519 				}
1520 			}
1521 
1522 		} else if (src) {
1523 			if (!map_src_val(v)) {
1524 				return false;
1525 			}
1526 		}
1527 	}
1528 	return true;
1529 }
1530 
map_src(alu_node * n)1531 bool post_scheduler::map_src(alu_node *n) {
1532 	if (!map_src_vec(n->dst, false))
1533 		return false;
1534 
1535 	if (!map_src_vec(n->src, true))
1536 		return false;
1537 
1538 	return true;
1539 }
1540 
dump_regmap()1541 void post_scheduler::dump_regmap() {
1542 
1543 	sblog << "# REGMAP :\n";
1544 
1545 	for(rv_map::iterator I = regmap.begin(), E = regmap.end(); I != E; ++I) {
1546 		sblog << "  # " << I->first << " => " << *(I->second) << "\n";
1547 	}
1548 
1549 	if (alu.current_ar)
1550 		sblog << "    current_AR: " << *alu.current_ar << "\n";
1551 	if (alu.current_pr)
1552 		sblog << "    current_PR: " << *alu.current_pr << "\n";
1553 	if (alu.current_idx[0])
1554 		sblog << "    current IDX0: " << *alu.current_idx[0] << "\n";
1555 	if (alu.current_idx[1])
1556 		sblog << "    current IDX1: " << *alu.current_idx[1] << "\n";
1557 }
1558 
recolor_locals()1559 void post_scheduler::recolor_locals() {
1560 	alu_group_tracker &rt = alu.grp();
1561 
1562 	for (unsigned s = 0; s < ctx.num_slots; ++s) {
1563 		alu_node *n = rt.slot(s);
1564 		if (n) {
1565 			value *d = n->dst[0];
1566 			if (d && d->is_sgpr() && !d->is_prealloc()) {
1567 				recolor_local(d);
1568 			}
1569 		}
1570 	}
1571 }
1572 
1573 // returns true if there are interferences
check_interferences()1574 bool post_scheduler::check_interferences() {
1575 
1576 	alu_group_tracker &rt = alu.grp();
1577 
1578 	unsigned interf_slots;
1579 
1580 	bool discarded = false;
1581 
1582 	PSC_DUMP(
1583 			sblog << "check_interferences: before: \n";
1584 	dump_regmap();
1585 	);
1586 
1587 	do {
1588 
1589 		interf_slots = 0;
1590 
1591 		for (unsigned s = 0; s < ctx.num_slots; ++s) {
1592 			alu_node *n = rt.slot(s);
1593 			if (n) {
1594 				if (!unmap_dst(n)) {
1595 					return true;
1596 				}
1597 			}
1598 		}
1599 
1600 		for (unsigned s = 0; s < ctx.num_slots; ++s) {
1601 			alu_node *n = rt.slot(s);
1602 			if (n) {
1603 				if (!map_src(n)) {
1604 					interf_slots |= (1 << s);
1605 				}
1606 			}
1607 		}
1608 
1609 		PSC_DUMP(
1610 				for (unsigned i = 0; i < 5; ++i) {
1611 					if (interf_slots & (1 << i)) {
1612 						sblog << "!!!!!! interf slot: " << i << "  : ";
1613 						dump::dump_op(rt.slot(i));
1614 						sblog << "\n";
1615 					}
1616 				}
1617 		);
1618 
1619 		if (!interf_slots)
1620 			break;
1621 
1622 		PSC_DUMP( sblog << "ci: discarding slots " << interf_slots << "\n"; );
1623 
1624 		rt.discard_slots(interf_slots, alu.conflict_nodes);
1625 		regmap = prev_regmap;
1626 		discarded = true;
1627 
1628 	} while(1);
1629 
1630 	PSC_DUMP(
1631 		sblog << "check_interferences: after: \n";
1632 		dump_regmap();
1633 	);
1634 
1635 	return discarded;
1636 }
1637 
1638 // add instruction(s) (alu_node or contents of alu_packed_node) to current group
1639 // returns the number of added instructions on success
try_add_instruction(node * n)1640 unsigned post_scheduler::try_add_instruction(node *n) {
1641 
1642 	alu_group_tracker &rt = alu.grp();
1643 
1644 	unsigned avail_slots = rt.avail_slots();
1645 
1646 	// Cannot schedule in same clause as instructions using this index value
1647 	if (!n->dst.empty() && n->dst[0] &&
1648 		(n->dst[0] == alu.current_idx[0] || n->dst[0] == alu.current_idx[1])) {
1649 		PSC_DUMP(sblog << "   CF_IDX source: " << *n->dst[0] << "\n";);
1650 		return 0;
1651 	}
1652 
1653 	if (n->is_alu_packed()) {
1654 		alu_packed_node *p = static_cast<alu_packed_node*>(n);
1655 		unsigned slots = p->get_slot_mask();
1656 		unsigned cnt = __builtin_popcount(slots);
1657 
1658 		if ((slots & avail_slots) != slots) {
1659 			PSC_DUMP( sblog << "   no slots \n"; );
1660 			return 0;
1661 		}
1662 
1663 		p->update_packed_items(ctx);
1664 
1665 		if (!rt.try_reserve(p)) {
1666 			PSC_DUMP( sblog << "   reservation failed \n"; );
1667 			return 0;
1668 		}
1669 
1670 		p->remove();
1671 		return cnt;
1672 
1673 	} else {
1674 		alu_node *a = static_cast<alu_node*>(n);
1675 		value *d = a->dst.empty() ? NULL : a->dst[0];
1676 
1677 		if (d && d->is_special_reg()) {
1678 			assert((a->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit() || d->is_lds_oq() || d->is_lds_access() || d->is_scratch());
1679 			d = NULL;
1680 		}
1681 
1682 		unsigned allowed_slots = ctx.alu_slots_mask(a->bc.op_ptr);
1683 		unsigned slot;
1684 
1685 		allowed_slots &= avail_slots;
1686 
1687 		if (!allowed_slots)
1688 			return 0;
1689 
1690 		if (d) {
1691 			slot = d->get_final_chan();
1692 			a->bc.dst_chan = slot;
1693 			allowed_slots &= (1 << slot) | 0x10;
1694 		} else {
1695 			if (a->bc.op_ptr->flags & AF_MOVA) {
1696 				if (a->bc.slot_flags & AF_V)
1697 					allowed_slots &= (1 << SLOT_X);
1698 				else
1699 					allowed_slots &= (1 << SLOT_TRANS);
1700 			}
1701 		}
1702 
1703 		// FIXME workaround for some problems with MULADD in trans slot on r700,
1704 		// (is it really needed on r600?)
1705 		if ((a->bc.op == ALU_OP3_MULADD || a->bc.op == ALU_OP3_MULADD_IEEE) &&
1706 				!ctx.is_egcm()) {
1707 			allowed_slots &= 0x0F;
1708 		}
1709 
1710 		if (!allowed_slots) {
1711 			PSC_DUMP( sblog << "   no suitable slots\n"; );
1712 			return 0;
1713 		}
1714 
1715 		slot = __builtin_ctz(allowed_slots);
1716 		a->bc.slot = slot;
1717 
1718 		PSC_DUMP( sblog << "slot: " << slot << "\n"; );
1719 
1720 		if (!rt.try_reserve(a)) {
1721 			PSC_DUMP( sblog << "   reservation failed\n"; );
1722 			return 0;
1723 		}
1724 
1725 		a->remove();
1726 		return 1;
1727 	}
1728 }
1729 
check_copy(node * n)1730 bool post_scheduler::check_copy(node *n) {
1731 	if (!n->is_copy_mov())
1732 		return false;
1733 
1734 	value *s = n->src[0];
1735 	value *d = n->dst[0];
1736 
1737 	if (!s->is_sgpr() || !d->is_sgpr())
1738 		return false;
1739 
1740 	if (!s->is_prealloc()) {
1741 		recolor_local(s);
1742 
1743 		if (!s->chunk || s->chunk != d->chunk)
1744 			return false;
1745 	}
1746 
1747 	if (s->gpr == d->gpr) {
1748 
1749 		PSC_DUMP(
1750 			sblog << "check_copy: ";
1751 			dump::dump_op(n);
1752 			sblog << "\n";
1753 		);
1754 
1755 		rv_map::iterator F = regmap.find(d->gpr);
1756 		bool gpr_free = (F == regmap.end());
1757 
1758 		if (d->is_prealloc()) {
1759 			if (gpr_free) {
1760 				PSC_DUMP( sblog << "    copy not ready...\n";);
1761 				return true;
1762 			}
1763 
1764 			value *rv = F->second;
1765 			if (rv != d && (!rv->chunk || rv->chunk != d->chunk)) {
1766 				PSC_DUMP( sblog << "    copy not ready(2)...\n";);
1767 				return true;
1768 			}
1769 
1770 			unmap_dst(static_cast<alu_node*>(n));
1771 		}
1772 
1773 		if (s->is_prealloc() && !map_src_val(s))
1774 			return true;
1775 
1776 		update_live(n, NULL);
1777 
1778 		release_src_values(n);
1779 		n->remove();
1780 		PSC_DUMP( sblog << "    copy coalesced...\n";);
1781 		return true;
1782 	}
1783 	return false;
1784 }
1785 
dump_group(alu_group_tracker & rt)1786 void post_scheduler::dump_group(alu_group_tracker &rt) {
1787 	for (unsigned i = 0; i < 5; ++i) {
1788 		node *n = rt.slot(i);
1789 		if (n) {
1790 			sblog << "slot " << i << " : ";
1791 			dump::dump_op(n);
1792 			sblog << "\n";
1793 		}
1794 	}
1795 }
1796 
process_ready_copies()1797 void post_scheduler::process_ready_copies() {
1798 
1799 	node *last;
1800 
1801 	do {
1802 		last = ready_copies.back();
1803 
1804 		for (node_iterator N, I = ready_copies.begin(), E = ready_copies.end();
1805 				I != E; I = N) {
1806 			N = I; ++N;
1807 
1808 			node *n = *I;
1809 
1810 			if (!check_copy(n)) {
1811 				n->remove();
1812 				ready.push_back(n);
1813 			}
1814 		}
1815 	} while (last != ready_copies.back());
1816 
1817 	update_local_interferences();
1818 }
1819 
1820 
prepare_alu_group()1821 bool post_scheduler::prepare_alu_group() {
1822 
1823 	alu_group_tracker &rt = alu.grp();
1824 
1825 	unsigned i1 = 0;
1826 
1827 	PSC_DUMP(
1828 		sblog << "prepare_alu_group: starting...\n";
1829 		dump_group(rt);
1830 	);
1831 
1832 	ready.append_from(&alu.conflict_nodes);
1833 
1834 	// FIXME rework this loop
1835 
1836 	do {
1837 
1838 		process_ready_copies();
1839 
1840 		++i1;
1841 
1842 		for (node_iterator N, I = ready.begin(), E = ready.end(); I != E;
1843 				I = N) {
1844 			N = I; ++N;
1845 			node *n = *I;
1846 
1847 			PSC_DUMP(
1848 				sblog << "p_a_g: ";
1849 				dump::dump_op(n);
1850 				sblog << "\n";
1851 			);
1852 
1853 
1854 			unsigned cnt = try_add_instruction(n);
1855 
1856 			if (!cnt)
1857 				continue;
1858 
1859 			PSC_DUMP(
1860 				sblog << "current group:\n";
1861 				dump_group(rt);
1862 			);
1863 
1864 			if (rt.inst_count() == ctx.num_slots) {
1865 				PSC_DUMP( sblog << " all slots used\n"; );
1866 				break;
1867 			}
1868 		}
1869 
1870 		if (!check_interferences())
1871 			break;
1872 
1873 		// don't try to add more instructions to the group with mova if this
1874 		// can lead to breaking clause slot count limit - we don't want mova to
1875 		// end up in the end of the new clause instead of beginning of the
1876 		// current clause.
1877 		if (rt.has_ar_load() && alu.total_slots() > 121)
1878 			break;
1879 
1880 		if (rt.inst_count() && i1 > 50)
1881 			break;
1882 
1883 		regmap = prev_regmap;
1884 
1885 	} while (1);
1886 
1887 	PSC_DUMP(
1888 		sblog << " prepare_alu_group done, " << rt.inst_count()
1889 	          << " slot(s) \n";
1890 
1891 		sblog << "$$$$$$$$PAG i1=" << i1
1892 				<< "  ready " << ready.count()
1893 				<< "  pending " << pending.count()
1894 				<< "  conflicting " << alu.conflict_nodes.count()
1895 				<<"\n";
1896 
1897 	);
1898 
1899 	return rt.inst_count();
1900 }
1901 
release_src_values(node * n)1902 void post_scheduler::release_src_values(node* n) {
1903 	release_src_vec(n->src, true);
1904 	release_src_vec(n->dst, false);
1905 }
1906 
release_op(node * n)1907 void post_scheduler::release_op(node *n) {
1908 	PSC_DUMP(
1909 		sblog << "release_op ";
1910 		dump::dump_op(n);
1911 		sblog << "\n";
1912 	);
1913 
1914 	n->remove();
1915 
1916 	if (n->is_copy_mov()) {
1917 		ready_copies.push_back(n);
1918 	} else if (n->is_mova() || n->is_pred_set()) {
1919 		ready.push_front(n);
1920 	} else {
1921 		ready.push_back(n);
1922 	}
1923 }
1924 
release_src_val(value * v)1925 void post_scheduler::release_src_val(value *v) {
1926 	node *d = v->any_def();
1927 	if (d) {
1928 		if (!--ucm[d])
1929 			release_op(d);
1930 	}
1931 }
1932 
release_src_vec(vvec & vv,bool src)1933 void post_scheduler::release_src_vec(vvec& vv, bool src) {
1934 
1935 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
1936 		value *v = *I;
1937 		if (!v || v->is_readonly())
1938 			continue;
1939 
1940 		if (v->is_rel()) {
1941 			release_src_val(v->rel);
1942 			release_src_vec(v->muse, true);
1943 
1944 		} else if (src) {
1945 			release_src_val(v);
1946 		}
1947 	}
1948 }
1949 
reset()1950 void literal_tracker::reset() {
1951 	lt[0].u = 0;
1952 	lt[1].u = 0;
1953 	lt[2].u = 0;
1954 	lt[3].u = 0;
1955 	memset(uc, 0, sizeof(uc));
1956 }
1957 
reset()1958 void rp_gpr_tracker::reset() {
1959 	memset(rp, 0, sizeof(rp));
1960 	memset(uc, 0, sizeof(uc));
1961 }
1962 
reset()1963 void rp_kcache_tracker::reset() {
1964 	memset(rp, 0, sizeof(rp));
1965 	memset(uc, 0, sizeof(uc));
1966 }
1967 
reset()1968 void alu_kcache_tracker::reset() {
1969 	memset(kc, 0, sizeof(kc));
1970 	lines.clear();
1971 }
1972 
reset()1973 void alu_clause_tracker::reset() {
1974 	group = 0;
1975 	slot_count = 0;
1976 	outstanding_lds_oqa_reads = 0;
1977 	grp0.reset();
1978 	grp1.reset();
1979 }
1980 
alu_clause_tracker(shader & sh)1981 alu_clause_tracker::alu_clause_tracker(shader &sh)
1982 	: sh(sh), kt(sh.get_ctx().hw_class), slot_count(),
1983 	  grp0(sh), grp1(sh),
1984 	  group(), clause(),
1985 	  push_exec_mask(), outstanding_lds_oqa_reads(),
1986 	  current_ar(), current_pr(), current_idx() {}
1987 
emit_group()1988 void alu_clause_tracker::emit_group() {
1989 
1990 	assert(grp().inst_count());
1991 
1992 	alu_group_node *g = grp().emit();
1993 
1994 	if (grp().has_update_exec_mask()) {
1995 		assert(!push_exec_mask);
1996 		push_exec_mask = true;
1997 	}
1998 
1999 	assert(g);
2000 
2001 	if (!clause) {
2002 		clause = sh.create_clause(NST_ALU_CLAUSE);
2003 	}
2004 
2005 	clause->push_front(g);
2006 
2007 	outstanding_lds_oqa_reads += grp().get_consumes_lds_oqa();
2008 	outstanding_lds_oqa_reads -= grp().get_produces_lds_oqa();
2009 	slot_count += grp().slot_count();
2010 
2011 	new_group();
2012 
2013 	PSC_DUMP( sblog << "   #### group emitted\n"; );
2014 }
2015 
emit_clause(container_node * c)2016 void alu_clause_tracker::emit_clause(container_node *c) {
2017 	assert(clause);
2018 
2019 	kt.init_clause(clause->bc);
2020 
2021 	assert(!outstanding_lds_oqa_reads);
2022 	assert(!current_ar);
2023 	assert(!current_pr);
2024 
2025 	if (push_exec_mask)
2026 		clause->bc.set_op(CF_OP_ALU_PUSH_BEFORE);
2027 
2028 	c->push_front(clause);
2029 
2030 	clause = NULL;
2031 	push_exec_mask = false;
2032 	slot_count = 0;
2033 	kt.reset();
2034 
2035 	PSC_DUMP( sblog << "######### ALU clause emitted\n"; );
2036 }
2037 
check_clause_limits()2038 bool alu_clause_tracker::check_clause_limits() {
2039 
2040 	alu_group_tracker &gt = grp();
2041 
2042 	unsigned slots = gt.slot_count();
2043 
2044 	// reserving slots to load AR and PR values
2045 	unsigned reserve_slots = (current_ar ? 1 : 0) + (current_pr ? 1 : 0);
2046 	// ...and index registers
2047 	reserve_slots += (current_idx[0] != NULL) + (current_idx[1] != NULL);
2048 
2049 	if (gt.get_consumes_lds_oqa() && !outstanding_lds_oqa_reads)
2050 		reserve_slots += 60;
2051 
2052 	if (slot_count + slots > MAX_ALU_SLOTS - reserve_slots)
2053 		return false;
2054 
2055 	if (!kt.try_reserve(gt))
2056 		return false;
2057 
2058 	return true;
2059 }
2060 
new_group()2061 void alu_clause_tracker::new_group() {
2062 	group = !group;
2063 	grp().reset();
2064 }
2065 
is_empty()2066 bool alu_clause_tracker::is_empty() {
2067 	return clause == NULL;
2068 }
2069 
init_group_literals(alu_group_node * g)2070 void literal_tracker::init_group_literals(alu_group_node* g) {
2071 
2072 	g->literals.clear();
2073 	for (unsigned i = 0; i < 4; ++i) {
2074 		if (!lt[i])
2075 			break;
2076 
2077 		g->literals.push_back(lt[i]);
2078 
2079 		PSC_DUMP(
2080 			sblog << "literal emitted: " << lt[i].f;
2081 			sblog.print_zw_hex(lt[i].u, 8);
2082 			sblog << "   " << lt[i].i << "\n";
2083 		);
2084 	}
2085 }
2086 
try_reserve(alu_group_tracker & gt)2087 bool alu_kcache_tracker::try_reserve(alu_group_tracker& gt) {
2088 	rp_kcache_tracker &kt = gt.kcache();
2089 
2090 	if (!kt.num_sels())
2091 		return true;
2092 
2093 	sb_set<unsigned> group_lines;
2094 
2095 	unsigned nl = kt.get_lines(group_lines);
2096 	assert(nl);
2097 
2098 	sb_set<unsigned> clause_lines(lines);
2099 	lines.add_set(group_lines);
2100 
2101 	if (clause_lines.size() == lines.size())
2102 		return true;
2103 
2104 	if (update_kc())
2105 		return true;
2106 
2107 	lines = clause_lines;
2108 
2109 	return false;
2110 }
2111 
get_lines(kc_lines & lines)2112 unsigned rp_kcache_tracker::get_lines(kc_lines& lines) {
2113 	unsigned cnt = 0;
2114 
2115 	for (unsigned i = 0; i < sel_count; ++i) {
2116 		unsigned line = rp[i] & 0x1fffffffu;
2117 		unsigned index_mode = rp[i] >> 29;
2118 
2119 		if (!line)
2120 			return cnt;
2121 
2122 		--line;
2123 		line = (sel_count == 2) ? line >> 5 : line >> 6;
2124 		line |= index_mode << 29;
2125 
2126 		if (lines.insert(line).second)
2127 			++cnt;
2128 	}
2129 	return cnt;
2130 }
2131 
update_kc()2132 bool alu_kcache_tracker::update_kc() {
2133 	unsigned c = 0;
2134 
2135 	bc_kcache old_kc[4];
2136 	memcpy(old_kc, kc, sizeof(kc));
2137 
2138 	for (kc_lines::iterator I = lines.begin(), E = lines.end(); I != E; ++I) {
2139 		unsigned index_mode = *I >> 29;
2140 		unsigned line = *I & 0x1fffffffu;
2141 		unsigned bank = line >> 8;
2142 
2143 		assert(index_mode <= KC_INDEX_INVALID);
2144 		line &= 0xFF;
2145 
2146 		if (c && (bank == kc[c-1].bank) && (kc[c-1].addr + 1 == line) &&
2147 			kc[c-1].index_mode == index_mode)
2148 		{
2149 			kc[c-1].mode = KC_LOCK_2;
2150 		} else {
2151 			if (c == max_kcs) {
2152 				memcpy(kc, old_kc, sizeof(kc));
2153 				return false;
2154 			}
2155 
2156 			kc[c].mode = KC_LOCK_1;
2157 
2158 			kc[c].bank = bank;
2159 			kc[c].addr = line;
2160 			kc[c].index_mode = index_mode;
2161 			++c;
2162 		}
2163 	}
2164 	return true;
2165 }
2166 
create_ar_load(value * v,chan_select ar_channel)2167 alu_node* alu_clause_tracker::create_ar_load(value *v, chan_select ar_channel) {
2168 	alu_node *a = sh.create_alu();
2169 
2170 	if (sh.get_ctx().uses_mova_gpr) {
2171 		a->bc.set_op(ALU_OP1_MOVA_GPR_INT);
2172 		a->bc.slot = SLOT_TRANS;
2173 	} else {
2174 		a->bc.set_op(ALU_OP1_MOVA_INT);
2175 		a->bc.slot = SLOT_X;
2176 	}
2177 	a->bc.dst_chan = ar_channel;
2178 	if (ar_channel != SEL_X && sh.get_ctx().is_cayman()) {
2179 		a->bc.dst_gpr = ar_channel == SEL_Y ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
2180 	}
2181 
2182 	a->dst.resize(1);
2183 	a->src.push_back(v);
2184 
2185 	PSC_DUMP(
2186 		sblog << "created AR load: ";
2187 		dump::dump_op(a);
2188 		sblog << "\n";
2189 	);
2190 
2191 	return a;
2192 }
2193 
discard_current_group()2194 void alu_clause_tracker::discard_current_group() {
2195 	PSC_DUMP( sblog << "act::discard_current_group\n"; );
2196 	grp().discard_all_slots(conflict_nodes);
2197 }
2198 
dump()2199 void rp_gpr_tracker::dump() {
2200 	sblog << "=== gpr_tracker dump:\n";
2201 	for (int c = 0; c < 3; ++c) {
2202 		sblog << "cycle " << c << "      ";
2203 		for (int h = 0; h < 4; ++h) {
2204 			sblog << rp[c][h] << ":" << uc[c][h] << "   ";
2205 		}
2206 		sblog << "\n";
2207 	}
2208 }
2209 
2210 } // namespace r600_sb
2211