/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define GCM_DEBUG 0

#if GCM_DEBUG
#define GCM_DUMP(a) do { a } while (0)
#else
#define GCM_DUMP(a)
#endif

#include <map>

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
#include "eg_sq.h" // V_SQ_CF_INDEX_NONE

namespace r600_sb {

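// gcm - Global Code Motion pass. Scheduling happens in two passes:
// the early (top-down) pass places every op in the first BB where all
// of its source defs are available (op_info::top_bb); the late
// (bottom-up) pass finds the last BB where all of its uses are
// satisfied (op_info::bottom_bb) and then, in bu_find_best_bb, picks
// the block with the lowest loop nesting level between the two.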
int gcm::run() {

	GCM_DUMP( sblog << "==== GCM ==== \n"; sh.dump_ir(); );

	collect_instructions(sh.root, true);

	init_def_count(uses, pending);

	for (node_iterator N, I = pending.begin(), E = pending.end();
			I != E; I = N) {
		N = I;
		++N;
		node *o = *I;

		GCM_DUMP(
			sblog << "pending : ";
			dump::dump_op(o);
			sblog << "\n";
		);

		if (td_is_ready(o)) {

			GCM_DUMP(
				sblog << "  ready: ";
				dump::dump_op(o);
				sblog << "\n";
			);
			pending.remove_node(o);
			ready.push_back(o);
		}
	}

	sched_early(sh.root);

	if (!pending.empty()) {
		sblog << "##### gcm_sched_early_pass: unscheduled ops:\n";
		dump::dump_op(pending.front());
	}

	assert(pending.empty());

	GCM_DUMP( sh.dump_ir(); );

	GCM_DUMP( sblog << "\n\n ############## gcm late\n\n"; );

	collect_instructions(sh.root, false);

	init_use_count(uses, pending);

	sched_late(sh.root);
	if (!pending.empty()) {
		sblog << "##### gcm_sched_late_pass: unscheduled ops:\n";
		dump::dump_op(pending.front());
	}

	assert(ucs_level == 0);
	assert(pending.empty());

	return 0;
}

void gcm::collect_instructions(container_node *c, bool early_pass) {
	if (c->is_bb()) {

		if (early_pass) {
			for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
				node *n = *I;
				if (n->flags & NF_DONT_MOVE) {
					op_info &o = op_map[n];
					o.top_bb = o.bottom_bb = static_cast<bb_node*>(c);
				}
			}
		}

		pending.append_from(c);
		return;
	}

	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
		if (I->is_container()) {
			collect_instructions(static_cast<container_node*>(*I), early_pass);
		}
	}
}

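// Recursive top-down walk. For regions, loop phis are scheduled before
// the body and regular phis after it; phi results are released with
// td_release_uses so dependent ops can become ready. Actual basic
// blocks are filled by td_sched_bb.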
void gcm::sched_early(container_node *n) {

	region_node *r =
			(n->type == NT_REGION) ? static_cast<region_node*>(n) : NULL;

	if (r && r->loop_phi) {
		sched_early(r->loop_phi);
	}

	for (node_iterator I = n->begin(), E = n->end(); I != E; ++I) {
		if (I->type == NT_OP) {
			node *op = *I;
			if (op->subtype == NST_PHI) {
				td_release_uses(op->dst);
			}
		} else if (I->is_container()) {
			if (I->subtype == NST_BB) {
				bb_node* bb = static_cast<bb_node*>(*I);
				td_sched_bb(bb);
			} else {
				sched_early(static_cast<container_node*>(*I));
			}
		}
	}

	if (r && r->phi) {
		sched_early(r->phi);
	}
}

void gcm::td_schedule(bb_node *bb, node *n) {
	GCM_DUMP(
		sblog << "scheduling : ";
		dump::dump_op(n);
		sblog << "\n";
	);
	td_release_uses(n->dst);

	bb->push_back(n);

	op_map[n].top_bb = bb;
}

void gcm::td_sched_bb(bb_node* bb) {
	GCM_DUMP(
		sblog << "td scheduling BB_" << bb->id << "\n";
	);

	while (!ready.empty()) {
		for (sq_iterator N, I = ready.begin(), E = ready.end(); I != E;
				I = N) {
			N = I; ++N;
			td_schedule(bb, *I);
			ready.erase(I);
		}
	}
}

bool gcm::td_is_ready(node* n) {
	return uses[n] == 0;
}

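// Early-pass bookkeeping: uses[n] here holds the number of source defs
// op n is still waiting for (set up by init_def_count). Scheduling a
// value decrements the counter of each pending consumer; ops that
// reach zero move from 'pending' to 'ready'.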
void gcm::td_release_val(value *v) {

	GCM_DUMP(
		sblog << "td checking uses: ";
		dump::dump_val(v);
		sblog << "\n";
	);

	for (uselist::iterator I = v->uses.begin(), E = v->uses.end(); I != E; ++I) {
		use_info *u = *I;
		if (u->op->parent != &pending) {
			continue;
		}

		GCM_DUMP(
			sblog << "td    used in ";
			dump::dump_op(u->op);
			sblog << "\n";
		);

		assert(uses[u->op] > 0);
		if (--uses[u->op] == 0) {
			GCM_DUMP(
				sblog << "td        released : ";
				dump::dump_op(u->op);
				sblog << "\n";
			);

			pending.remove_node(u->op);
			ready.push_back(u->op);
		}
	}
}

void gcm::td_release_uses(vvec& v) {
	for (vvec::iterator I = v.begin(), E = v.end(); I != E; ++I) {
		value *v = *I;
		if (!v)
			continue;

		if (v->is_rel())
			td_release_uses(v->mdef);
		else
			td_release_val(v);
	}
}

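// Recursive bottom-up walk, visiting children in reverse order.
// Depart/repeat nodes open a new level on the use-count stack and
// release the phi operands contributed by their branch (dep_id/rep_id
// selects the source); the level is merged back in pop_uc_stack.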
void gcm::sched_late(container_node *n) {

	bool stack_pushed = false;

	if (n->is_depart()) {
		depart_node *d = static_cast<depart_node*>(n);
		push_uc_stack();
		stack_pushed = true;
		bu_release_phi_defs(d->target->phi, d->dep_id);
	} else if (n->is_repeat()) {
		repeat_node *r = static_cast<repeat_node*>(n);
		assert(r->target->loop_phi);
		push_uc_stack();
		stack_pushed = true;
		bu_release_phi_defs(r->target->loop_phi, r->rep_id);
	}

	for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
		if (I->is_container()) {
			if (I->subtype == NST_BB) {
				bb_node* bb = static_cast<bb_node*>(*I);
				bu_sched_bb(bb);
			} else {
				sched_late(static_cast<container_node*>(*I));
			}
		}
	}

	if (n->type == NT_IF) {
		if_node *f = static_cast<if_node*>(n);
		if (f->cond)
			pending_defs.push_back(f->cond);
	} else if (n->type == NT_REGION) {
		region_node *r = static_cast<region_node*>(n);
		if (r->loop_phi)
			bu_release_phi_defs(r->loop_phi, 0);
	}

	if (stack_pushed)
		pop_uc_stack();
}

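// Bottom-up scheduling of a single BB. Ready ops live in per-queue
// lists (SQ_CF/SQ_ALU/SQ_TEX/SQ_VTX) and are emitted into clauses of
// the matching type; the main loop bounds clause size by ctx.max_fetch
// and switches between ALU and fetch clauses with a simple
// register-pressure heuristic (live_count vs. rp_threshold).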
void gcm::bu_sched_bb(bb_node* bb) {
	GCM_DUMP(
		sblog << "bu scheduling BB_" << bb->id << "\n";
	);

	bu_bb = bb;

	if (!pending_nodes.empty()) {
		GCM_DUMP(
			sblog << "pending nodes:\n";
		);

		// TODO consider sorting the exports by array_base,
		// possibly it can improve performance

		for (node_list::iterator I = pending_nodes.begin(),
				E = pending_nodes.end(); I != E; ++I) {
			bu_release_op(*I);
		}
		pending_nodes.clear();
		GCM_DUMP(
			sblog << "pending nodes processed...\n";
		);
	}

	if (!pending_defs.empty()) {
		for (vvec::iterator I = pending_defs.begin(), E = pending_defs.end();
				I != E; ++I) {
			bu_release_val(*I);
		}
		pending_defs.clear();
	}

	for (sched_queue::iterator N, I = ready_above.begin(), E = ready_above.end();
			I != E; I = N) {
		N = I;
		++N;
		node *n = *I;
		if (op_map[n].bottom_bb == bb) {
			add_ready(*I);
			ready_above.erase(I);
		}
	}

	unsigned cnt_ready[SQ_NUM];

	container_node *clause = NULL;
	unsigned last_inst_type = ~0;
	unsigned last_count = 0;

	bool s = true;
	while (s) {
		node *n;

		s = false;

		unsigned ready_mask = 0;

		for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {
			if (!bu_ready[sq].empty() || !bu_ready_next[sq].empty())
				ready_mask |= (1 << sq);
		}

		if (!ready_mask) {
			for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {
				if (!bu_ready_early[sq].empty()) {
					node *n = bu_ready_early[sq].front();
					bu_ready_early[sq].pop_front();
					bu_ready[sq].push_back(n);
					break;
				}
			}
		}

		for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {

			if (sq == SQ_CF && pending_exec_mask_update) {
				// skip the CF queue while an exec mask update (PRED_SET)
				// is still pending in the ALU queues
				pending_exec_mask_update = false;
				sq = SQ_ALU;
				--sq;
				continue;
			}

			if (!bu_ready_next[sq].empty())
				bu_ready[sq].splice(bu_ready[sq].end(), bu_ready_next[sq]);

			cnt_ready[sq] = bu_ready[sq].size();

			if ((sq == SQ_TEX || sq == SQ_VTX) && live_count <= rp_threshold &&
					cnt_ready[sq] < ctx.max_fetch/2 &&
					!bu_ready_next[SQ_ALU].empty()) {
				sq = SQ_ALU;
				--sq;
				continue;
			}

			while (!bu_ready[sq].empty()) {

				if (last_inst_type != sq) {
					clause = NULL;
					last_count = 0;
					last_inst_type = sq;
				}

				// simple heuristic to limit register pressure
				if (sq == SQ_ALU && live_count > rp_threshold &&
						(!bu_ready[SQ_TEX].empty() ||
						 !bu_ready[SQ_VTX].empty() ||
						 !bu_ready_next[SQ_TEX].empty() ||
						 !bu_ready_next[SQ_VTX].empty())) {
					GCM_DUMP( sblog << "switching to fetch (regpressure)\n"; );
					break;
				}

				n = bu_ready[sq].front();

				// real count (e.g. SAMPLE_G will be expanded to 3 instructions:
				// 2 SET_GRAD_ + 1 SAMPLE_G)
				unsigned ncnt = 1;
				if (n->is_fetch_inst() && n->src.size() == 12) {
					ncnt = 3;
				}

				bool sampler_indexing = false;
				if (n->is_fetch_inst() &&
					static_cast<fetch_node *>(n)->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE)
				{
					sampler_indexing = true; // sampler-indexed ops get their own clause
					ncnt = sh.get_ctx().is_cayman() ? 2 : 3; // MOVA + SET_CF_IDX0/1
				}

				if ((sq == SQ_TEX || sq == SQ_VTX) &&
						((last_count >= ctx.max_fetch/2 &&
						check_alu_ready_count(24)) ||
								last_count + ncnt > ctx.max_fetch))
					break;
				else if (sq == SQ_CF && last_count > 4 &&
						check_alu_ready_count(24))
					break;

				bu_ready[sq].pop_front();

				if (sq != SQ_CF) {
					if (!clause || sampler_indexing) {
						clause = sh.create_clause(sq == SQ_ALU ?
								NST_ALU_CLAUSE :
									sq == SQ_TEX ? NST_TEX_CLAUSE :
											NST_VTX_CLAUSE);
						bb->push_front(clause);
					}
				} else {
					clause = bb;
				}

				bu_schedule(clause, n);
				s = true;
				last_count += ncnt;
			}
		}
	}

	bu_bb = NULL;

	GCM_DUMP(
		sblog << "bu finished scheduling BB_" << bb->id << "\n";
	);
}

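// Releases the values attached to a just-scheduled op: for sources,
// bu_release_val may make the defining ops ready; for dsts, the value
// leaves the live set and live_count is updated accordingly.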
void gcm::bu_release_defs(vvec& v, bool src) {
	for (vvec::reverse_iterator I = v.rbegin(), E = v.rend(); I != E; ++I) {
		value *v = *I;
		if (!v || v->is_readonly())
			continue;

		if (v->is_rel()) {
			if (!v->rel->is_readonly())
				bu_release_val(v->rel);
			bu_release_defs(v->muse, true);
		} else if (src)
			bu_release_val(v);
		else {
			if (live.remove_val(v)) {
				--live_count;
			}
		}
	}
}

void gcm::push_uc_stack() {
	GCM_DUMP(
		sblog << "pushing use count stack prev_level " << ucs_level
			<< "   new level " << (ucs_level + 1) << "\n";
	);
	++ucs_level;
	if (ucs_level == nuc_stk.size()) {
		nuc_stk.resize(ucs_level + 1);
	} else {
		nuc_stk[ucs_level].clear();
	}
}

bool gcm::bu_is_ready(node* n) {
	nuc_map &cm = nuc_stk[ucs_level];
	nuc_map::iterator F = cm.find(n);
	unsigned uc = (F == cm.end() ? 0 : F->second);
	return uc == uses[n];
}

void gcm::bu_schedule(container_node* c, node* n) {
	GCM_DUMP(
		sblog << "bu scheduling : ";
		dump::dump_op(n);
		sblog << "\n";
	);

	assert(op_map[n].bottom_bb == bu_bb);

	bu_release_defs(n->src, true);
	bu_release_defs(n->dst, false);

	c->push_front(n);
}

void gcm::dump_uc_stack() {
	sblog << "##### uc_stk start ####\n";
	for (unsigned l = 0; l <= ucs_level; ++l) {
		nuc_map &m = nuc_stk[l];

		sblog << "nuc_stk[" << l << "] :   @" << &m << "\n";

		for (nuc_map::iterator I = m.begin(), E = m.end(); I != E; ++I) {
			sblog << "    uc " << I->second << " for ";
			dump::dump_op(I->first);
			sblog << "\n";
		}
	}
	sblog << "##### uc_stk end ####\n";
}

void gcm::pop_uc_stack() {
	nuc_map &pm = nuc_stk[ucs_level];
	--ucs_level;
	nuc_map &cm = nuc_stk[ucs_level];

	GCM_DUMP(
		sblog << "merging use stack from level " << (ucs_level + 1)
			<< " to " << ucs_level << "\n";
	);

	for (nuc_map::iterator I = pm.begin(), E = pm.end(); I != E; ++I) {
		node *n = I->first;

		GCM_DUMP(
			sblog << "      " << cm[n] << " += " << I->second << "  for ";
			dump::dump_op(n);
			sblog << "\n";
		);

		unsigned uc = cm[n] += I->second;

		if (n->parent == &pending && uc == uses[n]) {
			cm.erase(n);
			pending_nodes.push_back(n);
			GCM_DUMP(
				sblog << "pushed pending_node due to stack pop ";
				dump::dump_op(n);
				sblog << "\n";
			);
		}
	}
}

void gcm::bu_find_best_bb(node *n, op_info &oi) {

	GCM_DUMP(
		sblog << "  find best bb : ";
		dump::dump_op(n);
		sblog << "\n";
	);

	if (oi.bottom_bb)
		return;

	// don't hoist generated copies
	if (n->flags & NF_DONT_HOIST) {
		oi.bottom_bb = bu_bb;
		return;
	}

	bb_node* best_bb = bu_bb;
	bb_node* top_bb = oi.top_bb;
	assert(oi.top_bb && !oi.bottom_bb);

	node *c = best_bb;

	// FIXME top_bb may be located inside the loop, so we'll never reach it
	// in the walk below, and the instruction will be incorrectly placed at
	// the beginning of the shader.
	// For now, just check if top_bb's loop_level is higher than the current
	// bb's and abort the search for a better bb in that case, but this
	// problem may require a more complete (and more expensive) fix.
	if (top_bb->loop_level <= best_bb->loop_level) {
		while (c && c != top_bb) {

			if (c->prev) {
				c = c->prev;
			} else {
				c = c->parent;
				if (!c)
					break;
				continue;
			}

			if (c->subtype == NST_BB) {
				bb_node *bb = static_cast<bb_node*>(c);
				if (bb->loop_level < best_bb->loop_level)
					best_bb = bb;
			}
		}
	}

	oi.bottom_bb = best_bb;
}

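// Queue placement: NF_SCHEDULE_EARLY ops wait in bu_ready_early until
// nothing else is ready, trivial ALU copies jump to the front of the
// current ready list, and everything else is queued in bu_ready_next
// for the following round.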
void gcm::add_ready(node *n) {
	sched_queue_id sq = sh.get_queue_id(n);
	if (n->flags & NF_SCHEDULE_EARLY)
		bu_ready_early[sq].push_back(n);
	else if (sq == SQ_ALU && n->is_copy_mov())
		bu_ready[sq].push_front(n);
	else if (n->is_alu_inst()) {
		alu_node *a = static_cast<alu_node*>(n);
		if (a->bc.op_ptr->flags & AF_PRED && a->dst[2]) {
			// PRED_SET instruction that updates exec mask
			pending_exec_mask_update = true;
		}
		bu_ready_next[sq].push_back(n);
	} else
		bu_ready_next[sq].push_back(n);
}

void gcm::bu_release_op(node *n) {
	op_info &oi = op_map[n];

	GCM_DUMP(
		sblog << "  bu release op  ";
		dump::dump_op(n);
	);

	nuc_stk[ucs_level].erase(n);
	pending.remove_node(n);

	bu_find_best_bb(n, oi);

	if (oi.bottom_bb == bu_bb) {
		GCM_DUMP( sblog << "   ready\n"; );
		add_ready(n);
	} else {
		GCM_DUMP( sblog << "   ready_above\n"; );
		ready_above.push_back(n);
	}
}

void gcm::bu_release_phi_defs(container_node* p, unsigned op)
{
	for (node_riterator I = p->rbegin(), E = p->rend(); I != E; ++I) {
		node *o = *I;
		value *v = o->src[op];
		if (v && !v->is_readonly())
			pending_defs.push_back(o->src[op]);
	}
}

unsigned gcm::get_uc_vec(vvec &vv) {
	unsigned c = 0;
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (!v)
			continue;

		if (v->is_rel())
			c += get_uc_vec(v->mdef);
		else
			c += v->use_count();
	}
	return c;
}

void gcm::init_use_count(nuc_map& m, container_node &s) {
	m.clear();
	for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) {
		node *n = *I;
		unsigned uc = get_uc_vec(n->dst);
		GCM_DUMP(
			sblog << "uc " << uc << "  ";
			dump::dump_op(n);
			sblog << "\n";
		);
		if (!uc) {
			pending_nodes.push_back(n);
			GCM_DUMP(
				sblog << "pushed pending_node in init ";
				dump::dump_op(n);
				sblog << "\n";
			);
		} else
			m[n] = uc;
	}
}

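// Called when one more use of v has been scheduled. Bumps the per-level
// use counter of the defining op; once it matches the total use count
// in uses[n], the op itself can be released.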
void gcm::bu_release_val(value* v) {
	node *n = v->any_def();

	if (n && n->parent == &pending) {
		nuc_map &m = nuc_stk[ucs_level];
		unsigned uc = ++m[n];
		unsigned uc2 = uses[n];

		if (live.add_val(v)) {
			++live_count;
			GCM_DUMP( sblog << "live_count: " << live_count << "\n"; );
		}

		GCM_DUMP(
			sblog << "release val ";
			dump::dump_val(v);
			sblog << "  for node ";
			dump::dump_op(n);
			sblog << "    new uc=" << uc << ", total " << uc2 << "\n";
		);

		if (uc == uc2)
			bu_release_op(n);
	}
}

void gcm::init_def_count(nuc_map& m, container_node& s) {
	m.clear();
	for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) {
		node *n = *I;
		unsigned dc = get_dc_vec(n->src, true) + get_dc_vec(n->dst, false);
		m[n] = dc;

		GCM_DUMP(
			sblog << "dc " << dc << "  ";
			dump::dump_op(n);
			sblog << "\n";
		);
	}
}

unsigned gcm::get_dc_vec(vvec& vv, bool src) {
	unsigned c = 0;
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (!v || v->is_readonly())
			continue;

		if (v->is_rel()) {
			c += v->rel->def != NULL;
			c += get_dc_vec(v->muse, true);
		} else if (src) {
			c += v->def != NULL;
			c += v->adef != NULL;
		}
	}
	return c;
}

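// Estimates the "real" ALU instruction count of a queue: packed ALU
// nodes expand to their member count, and copy MOVs from a GPR are
// skipped (presumably because they are likely to be eliminated later);
// counting stops once 'max' is reached.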
unsigned gcm::real_alu_count(sched_queue& q, unsigned max) {
	sq_iterator I(q.begin()), E(q.end());
	unsigned c = 0;

	while (I != E && c < max) {
		node *n = *I;
		if (n->is_alu_inst()) {
			if (!n->is_copy_mov() || !n->src[0]->is_any_gpr())
				++c;
		} else if (n->is_alu_packed()) {
			c += static_cast<container_node*>(n)->count();
		}
		++I;
	}

	return c;
}

bool gcm::check_alu_ready_count(unsigned threshold) {
	unsigned r = real_alu_count(bu_ready[SQ_ALU], threshold);
	if (r >= threshold)
		return true;
	r += real_alu_count(bu_ready_next[SQ_ALU], threshold - r);
	return r >= threshold;
}

} // namespace r600_sb