/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define GCM_DEBUG 0

#if GCM_DEBUG
#define GCM_DUMP(a) do { a } while (0)
#else
#define GCM_DUMP(a)
#endif

#include <map>

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
#include "eg_sq.h" // V_SQ_CF_INDEX_NONE

namespace r600_sb {

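// Global Code Motion (GCM) pass.  Instructions are pulled out of the basic
// blocks and placed again in two phases: the early (top-down) phase schedules
// each op as soon as all of its operands are defined and records the earliest
// legal block (top_bb); the late (bottom-up) phase walks the CFG in reverse,
// schedules each op once all uses of its results are placed, and typically
// picks the block with the lowest loop nesting level between top_bb and the
// using block, grouping non-CF ops into ALU/TEX/VTX/GDS clauses.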
int gcm::run() {

	GCM_DUMP( sblog << "==== GCM ==== \n"; sh.dump_ir(); );

	collect_instructions(sh.root, true);

	init_def_count(uses, pending);

	for (node_iterator N, I = pending.begin(), E = pending.end();
			I != E; I = N) {
		N = I;
		++N;
		node *o = *I;

		GCM_DUMP(
			sblog << "pending : ";
			dump::dump_op(o);
			sblog << "\n";
		);

		if (td_is_ready(o)) {

			GCM_DUMP(
				sblog << " ready: ";
				dump::dump_op(o);
				sblog << "\n";
			);
			pending.remove_node(o);
			ready.push_back(o);
		}
	}

	sched_early(sh.root);

	if (!pending.empty()) {
		sblog << "##### gcm_sched_early_pass: unscheduled ops:\n";
		dump::dump_op(pending.front());
	}

	assert(pending.empty());

	GCM_DUMP( sh.dump_ir(); );

	GCM_DUMP( sblog << "\n\n ############## gcm late\n\n"; );

	collect_instructions(sh.root, false);

	init_use_count(uses, pending);

	sched_late(sh.root);
	if (!pending.empty()) {
		sblog << "##### gcm_sched_late_pass: unscheduled ops:\n";
		dump::dump_op(pending.front());
	}

	assert(ucs_level == 0);
	assert(pending.empty());

	return 0;
}


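// Recursively walk the IR and move the contents of every basic block into the
// 'pending' list.  On the early pass, ops marked NF_DONT_MOVE are pinned to
// their current block by fixing both top_bb and bottom_bb.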
void gcm::collect_instructions(container_node *c, bool early_pass) {
	if (c->is_bb()) {

		if (early_pass) {
			for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
				node *n = *I;
				if (n->flags & NF_DONT_MOVE) {
					op_info &o = op_map[n];
					o.top_bb = o.bottom_bb = static_cast<bb_node*>(c);
				}
			}
		}

		pending.append_from(c);
		return;
	}

	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
		if (I->is_container()) {
			collect_instructions(static_cast<container_node*>(*I), early_pass);
		}
	}
}

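// Top-down scheduling pass: phi nodes stay in place and only release the uses
// of their results, basic blocks are filled by td_sched_bb(), and other
// containers (regions, if/depart/repeat) are traversed recursively.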
void gcm::sched_early(container_node *n) {

	region_node *r =
			(n->type == NT_REGION) ? static_cast<region_node*>(n) : NULL;

	if (r && r->loop_phi) {
		sched_early(r->loop_phi);
	}

	for (node_iterator I = n->begin(), E = n->end(); I != E; ++I) {
		if (I->type == NT_OP) {
			node *op = *I;
			if (op->subtype == NST_PHI) {
				td_release_uses(op->dst);
			}
		} else if (I->is_container()) {
			if (I->subtype == NST_BB) {
				bb_node* bb = static_cast<bb_node*>(*I);
				td_sched_bb(bb);
			} else {
				sched_early(static_cast<container_node*>(*I));
			}
		}
	}

	if (r && r->phi) {
		sched_early(r->phi);
	}
}

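// Place a ready op at the end of the current block, release the uses of its
// results so that dependent ops may become ready, and remember the earliest
// block it can live in (top_bb).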
void gcm::td_schedule(bb_node *bb, node *n) {
	GCM_DUMP(
		sblog << "scheduling : ";
		dump::dump_op(n);
		sblog << "\n";
	);
	td_release_uses(n->dst);

	bb->push_back(n);

	op_map[n].top_bb = bb;

}

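// Drain the 'ready' queue into the given block.  Scheduling an op may make
// further ops ready, so iterate until the queue stays empty.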
void gcm::td_sched_bb(bb_node* bb) {
	GCM_DUMP(
		sblog << "td scheduling BB_" << bb->id << "\n";
	);

	while (!ready.empty()) {
		for (sq_iterator N, I = ready.begin(), E = ready.end(); I != E;
				I = N) {
			N = I; ++N;
			td_schedule(bb, *I);
			ready.erase(I);
		}
	}
}

// during the early pass 'uses' holds the number of not yet scheduled
// definitions this op depends on (see init_def_count)
bool gcm::td_is_ready(node* n) {
	return uses[n] == 0;
}

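// A value has just been defined by a scheduled op: decrement the pending
// definition count of every op that consumes it and move consumers whose
// count drops to zero from 'pending' to 'ready'.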
void gcm::td_release_val(value *v) {

	GCM_DUMP(
		sblog << "td checking uses: ";
		dump::dump_val(v);
		sblog << "\n";
	);

	for (uselist::iterator I = v->uses.begin(), E = v->uses.end(); I != E; ++I) {
		node *op = *I;
		if (op->parent != &pending) {
			continue;
		}

		GCM_DUMP(
			sblog << "td used in ";
			dump::dump_op(op);
			sblog << "\n";
		);

		assert(uses[op] > 0);
		if (--uses[op] == 0) {
			GCM_DUMP(
				sblog << "td released : ";
				dump::dump_op(op);
				sblog << "\n";
			);

			pending.remove_node(op);
			ready.push_back(op);
		}
	}

}

void gcm::td_release_uses(vvec& v) {
	for (vvec::iterator I = v.begin(), E = v.end(); I != E; ++I) {
		value *v = *I;
		if (!v)
			continue;

		if (v->is_rel())
			td_release_uses(v->mdef);
		else
			td_release_val(v);
	}
}

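// Bottom-up scheduling pass.  Children are visited in reverse order; depart
// and repeat nodes open a new use-count stack level and release the phi
// sources that correspond to their edge; if conditions and the initial loop
// phi inputs are queued as pending defs for the enclosing block.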
void gcm::sched_late(container_node *n) {

	bool stack_pushed = false;

	if (n->is_depart()) {
		depart_node *d = static_cast<depart_node*>(n);
		push_uc_stack();
		stack_pushed = true;
		bu_release_phi_defs(d->target->phi, d->dep_id);
	} else if (n->is_repeat()) {
		repeat_node *r = static_cast<repeat_node*>(n);
		assert(r->target->loop_phi);
		push_uc_stack();
		stack_pushed = true;
		bu_release_phi_defs(r->target->loop_phi, r->rep_id);
	}

	for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
		if (I->is_container()) {
			if (I->subtype == NST_BB) {
				bb_node* bb = static_cast<bb_node*>(*I);
				bu_sched_bb(bb);
			} else {
				sched_late(static_cast<container_node*>(*I));
			}
		}
	}

	if (n->type == NT_IF) {
		if_node *f = static_cast<if_node*>(n);
		if (f->cond)
			pending_defs.push_back(f->cond);
	} else if (n->type == NT_REGION) {
		region_node *r = static_cast<region_node*>(n);
		if (r->loop_phi)
			bu_release_phi_defs(r->loop_phi, 0);
	}

	if (stack_pushed)
		pop_uc_stack();

}

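// Bottom-up scheduling of a single basic block.  First release the ops queued
// in pending_nodes (results never used, or use counts completed on a nested
// path) and the values queued in pending_defs by enclosing control flow, then
// pull in ops from ready_above whose best block is this one, and finally pick
// ops from the per-queue ready lists, forming CF/ALU/TEX/VTX/GDS clauses while
// trying to keep register pressure and clause sizes in check.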
void gcm::bu_sched_bb(bb_node* bb) {
	GCM_DUMP(
		sblog << "bu scheduling BB_" << bb->id << "\n";
	);

	bu_bb = bb;

	if (!pending_nodes.empty()) {
		GCM_DUMP(
			sblog << "pending nodes:\n";
		);

		// TODO consider sorting the exports by array_base,
		// it might improve performance

		for (node_list::iterator I = pending_nodes.begin(),
				E = pending_nodes.end(); I != E; ++I) {
			bu_release_op(*I);
		}
		pending_nodes.clear();
		GCM_DUMP(
			sblog << "pending nodes processed...\n";
		);
	}


	if (!pending_defs.empty()) {
		for (vvec::iterator I = pending_defs.begin(), E = pending_defs.end();
				I != E; ++I) {
			bu_release_val(*I);
		}
		pending_defs.clear();
	}

	for (sched_queue::iterator N, I = ready_above.begin(), E = ready_above.end();
			I != E; I = N) {
		N = I;
		++N;
		node *n = *I;
		if (op_map[n].bottom_bb == bb) {
			add_ready(*I);
			ready_above.erase(I);
		}
	}

	unsigned cnt_ready[SQ_NUM];

	container_node *clause = NULL;
	unsigned last_inst_type = ~0;
	unsigned last_count = 0;

	bool s = true;
	while (s) {
		node *n;

		s = false;

		unsigned ready_mask = 0;

		for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {
			if (!bu_ready[sq].empty() || !bu_ready_next[sq].empty())
				ready_mask |= (1 << sq);
		}

		if (!ready_mask) {
			for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {
				if (!bu_ready_early[sq].empty()) {
					node *n = bu_ready_early[sq].front();
					bu_ready_early[sq].pop_front();
					bu_ready[sq].push_back(n);
					break;
				}
			}
		}

		for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {

			// an exec mask update (PRED_SET) is pending: switch to the ALU
			// queue, --sq compensates for the loop increment
			if (sq == SQ_CF && pending_exec_mask_update) {
				pending_exec_mask_update = false;
				sq = SQ_ALU;
				--sq;
				continue;
			}

			if (sq != SQ_ALU && outstanding_lds_oq)
				continue;

			if (!bu_ready_next[sq].empty())
				bu_ready[sq].splice(bu_ready[sq].end(), bu_ready_next[sq]);

			cnt_ready[sq] = bu_ready[sq].size();

			// low register pressure and few fetch ops ready: handle the
			// ALU queue first
			if ((sq == SQ_TEX || sq == SQ_VTX) && live_count <= rp_threshold &&
					cnt_ready[sq] < ctx.max_fetch/2 &&
					!bu_ready_next[SQ_ALU].empty()) {
				sq = SQ_ALU;
				--sq;
				continue;
			}

			while (!bu_ready[sq].empty()) {

				if (last_inst_type != sq) {
					clause = NULL;
					last_count = 0;
					last_inst_type = sq;
				}

				// simple heuristic to limit register pressure
				if (sq == SQ_ALU && live_count > rp_threshold && !outstanding_lds_oq &&
						(!bu_ready[SQ_TEX].empty() ||
						 !bu_ready[SQ_VTX].empty() ||
						 !bu_ready_next[SQ_TEX].empty() ||
						 !bu_ready_next[SQ_VTX].empty())) {
					GCM_DUMP( sblog << "switching to fetch (regpressure)\n"; );
					break;
				}

				n = bu_ready[sq].front();

				// real count (e.g. SAMPLE_G will be expanded to 3 instructions,
				// 2 SET_GRAD_ + 1 SAMPLE_G)
				unsigned ncnt = 1;
				if (n->is_fetch_inst() && n->src.size() == 12) {
					ncnt = 3;
				}

				bool sampler_indexing = false;
				if (n->is_fetch_inst() &&
						static_cast<fetch_node *>(n)->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE)
				{
					sampler_indexing = true; // sampler indexed ops get their own clause
					ncnt = sh.get_ctx().is_cayman() ? 2 : 3; // MOVA + SET_CF_IDX0/1
				}

				if ((sq == SQ_TEX || sq == SQ_VTX) &&
						((last_count >= ctx.max_fetch/2 &&
						check_alu_ready_count(24)) ||
						last_count + ncnt > ctx.max_fetch))
					break;
				else if (sq == SQ_CF && last_count > 4 &&
						check_alu_ready_count(24))
					break;


				if (sq == SQ_ALU && n->consumes_lds_oq() &&
						(bu_ready[SQ_TEX].size() || bu_ready[SQ_VTX].size() || bu_ready[SQ_GDS].size())) {
					GCM_DUMP( sblog << "switching scheduling due to lds op\n"; );
					break;
				}
				bu_ready[sq].pop_front();

				if (sq != SQ_CF) {
					if (!clause || sampler_indexing) {
						node_subtype nst;
						switch (sq) {
						case SQ_ALU:
							nst = NST_ALU_CLAUSE;
							break;
						case SQ_TEX:
							nst = NST_TEX_CLAUSE;
							break;
						case SQ_GDS:
							nst = NST_GDS_CLAUSE;
							break;
						default:
							nst = NST_VTX_CLAUSE;
							break;
						}
						clause = sh.create_clause(nst);
						bb->push_front(clause);
					}
				} else {
					clause = bb;
				}

				bu_schedule(clause, n);
				s = true;
				last_count += ncnt;
			}
		}
	}

	bu_bb = NULL;

	GCM_DUMP(
		sblog << "bu finished scheduling BB_" << bb->id << "\n";
	);
}

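// Process a value vector of an op that is being scheduled (bottom-up): source
// values are released so their defining ops get a use accounted, destination
// values are dropped from the live set since their definition is now placed.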
void gcm::bu_release_defs(vvec& v, bool src) {
	for (vvec::reverse_iterator I = v.rbegin(), E = v.rend(); I != E; ++I) {
		value *v = *I;
		if (!v || v->is_readonly())
			continue;

		if (v->is_rel()) {
			if (!v->rel->is_readonly())
				bu_release_val(v->rel);
			bu_release_defs(v->muse, true);
		} else if (src)
			bu_release_val(v);
		else {
			if (live.remove_val(v)) {
				--live_count;
			}
		}
	}
}

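// The nested use count stack (nuc_stk) tracks how many uses of each pending
// op have been accounted for on the current control-flow path.  Entering a
// divergent path (depart/repeat) pushes a new level so the partial counts can
// later be merged back in pop_uc_stack().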
void gcm::push_uc_stack() {
	GCM_DUMP(
		sblog << "pushing use count stack prev_level " << ucs_level
			<< " new level " << (ucs_level + 1) << "\n";
	);
	++ucs_level;
	if (ucs_level == nuc_stk.size()) {
		nuc_stk.resize(ucs_level + 1);
	} else {
		nuc_stk[ucs_level].clear();
	}
}

// an op is ready when all uses of its results have been accounted for
// ('uses' holds the total use count during the late pass)
bool gcm::bu_is_ready(node* n) {
	nuc_map &cm = nuc_stk[ucs_level];
	nuc_map::iterator F = cm.find(n);
	unsigned uc = (F == cm.end() ? 0 : F->second);
	return uc == uses[n];
}

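// Put the op at the top of the current clause (scheduling proceeds from the
// bottom of the block upwards), update the LDS output queue balance, and
// release its source and destination values.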
void gcm::bu_schedule(container_node* c, node* n) {
	GCM_DUMP(
		sblog << "bu scheduling : ";
		dump::dump_op(n);
		sblog << "\n";
	);

	assert(op_map[n].bottom_bb == bu_bb);

	if (n->produces_lds_oq())
		outstanding_lds_oq--;
	if (n->consumes_lds_oq())
		outstanding_lds_oq++;
	bu_release_defs(n->src, true);
	bu_release_defs(n->dst, false);

	c->push_front(n);
}

void gcm::dump_uc_stack() {
	sblog << "##### uc_stk start ####\n";
	for (unsigned l = 0; l <= ucs_level; ++l) {
		nuc_map &m = nuc_stk[l];

		sblog << "nuc_stk[" << l << "] : @" << &m << "\n";

		for (nuc_map::iterator I = m.begin(), E = m.end(); I != E; ++I) {
			sblog << " uc " << I->second << " for ";
			dump::dump_op(I->first);
			sblog << "\n";
		}
	}
	sblog << "##### uc_stk end ####\n";
}

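// Merge the use counts collected on the nested path back into the parent
// level.  Ops whose complete use count has now been reached are queued in
// pending_nodes and released when the next block is scheduled.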
void gcm::pop_uc_stack() {
	nuc_map &pm = nuc_stk[ucs_level];
	--ucs_level;
	nuc_map &cm = nuc_stk[ucs_level];

	GCM_DUMP(
		sblog << "merging use stack from level " << (ucs_level+1)
			<< " to " << ucs_level << "\n";
	);

	for (nuc_map::iterator N, I = pm.begin(), E = pm.end(); I != E; ++I) {
		node *n = I->first;

		GCM_DUMP(
			sblog << " " << cm[n] << " += " << I->second << " for ";
			dump::dump_op(n);
			sblog << "\n";
		);

		unsigned uc = cm[n] += I->second;

		if (n->parent == &pending && uc == uses[n]) {
			cm.erase(n);
			pending_nodes.push_back(n);
			GCM_DUMP(
				sblog << "pushed pending_node due to stack pop ";
				dump::dump_op(n);
				sblog << "\n";
			);
		}
	}
}

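// Choose the block where the op will finally be placed: start from the block
// currently being scheduled (bu_bb) and walk up towards top_bb, preferring
// the candidate basic block with the lowest loop nesting level.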
void gcm::bu_find_best_bb(node *n, op_info &oi) {

	GCM_DUMP(
		sblog << " find best bb : ";
		dump::dump_op(n);
		sblog << "\n";
	);

	if (oi.bottom_bb)
		return;

	// don't hoist generated copies
	if (n->flags & NF_DONT_HOIST) {
		oi.bottom_bb = bu_bb;
		return;
	}

	bb_node* best_bb = bu_bb;
	bb_node* top_bb = oi.top_bb;
	assert(oi.top_bb && !oi.bottom_bb);

	node *c = best_bb;

	// FIXME top_bb may be located inside the loop so we'll never enter it
	// in the loop below, and the instruction will be incorrectly placed at the
	// beginning of the shader.
	// For now just check if top_bb's loop_level is higher than that of the
	// current bb and abort the search for a better bb in that case,
	// but this problem may require a more complete (and more expensive) fix.
	if (top_bb->loop_level <= best_bb->loop_level) {
		while (c && c != top_bb) {

			if (c->prev) {
				c = c->prev;
			} else {
				c = c->parent;
				if (!c)
					break;
				continue;
			}

			if (c->subtype == NST_BB) {
				bb_node *bb = static_cast<bb_node*>(c);
				if (bb->loop_level < best_bb->loop_level)
					best_bb = bb;
			}
		}
	}

	oi.bottom_bb = best_bb;
}

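// Put a released op on the appropriate ready queue: NF_SCHEDULE_EARLY ops go
// to bu_ready_early, copy MOVs jump to the front of the ALU queue, everything
// else is queued for the next scheduling round; PRED_SET ops that update the
// exec mask also flag pending_exec_mask_update.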
void gcm::add_ready(node *n) {
	sched_queue_id sq = sh.get_queue_id(n);
	if (n->flags & NF_SCHEDULE_EARLY)
		bu_ready_early[sq].push_back(n);
	else if (sq == SQ_ALU && n->is_copy_mov())
		bu_ready[sq].push_front(n);
	else if (n->is_alu_inst()) {
		alu_node *a = static_cast<alu_node*>(n);
		if (a->bc.op_ptr->flags & AF_PRED && a->dst[2]) {
			// PRED_SET instruction that updates exec mask
			pending_exec_mask_update = true;
		}
		bu_ready_next[sq].push_back(n);
	} else
		bu_ready_next[sq].push_back(n);
}

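// Remove a fully released op from 'pending', decide its final block, and
// either make it ready for the current block or park it in 'ready_above'
// until that block is reached.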
void gcm::bu_release_op(node * n) {
	op_info &oi = op_map[n];

	GCM_DUMP(
		sblog << " bu release op ";
		dump::dump_op(n);
	);

	nuc_stk[ucs_level].erase(n);
	pending.remove_node(n);

	bu_find_best_bb(n, oi);

	if (oi.bottom_bb == bu_bb) {
		GCM_DUMP( sblog << " ready\n";);
		add_ready(n);
	} else {
		GCM_DUMP( sblog << " ready_above\n";);
		ready_above.push_back(n);
	}
}

// queue the phi source operands at index 'op' so they are released as
// pending defs
void gcm::bu_release_phi_defs(container_node* p, unsigned op)
{
	for (node_riterator I = p->rbegin(), E = p->rend(); I != E; ++I) {
		node *o = *I;
		value *v = o->src[op];
		if (v && !v->is_readonly())
			pending_defs.push_back(o->src[op]);

	}
}

// total number of uses of the values in a value vector (following relative
// access values through mdef)
unsigned gcm::get_uc_vec(vvec &vv) {
	unsigned c = 0;
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (!v)
			continue;

		if (v->is_rel())
			c += get_uc_vec(v->mdef);
		else
			c += v->use_count();
	}
	return c;
}

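// Late-pass initialization: record the total use count of each pending op's
// results; ops with no uses at all (e.g. exports) cannot be released through
// their defs, so they are queued in pending_nodes directly.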
void gcm::init_use_count(nuc_map& m, container_node &s) {
	m.clear();
	for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) {
		node *n = *I;
		unsigned uc = get_uc_vec(n->dst);
		GCM_DUMP(
			sblog << "uc " << uc << " ";
			dump::dump_op(n);
			sblog << "\n";
		);
		if (!uc) {
			pending_nodes.push_back(n);
			GCM_DUMP(
				sblog << "pushed pending_node in init ";
				dump::dump_op(n);
				sblog << "\n";
			);

		} else
			m[n] = uc;
	}
}

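// A use of the value has been scheduled: bump the use count of its defining
// op on the current path, mark the value live (it must stay available above
// this point), and release the defining op once every use is accounted for.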
void gcm::bu_release_val(value* v) {
	node *n = v->any_def();

	if (n && n->parent == &pending) {
		nuc_map &m = nuc_stk[ucs_level];
		unsigned uc = ++m[n];
		unsigned uc2 = uses[n];

		if (live.add_val(v)) {
			++live_count;
			GCM_DUMP ( sblog << "live_count: " << live_count << "\n"; );
		}

		GCM_DUMP(
			sblog << "release val ";
			dump::dump_val(v);
			sblog << " for node ";
			dump::dump_op(n);
			sblog << " new uc=" << uc << ", total " << uc2 << "\n";
		);

		if (uc == uc2)
			bu_release_op(n);
	}

}

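// Early-pass initialization: for each pending op count the definitions it
// depends on through its sources (and through relative-addressing values),
// so td_is_ready() can detect when all of them have been scheduled.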
void gcm::init_def_count(nuc_map& m, container_node& s) {
	m.clear();
	for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) {
		node *n = *I;
		unsigned dc = get_dc_vec(n->src, true) + get_dc_vec(n->dst, false);
		m[n] = dc;

		GCM_DUMP(
			sblog << "dc " << dc << " ";
			dump::dump_op(n);
			sblog << "\n";
		);
	}
}

// count the non-readonly definitions referenced by a value vector
// (def/adef for plain source values, rel->def and muse for relative values)
unsigned gcm::get_dc_vec(vvec& vv, bool src) {
	unsigned c = 0;
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (!v || v->is_readonly())
			continue;

		if (v->is_rel()) {
			c += v->rel->def != NULL;
			c += get_dc_vec(v->muse, true);
		} else if (src) {
			c += v->def != NULL;
			c += v->adef != NULL;
		}
	}
	return c;
}

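// Estimate how many "real" ALU instructions are in a queue (up to 'max'):
// GPR-to-GPR copy MOVs are ignored and packed ALU ops count once per slot.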
unsigned gcm::real_alu_count(sched_queue& q, unsigned max) {
	sq_iterator I(q.begin()), E(q.end());
	unsigned c = 0;

	while (I != E && c < max) {
		node *n = *I;
		if (n->is_alu_inst()) {
			if (!n->is_copy_mov() || !n->src[0]->is_any_gpr())
				++c;
		} else if (n->is_alu_packed()) {
			c += static_cast<container_node*>(n)->count();
		}
		++I;
	}

	return c;
}

// true when at least 'threshold' real ALU instructions are waiting in the
// ready and ready_next ALU queues
bool gcm::check_alu_ready_count(unsigned threshold) {
	unsigned r = real_alu_count(bu_ready[SQ_ALU], threshold);
	if (r >= threshold)
		return true;
	r += real_alu_count(bu_ready_next[SQ_ALU], threshold - r);
	return r >= threshold;
}

} // namespace r600_sb