/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define FBC_DEBUG 0

#if FBC_DEBUG
#define FBC_DUMP(q) do { q } while (0)
#else
#define FBC_DUMP(q)
#endif

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {
void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {

    alu_group_node *g = sh.create_alu_group();
    alu_node *a = sh.create_alu();

    a->bc.set_op(ALU_OP0_NOP);
    a->bc.last = 1;

    g->push_back(a);
    b4->insert_before(g);
}

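// Top-level finalization: lowers regions to hardware control flow (innermost
// first, since the regions vector is walked in reverse), runs the CF
// peephole pass, applies chip-specific end-of-program handling, and records
// the final GPR and stack requirements in the shader.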
int bc_finalizer::run() {

    run_on(sh.root);

    regions_vec &rv = sh.get_regions();
    for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
            ++I) {
        region_node *r = *I;

        assert(r);

        bool loop = r->is_loop();

        if (loop)
            finalize_loop(r);
        else
            finalize_if(r);

        r->expand();
    }

    cf_peephole();

    // workaround for some problems on r6xx/7xx
    // add ALU NOP to each vertex shader
    if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
        cf_node *c = sh.create_clause(NST_ALU_CLAUSE);

        alu_group_node *g = sh.create_alu_group();

        alu_node *a = sh.create_alu();
        a->bc.set_op(ALU_OP0_NOP);
        a->bc.last = 1;

        g->push_back(a);
        c->push_back(g);

        sh.root->push_back(c);

        c = sh.create_cf(CF_OP_NOP);
        sh.root->push_back(c);

        last_cf = c;
    }

    if (!ctx.is_cayman() && (last_cf->bc.op_ptr->flags & CF_ALU)) {
        last_cf = sh.create_cf(CF_OP_NOP);
        sh.root->push_back(last_cf);
    }

    if (ctx.is_cayman()) {
        if (!last_cf) {
            cf_node *c = sh.create_cf(CF_OP_CF_END);
            sh.root->push_back(c);
        } else
            last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
    } else
        last_cf->bc.end_of_program = 1;

    for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
        cf_node *le = last_export[t];
        if (le)
            le->bc.set_op(CF_OP_EXPORT_DONE);
    }

    sh.ngpr = ngpr;
    sh.nstack = nstack;
    return 0;
}

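// Wraps a loop region with LOOP_START_DX10/LOOP_END, points the two at each
// other, and lowers the region's departs to LOOP_BREAK and its repeats to
// LOOP_CONTINUE (a repeat that already falls through to the loop end needs
// no explicit CONTINUE).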
void bc_finalizer::finalize_loop(region_node* r) {

    update_nstack(r);

    cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
    cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);

    // Update last_cf, but don't overwrite it if it's outside the current loop nest since
    // it may point to a cf that is later in program order.
    // The single parent level check is sufficient since finalize_loop() is processed in
    // reverse order from innermost to outermost loop nest level.
    if (!last_cf || last_cf->get_parent_region() == r) {
        last_cf = loop_end;
    }

    loop_start->jump_after(loop_end);
    loop_end->jump_after(loop_start);

    for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
            I != E; ++I) {
        depart_node *dep = *I;
        cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
        loop_break->jump(loop_end);
        dep->push_back(loop_break);
        dep->expand();
    }

    // FIXME produces unnecessary LOOP_CONTINUE
    for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
            I != E; ++I) {
        repeat_node *rep = *I;
        if (!(rep->parent == r && rep->prev == NULL)) {
            cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
            loop_cont->jump(loop_end);
            rep->push_back(loop_cont);
        }
        rep->expand();
    }

    r->push_front(loop_start);
    r->push_back(loop_end);
}

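// Lowers an if region to JUMP/ELSE/POP control flow. The expected region
// structure is documented in the comment below; an ELSE clause is emitted
// either when real else code exists or when the enclosing depart/repeat
// targets an outer loop and will need to insert LOOP_BREAK/LOOP_CONTINUE.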
void bc_finalizer::finalize_if(region_node* r) {

    update_nstack(r);

    // expecting the following control flow structure here:
    //   - region
    //     {
    //       - depart/repeat 1 (it may be depart/repeat for some outer region)
    //         {
    //           - if
    //             {
    //               - depart/repeat 2 (possibly for outer region)
    //                 {
    //                   - some optional code
    //                 }
    //             }
    //           - optional <else> code ...
    //         }
    //     }

    container_node *repdep1 = static_cast<container_node*>(r->first);
    assert(repdep1->is_depart() || repdep1->is_repeat());

    if_node *n_if = static_cast<if_node*>(repdep1->first);

    if (n_if) {

        assert(n_if->is_if());

        container_node *repdep2 = static_cast<container_node*>(n_if->first);
        assert(repdep2->is_depart() || repdep2->is_repeat());

        cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
        cf_node *if_pop = sh.create_cf(CF_OP_POP);

        if (!last_cf || last_cf->get_parent_region() == r) {
            last_cf = if_pop;
        }
        if_pop->bc.pop_count = 1;
        if_pop->jump_after(if_pop);

        r->push_front(if_jump);
        r->push_back(if_pop);

        /* the depart/repeat 1 is actually part of the "else" code:
         * if it's a depart/repeat for an outer loop region it will want to
         * insert a LOOP_BREAK or LOOP_CONTINUE in here, so we need
         * to emit the else clause.
         */
        bool has_else = n_if->next;

        if (repdep1->is_depart()) {
            depart_node *dep1 = static_cast<depart_node*>(repdep1);
            if (dep1->target != r && dep1->target->is_loop())
                has_else = true;
        }

        if (repdep1->is_repeat()) {
            repeat_node *rep1 = static_cast<repeat_node*>(repdep1);
            if (rep1->target != r && rep1->target->is_loop())
                has_else = true;
        }

        if (has_else) {
            cf_node *nelse = sh.create_cf(CF_OP_ELSE);
            n_if->insert_after(nelse);
            if_jump->jump(nelse);
            nelse->jump_after(if_pop);
            nelse->bc.pop_count = 1;

        } else {
            if_jump->jump_after(if_pop);
            if_jump->bc.pop_count = 1;
        }

        n_if->expand();
    }

    for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
            I != E; ++I) {
        (*I)->expand();
    }
    r->departs.clear();
    assert(r->repeats.empty());
}

void bc_finalizer::run_on(container_node* c) {
    node *prev_node = NULL;
    for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
        node *n = *I;

        if (n->is_alu_group()) {
            finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
        } else {
            if (n->is_alu_clause()) {
                cf_node *c = static_cast<cf_node*>(n);

                if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
                    if (ctx.stack_workaround_8xx) {
                        region_node *r = c->get_parent_region();
                        if (r) {
                            unsigned ifs, loops;
                            unsigned elems = get_stack_depth(r, loops, ifs);
                            unsigned dmod1 = elems % ctx.stack_entry_size;
                            unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;

                            if (elems && (!dmod1 || !dmod2))
                                c->flags |= NF_ALU_STACK_WORKAROUND;
                        }
                    } else if (ctx.stack_workaround_9xx) {
                        region_node *r = c->get_parent_region();
                        if (r) {
                            unsigned ifs, loops;
                            get_stack_depth(r, loops, ifs);
                            if (loops >= 2)
                                c->flags |= NF_ALU_STACK_WORKAROUND;
                        }
                    }
                }
                last_cf = c;
            } else if (n->is_fetch_inst()) {
                finalize_fetch(static_cast<fetch_node*>(n));
            } else if (n->is_cf_inst()) {
                finalize_cf(static_cast<cf_node*>(n));
            }
            if (n->is_container())
                run_on(static_cast<container_node*>(n));
        }
        prev_node = n;
    }
}

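// Finalizes one ALU instruction group: assigns the final destination
// GPR/channel for each slot, sets write_mask, dst_rel and predicate update
// bits, bumps the GPR count, and marks the group's last instruction. If any
// source uses relative addressing on a GPR written by the previous group,
// the RV6xx NOP workaround group is inserted first.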
void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {

    alu_node *last = NULL;
    alu_group_node *prev_g = NULL;
    bool add_nop = false;
    if (prev_node && prev_node->is_alu_group()) {
        prev_g = static_cast<alu_group_node*>(prev_node);
    }

    for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
        alu_node *n = static_cast<alu_node*>(*I);
        unsigned slot = n->bc.slot;
        value *d = n->dst.empty() ? NULL : n->dst[0];

        if (d && d->is_special_reg()) {
            assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit() ||
                    d->is_lds_oq() || d->is_lds_access() || d->is_scratch());
            d = NULL;
        }

        sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);

        if (d) {
            assert(fdst.chan() == slot || slot == SLOT_TRANS);
        }

        if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman()))
            n->bc.dst_gpr = fdst.sel();
        n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;

        if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
            n->bc.dst_rel = 1;
            update_ngpr(d->array->gpr.sel() + d->array->array_size - 1);
        } else {
            n->bc.dst_rel = 0;
        }

        n->bc.write_mask = d != NULL;
        n->bc.last = 0;

        if (n->bc.op_ptr->flags & AF_PRED) {
            n->bc.update_pred = (n->dst[1] != NULL);
            n->bc.update_exec_mask = (n->dst[2] != NULL);
        }

        // FIXME handle predication here
        n->bc.pred_sel = PRED_SEL_OFF;

        update_ngpr(n->bc.dst_gpr);

        add_nop |= finalize_alu_src(g, n, prev_g);

        last = n;
    }

    if (add_nop) {
        if (sh.get_ctx().r6xx_gpr_index_workaround) {
            insert_rv6xx_load_ar_workaround(g);
        }
    }
    last->bc.last = 1;
}

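// Translates the source operands of ALU instruction a into the hardware
// sel/chan encoding (GPRs, relative GPRs, inline constants, literals,
// kcache constants, LDS queues, params). Returns true when the previous
// group wrote a GPR that is read here through relative addressing, or wrote
// a relative destination with the same sel, i.e. when the RV6xx NOP
// workaround is needed before this group.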
bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
    vvec &sv = a->src;
    bool add_nop = false;
    FBC_DUMP(
        sblog << "finalize_alu_src: ";
        dump::dump_op(a);
        sblog << "\n";
    );

    unsigned si = 0;

    for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
        value *v = *I;
        assert(v);

        bc_alu_src &src = a->bc.src[si];
        sel_chan sc;
        src.rel = 0;

        sel_chan gpr;

        switch (v->kind) {
        case VLK_REL_REG:
            sc = v->get_final_gpr();
            src.sel = sc.sel();
            src.chan = sc.chan();
            if (!v->rel->is_const()) {
                src.rel = 1;
                update_ngpr(v->array->gpr.sel() + v->array->array_size - 1);
                if (prev && !add_nop) {
                    for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
                        alu_node *pn = static_cast<alu_node*>(*pI);
                        if (pn->bc.dst_gpr == src.sel) {
                            add_nop = true;
                            break;
                        }
                    }
                }
            } else
                src.rel = 0;

            break;
        case VLK_REG:
            gpr = v->get_final_gpr();
            src.sel = gpr.sel();
            src.chan = gpr.chan();
            update_ngpr(src.sel);
            break;
        case VLK_TEMP:
            src.sel = v->gpr.sel();
            src.chan = v->gpr.chan();
            update_ngpr(src.sel);
            break;
        case VLK_UNDEF:
        case VLK_CONST: {
            literal lv = v->literal_value;
            src.chan = 0;

            if (lv == literal(0))
                src.sel = ALU_SRC_0;
            else if (lv == literal(0.5f))
                src.sel = ALU_SRC_0_5;
            else if (lv == literal(1.0f))
                src.sel = ALU_SRC_1;
            else if (lv == literal(1))
                src.sel = ALU_SRC_1_INT;
            else if (lv == literal(-1))
                src.sel = ALU_SRC_M_1_INT;
            else {
                src.sel = ALU_SRC_LITERAL;
                src.chan = g->literal_chan(lv);
                src.value = lv;
            }
            break;
        }
        case VLK_KCACHE: {
            cf_node *clause = static_cast<cf_node*>(g->parent);
            assert(clause->is_alu_clause());
            sel_chan k = translate_kcache(clause, v);

            assert(k && "kcache translation failed");

            src.sel = k.sel();
            src.chan = k.chan();
            break;
        }
        case VLK_SPECIAL_REG:
            if (v->select.sel() == SV_LDS_OQA) {
                src.sel = ALU_SRC_LDS_OQ_A_POP;
                src.chan = 0;
            } else if (v->select.sel() == SV_LDS_OQB) {
                src.sel = ALU_SRC_LDS_OQ_B_POP;
                src.chan = 0;
            } else {
                src.sel = ALU_SRC_0;
                src.chan = 0;
            }
            break;
        case VLK_PARAM:
        case VLK_SPECIAL_CONST:
            src.sel = v->select.sel();
            src.chan = v->select.chan();
            break;
        default:
            assert(!"unknown value kind");
            break;
        }
        if (prev && !add_nop) {
            for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
                alu_node *pn = static_cast<alu_node*>(*pI);
                if (pn->bc.dst_rel) {
                    if (pn->bc.dst_gpr == src.sel) {
                        add_nop = true;
                        break;
                    }
                }
            }
        }
    }

    while (si < 3) {
        a->bc.src[si++].sel = 0;
    }
    return add_nop;
}

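// Copies four consecutive source operands of src (starting at arg_start)
// into the source swizzle and source GPR of the auxiliary fetch dst, and
// masks all destination channels. All GPR operands must live in the same
// register; undef maps to SEL_MASK and the constants 0/1.0 map to
// SEL_0/SEL_1.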
void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
{
    int reg = -1;

    for (unsigned chan = 0; chan < 4; ++chan) {

        dst.bc.dst_sel[chan] = SEL_MASK;

        unsigned sel = SEL_MASK;

        value *v = src.src[arg_start + chan];

        if (!v || v->is_undef()) {
            sel = SEL_MASK;
        } else if (v->is_const()) {
            literal l = v->literal_value;
            if (l == literal(0))
                sel = SEL_0;
            else if (l == literal(1.0f))
                sel = SEL_1;
            else {
                sblog << "invalid fetch constant operand " << chan << " ";
                dump::dump_op(&src);
                sblog << "\n";
                abort();
            }

        } else if (v->is_any_gpr()) {
            unsigned vreg = v->gpr.sel();
            unsigned vchan = v->gpr.chan();

            if (reg == -1)
                reg = vreg;
            else if ((unsigned)reg != vreg) {
                sblog << "invalid fetch source operand " << chan << " ";
                dump::dump_op(&src);
                sblog << "\n";
                abort();
            }

            sel = vchan;

        } else {
            sblog << "invalid fetch source operand " << chan << " ";
            dump::dump_op(&src);
            sblog << "\n";
            abort();
        }

        dst.bc.src_sel[chan] = sel;
    }

    if (reg >= 0)
        update_ngpr(reg);

    dst.bc.src_gpr = reg >= 0 ? reg : 0;
}

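// For texture fetches that use explicit gradients (FF_USEGRAD), emits
// FETCH_OP_SET_GRADIENTS_V and FETCH_OP_SET_GRADIENTS_H instructions before
// the fetch, sourced from operands 4..7 and 8..11 of the original node.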
void bc_finalizer::emit_set_grad(fetch_node* f) {

    assert(f->src.size() == 12 || f->src.size() == 13);
    unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };

    unsigned arg_start = 0;

    for (unsigned op = 0; op < 2; ++op) {
        fetch_node *n = sh.create_fetch();
        n->bc.set_op(ops[op]);

        arg_start += 4;

        copy_fetch_src(*n, *f, arg_start);

        f->insert_before(n);
    }
}

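// Emits a FETCH_OP_SET_TEXTURE_OFFSETS instruction before fetch f, sourced
// from operands 4..7 of the original node.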
void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
    assert(f.src.size() == 8);

    fetch_node *n = sh.create_fetch();

    n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);

    copy_fetch_src(*n, f, 4);

    f.insert_before(n);
}

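// Finalizes a fetch (VTX/TEX/GDS) instruction: emits auxiliary gradient or
// texture-offset fetches when needed, then resolves the source swizzle and
// source GPR (1 source operand for VTX, 2 for GDS, 4 otherwise) followed by
// the destination swizzle and destination GPR. All destination operands
// must be allocated in the same GPR.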
void bc_finalizer::finalize_fetch(fetch_node* f) {

    int reg = -1;

    // src

    unsigned src_count = 4;

    unsigned flags = f->bc.op_ptr->flags;

    if (flags & FF_VTX) {
        src_count = 1;
    } else if (flags & FF_GDS) {
        src_count = 2;
    } else if (flags & FF_USEGRAD) {
        emit_set_grad(f);
    } else if (flags & FF_USE_TEXTURE_OFFSETS) {
        emit_set_texture_offsets(*f);
    }

    for (unsigned chan = 0; chan < src_count; ++chan) {

        unsigned sel = f->bc.src_sel[chan];

        if (sel > SEL_W)
            continue;

        value *v = f->src[chan];

        if (v->is_undef()) {
            sel = SEL_MASK;
        } else if (v->is_const()) {
            literal l = v->literal_value;
            if (l == literal(0))
                sel = SEL_0;
            else if (l == literal(1.0f))
                sel = SEL_1;
            else {
                sblog << "invalid fetch constant operand " << chan << " ";
                dump::dump_op(f);
                sblog << "\n";
                abort();
            }

        } else if (v->is_any_gpr()) {
            unsigned vreg = v->gpr.sel();
            unsigned vchan = v->gpr.chan();

            if (reg == -1)
                reg = vreg;
            else if ((unsigned)reg != vreg) {
                sblog << "invalid fetch source operand " << chan << " ";
                dump::dump_op(f);
                sblog << "\n";
                abort();
            }

            sel = vchan;

        } else {
            sblog << "invalid fetch source operand " << chan << " ";
            dump::dump_op(f);
            sblog << "\n";
            abort();
        }

        f->bc.src_sel[chan] = sel;
    }

    if (reg >= 0)
        update_ngpr(reg);

    f->bc.src_gpr = reg >= 0 ? reg : 0;

    // dst

    reg = -1;

    unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};

    for (unsigned chan = 0; chan < 4; ++chan) {

        unsigned sel = f->bc.dst_sel[chan];

        if (sel == SEL_MASK)
            continue;

        value *v = f->dst[chan];
        if (!v)
            continue;

        if (v->is_any_gpr()) {
            unsigned vreg = v->gpr.sel();
            unsigned vchan = v->gpr.chan();

            if (reg == -1)
                reg = vreg;
            else if ((unsigned)reg != vreg) {
                sblog << "invalid fetch dst operand " << chan << " ";
                dump::dump_op(f);
                sblog << "\n";
                abort();
            }

            dst_swz[vchan] = sel;

        } else {
            sblog << "invalid fetch dst operand " << chan << " ";
            dump::dump_op(f);
            sblog << "\n";
            abort();
        }
    }

    for (unsigned i = 0; i < 4; ++i)
        f->bc.dst_sel[i] = dst_swz[i];

    if ((flags & FF_GDS) && reg == -1) {
        f->bc.dst_sel[0] = SEL_MASK;
        f->bc.dst_gpr = 0;
        return;
    }
    assert(reg >= 0);

    if (reg >= 0)
        update_ngpr(reg);

    f->bc.dst_gpr = reg >= 0 ? reg : 0;
}

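// Finalizes a CF instruction. Exports are rewritten to CF_OP_EXPORT and
// remembered per export type so the last one can later become EXPORT_DONE;
// their source swizzle and rw_gpr are resolved here. Memory exports get
// their component mask, rw_gpr and (for RAT/typed ops) index_gpr resolved.
// Subroutine calls reserve additional stack entries.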
void bc_finalizer::finalize_cf(cf_node* c) {

    unsigned flags = c->bc.op_ptr->flags;

    c->bc.end_of_program = 0;
    last_cf = c;

    if (flags & CF_EXP) {
        c->bc.set_op(CF_OP_EXPORT);
        last_export[c->bc.type] = c;

        int reg = -1;

        for (unsigned chan = 0; chan < 4; ++chan) {

            unsigned sel = c->bc.sel[chan];

            if (sel > SEL_W)
                continue;

            value *v = c->src[chan];

            if (v->is_undef()) {
                sel = SEL_MASK;
            } else if (v->is_const()) {
                literal l = v->literal_value;
                if (l == literal(0))
                    sel = SEL_0;
                else if (l == literal(1.0f))
                    sel = SEL_1;
                else {
                    sblog << "invalid export constant operand " << chan << " ";
                    dump::dump_op(c);
                    sblog << "\n";
                    abort();
                }

            } else if (v->is_any_gpr()) {
                unsigned vreg = v->gpr.sel();
                unsigned vchan = v->gpr.chan();

                if (reg == -1)
                    reg = vreg;
                else if ((unsigned)reg != vreg) {
                    sblog << "invalid export source operand " << chan << " ";
                    dump::dump_op(c);
                    sblog << "\n";
                    abort();
                }

                sel = vchan;

            } else {
                sblog << "invalid export source operand " << chan << " ";
                dump::dump_op(c);
                sblog << "\n";
                abort();
            }

            c->bc.sel[chan] = sel;
        }

        if (reg >= 0)
            update_ngpr(reg);

        c->bc.rw_gpr = reg >= 0 ? reg : 0;

    } else if (flags & CF_MEM) {

        int reg = -1;
        unsigned mask = 0;

        for (unsigned chan = 0; chan < 4; ++chan) {
            value *v;
            if (ctx.hw_class == HW_CLASS_R600 && c->bc.op == CF_OP_MEM_SCRATCH &&
                    (c->bc.type == 2 || c->bc.type == 3))
                v = c->dst[chan];
            else
                v = c->src[chan];

            if (!v || v->is_undef())
                continue;

            if (!v->is_any_gpr() || v->gpr.chan() != chan) {
                sblog << "invalid source operand " << chan << " ";
                dump::dump_op(c);
                sblog << "\n";
                abort();
            }
            unsigned vreg = v->gpr.sel();
            if (reg == -1)
                reg = vreg;
            else if ((unsigned)reg != vreg) {
                sblog << "invalid source operand " << chan << " ";
                dump::dump_op(c);
                sblog << "\n";
                abort();
            }

            mask |= (1 << chan);
        }

        if (reg >= 0)
            update_ngpr(reg);

        c->bc.rw_gpr = reg >= 0 ? reg : 0;
        c->bc.comp_mask = mask;

        if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {

            reg = -1;

            for (unsigned chan = 0; chan < 4; ++chan) {
                value *v = c->src[4 + chan];
                if (!v || v->is_undef())
                    continue;

                if (!v->is_any_gpr() || v->gpr.chan() != chan) {
                    sblog << "invalid source operand " << chan << " ";
                    dump::dump_op(c);
                    sblog << "\n";
                    abort();
                }
                unsigned vreg = v->gpr.sel();
                if (reg == -1)
                    reg = vreg;
                else if ((unsigned)reg != vreg) {
                    sblog << "invalid source operand " << chan << " ";
                    dump::dump_op(c);
                    sblog << "\n";
                    abort();
                }
            }

            assert(reg >= 0);

            if (reg >= 0)
                update_ngpr(reg);

            c->bc.index_gpr = reg >= 0 ? reg : 0;
        }
    } else if (flags & CF_CALL) {
        update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
    }
}

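// Maps a kcache constant reference (bank + line + word within the line) to
// the final ALU constant-register sel, based on which of the clause's four
// kcache lock slots holds that line. The slots map to the sel ranges
// starting at 128, 160, 256 and 288. For illustration (hypothetical values):
// a constant with kcache_sel 0x37 (line 3, word 7) found in slot 1 locked at
// addr 3 becomes sel 160 + 7 = 167.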
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
    unsigned sel = v->select.kcache_sel();
    unsigned bank = v->select.kcache_bank();
    unsigned chan = v->select.chan();
    static const unsigned kc_base[] = {128, 160, 256, 288};

    sel &= 4095;

    unsigned line = sel >> 4;

    for (unsigned k = 0; k < 4; ++k) {
        bc_kcache &kc = alu->bc.kc[k];

        if (kc.mode == KC_LOCK_NONE)
            break;

        if (kc.bank == bank && (kc.addr == line ||
                (kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {

            sel = kc_base[k] + (sel - (kc.addr << 4));

            return sel_chan(sel, chan);
        }
    }

    assert(!"kcache translation error");
    return 0;
}

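// Grows the shader's GPR count to cover gpr, ignoring the ALU temp GPRs
// reserved at the top of the register file.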
void bc_finalizer::update_ngpr(unsigned gpr) {
    if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
        ngpr = gpr + 1;
}

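// Computes the number of stack elements needed at node n: each enclosing
// loop frame costs ctx.stack_entry_size elements and each if frame costs
// one, plus chip-specific reserved elements (see the switch below), plus
// 'add' extra elements requested by the caller.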
unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
                                       unsigned &ifs, unsigned add) {
    unsigned stack_elements = add;
    bool has_non_wqm_push = (add != 0);
    region_node *r = n->is_region() ?
            static_cast<region_node*>(n) : n->get_parent_region();

    loops = 0;
    ifs = 0;

    while (r) {
        if (r->is_loop()) {
            ++loops;
        } else {
            ++ifs;
            has_non_wqm_push = true;
        }
        r = r->get_parent_region();
    }
    stack_elements += (loops * ctx.stack_entry_size) + ifs;

    // reserve additional elements in some cases
    switch (ctx.hw_class) {
    case HW_CLASS_R600:
    case HW_CLASS_R700:
        // If any non-WQM push is invoked, 2 elements should be reserved.
        if (has_non_wqm_push)
            stack_elements += 2;
        break;
    case HW_CLASS_CAYMAN:
        // If any stack operation is invoked, 2 elements should be reserved.
        if (stack_elements)
            stack_elements += 2;
        break;
    case HW_CLASS_EVERGREEN:
        // According to the docs we need to reserve 1 element for each of the
        // following cases:
        //   1) a non-WQM push is used with WQM/LOOP frames on the stack
        //   2) ALU_ELSE_AFTER is used at the point of max stack usage
        // NOTE:
        // It was found that the conditions above are not sufficient; there are
        // other cases where we also need to reserve stack space, which is why
        // we always reserve 1 stack element if we have a non-WQM push on the
        // stack. Condition 2 is ignored for now because we don't use this
        // instruction.
        if (has_non_wqm_push)
            ++stack_elements;
        break;
    case HW_CLASS_UNKNOWN:
        assert(0);
    }
    return stack_elements;
}

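// Converts the element count for region r (plus 'add' extra elements) into
// 4-element hardware stack entries and keeps the running maximum in nstack.
// For example, 5 elements round up to (5 + 3) >> 2 = 2 stack entries.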
void bc_finalizer::update_nstack(region_node* r, unsigned add) {
    unsigned loops = 0;
    unsigned ifs = 0;
    unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;

    // XXX all chips expect this value to be computed using 4 as entry size,
    // not the real entry size
    unsigned stack_entries = (elems + 3) >> 2;

    if (nstack < stack_entries)
        nstack = stack_entries;
}

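// CF-level peephole pass: splits ALU_PUSH_BEFORE into an explicit PUSH + ALU
// where the 8xx/9xx stack workaround flagged it, resolves jump-after targets
// (inserting a trailing CF NOP when the target has no successor), folds a
// POP into a preceding CF_OP_ALU as ALU_POP_AFTER, and removes JUMPs whose
// target is the immediately following instruction.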
void bc_finalizer::cf_peephole() {
    if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
        for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
                I = N) {
            N = I; ++N;
            cf_node *c = static_cast<cf_node*>(*I);

            if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
                    (c->flags & NF_ALU_STACK_WORKAROUND)) {
                cf_node *push = sh.create_cf(CF_OP_PUSH);
                c->insert_before(push);
                push->jump(c);
                c->bc.set_op(CF_OP_ALU);
            }
        }
    }

    for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
            I = N) {
        N = I; ++N;

        cf_node *c = static_cast<cf_node*>(*I);

        if (c->jump_after_target) {
            if (c->jump_target->next == NULL) {
                c->jump_target->insert_after(sh.create_cf(CF_OP_NOP));
                if (last_cf == c->jump_target)
                    last_cf = static_cast<cf_node*>(c->jump_target->next);
            }
            c->jump_target = static_cast<cf_node*>(c->jump_target->next);
            c->jump_after_target = false;
        }

        if (c->is_cf_op(CF_OP_POP)) {
            node *p = c->prev;
            if (p->is_alu_clause()) {
                cf_node *a = static_cast<cf_node*>(p);

                if (a->bc.op == CF_OP_ALU) {
                    a->bc.set_op(CF_OP_ALU_POP_AFTER);
                    c->remove();
                }
            }
        } else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
            // if JUMP is immediately followed by its jump target,
            // then JUMP is useless and we can eliminate it
            c->remove();
        }
    }
}

} // namespace r600_sb