/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define FBC_DEBUG 0

#if FBC_DEBUG
#define FBC_DUMP(q) do { q } while (0)
#else
#define FBC_DUMP(q)
#endif
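
/* The do { ... } while (0) wrapper lets FBC_DUMP(...) be used like a single
 * statement; with FBC_DEBUG set to 0 the argument is compiled out entirely.
 * Typical use later in this file:
 *
 *   FBC_DUMP(
 *       sblog << "finalize_alu_src: ";
 *       dump::dump_op(a);
 *       sblog << "\n";
 *   );
 */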

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

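/* Helper for the rv6xx relative-indexing hazard: finalize_alu_src() detects
 * the conflicting register access and returns add_nop, and
 * finalize_alu_group() applies the fix only on parts with the
 * r6xx_gpr_index_workaround flag set. The fix is simply a separating ALU
 * group containing a single NOP marked as the last instruction.
 */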
void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {

	alu_group_node *g = sh.create_alu_group();
	alu_node *a = sh.create_alu();

	a->bc.set_op(ALU_OP0_NOP);
	a->bc.last = 1;

	g->push_back(a);
	b4->insert_before(g);
}

int bc_finalizer::run() {

	run_on(sh.root);

	regions_vec &rv = sh.get_regions();
	for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
			++I) {
		region_node *r = *I;

		assert(r);

		bool loop = r->is_loop();

		if (loop)
			finalize_loop(r);
		else
			finalize_if(r);

		r->expand();
	}

	cf_peephole();

	// workaround for some problems on r6xx/7xx
	// add ALU NOP to each vertex shader
	if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
		cf_node *c = sh.create_clause(NST_ALU_CLAUSE);

		alu_group_node *g = sh.create_alu_group();

		alu_node *a = sh.create_alu();
		a->bc.set_op(ALU_OP0_NOP);
		a->bc.last = 1;

		g->push_back(a);
		c->push_back(g);

		sh.root->push_back(c);

		c = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(c);

		last_cf = c;
	}

	if (!ctx.is_cayman() && last_cf->bc.op_ptr->flags & CF_ALU) {
		last_cf = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(last_cf);
	}

	if (ctx.is_cayman()) {
		if (!last_cf) {
			cf_node *c = sh.create_cf(CF_OP_CF_END);
			sh.root->push_back(c);
		} else
			last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
	} else
		last_cf->bc.end_of_program = 1;

	for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
		cf_node *le = last_export[t];
		if (le)
			le->bc.set_op(CF_OP_EXPORT_DONE);
	}

	sh.ngpr = ngpr;
	sh.nstack = nstack;
	return 0;
}

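/* Rough sketch of the CF layout produced for a loop region (illustration
 * only, one depart and one repeat shown):
 *
 *   LOOP_START_DX10  -> continues after LOOP_END on exit
 *     ...body...
 *     LOOP_BREAK     -> LOOP_END          (one per depart)
 *     LOOP_CONTINUE  -> LOOP_END          (one per repeat, skipped when the
 *                                          repeat spans the whole body)
 *   LOOP_END         -> back after LOOP_START_DX10
 */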
void bc_finalizer::finalize_loop(region_node* r) {

	update_nstack(r);

	cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
	cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);

	// Update last_cf, but don't overwrite it if it's outside the current loop nest since
	// it may point to a cf that is later in program order.
	// The single parent level check is sufficient since finalize_loop() is processed in
	// reverse order from innermost to outermost loop nest level.
	if (!last_cf || last_cf->get_parent_region() == r) {
		last_cf = loop_end;
	}

	loop_start->jump_after(loop_end);
	loop_end->jump_after(loop_start);

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		depart_node *dep = *I;
		cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
		loop_break->jump(loop_end);
		dep->push_back(loop_break);
		dep->expand();
	}

	// FIXME produces unnecessary LOOP_CONTINUE
	for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
			I != E; ++I) {
		repeat_node *rep = *I;
		if (!(rep->parent == r && rep->prev == NULL)) {
			cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
			loop_cont->jump(loop_end);
			rep->push_back(loop_cont);
		}
		rep->expand();
	}

	r->push_front(loop_start);
	r->push_back(loop_end);
}

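/* Rough sketch of the CF emitted for an if region (illustration only):
 *
 *   with else:                          without else:
 *     JUMP -> ELSE                        JUMP -> after POP, pop_count = 1
 *       ...if branch...                     ...if branch...
 *     ELSE -> after POP, pop_count = 1    POP, pop_count = 1
 *       ...else branch...
 *     POP, pop_count = 1
 */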
void bc_finalizer::finalize_if(region_node* r) {

	update_nstack(r);

	// expecting the following control flow structure here:
	//   - region
	//     {
	//       - depart/repeat 1 (it may be depart/repeat for some outer region)
	//         {
	//           - if
	//             {
	//               - depart/repeat 2 (possibly for outer region)
	//                 {
	//                   - some optional code
	//                 }
	//             }
	//           - optional <else> code ...
	//         }
	//     }

	container_node *repdep1 = static_cast<container_node*>(r->first);
	assert(repdep1->is_depart() || repdep1->is_repeat());

	if_node *n_if = static_cast<if_node*>(repdep1->first);

	if (n_if) {

		assert(n_if->is_if());

		container_node *repdep2 = static_cast<container_node*>(n_if->first);
		assert(repdep2->is_depart() || repdep2->is_repeat());

		cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
		cf_node *if_pop = sh.create_cf(CF_OP_POP);

		if (!last_cf || last_cf->get_parent_region() == r) {
			last_cf = if_pop;
		}
		if_pop->bc.pop_count = 1;
		if_pop->jump_after(if_pop);

		r->push_front(if_jump);
		r->push_back(if_pop);

		bool has_else = n_if->next;

		if (has_else) {
			cf_node *nelse = sh.create_cf(CF_OP_ELSE);
			n_if->insert_after(nelse);
			if_jump->jump(nelse);
			nelse->jump_after(if_pop);
			nelse->bc.pop_count = 1;

		} else {
			if_jump->jump_after(if_pop);
			if_jump->bc.pop_count = 1;
		}

		n_if->expand();
	}

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		(*I)->expand();
	}
	r->departs.clear();
	assert(r->repeats.empty());
}

void bc_finalizer::run_on(container_node* c) {
	node *prev_node = NULL;
	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
		node *n = *I;

		if (n->is_alu_group()) {
			finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
		} else {
			if (n->is_alu_clause()) {
				cf_node *c = static_cast<cf_node*>(n);

				if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
					if (ctx.stack_workaround_8xx) {
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							unsigned elems = get_stack_depth(r, loops, ifs);
							unsigned dmod1 = elems % ctx.stack_entry_size;
							unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;

							if (elems && (!dmod1 || !dmod2))
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					} else if (ctx.stack_workaround_9xx) {
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							get_stack_depth(r, loops, ifs);
							if (loops >= 2)
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					}
				}
			} else if (n->is_fetch_inst()) {
				finalize_fetch(static_cast<fetch_node*>(n));
			} else if (n->is_cf_inst()) {
				finalize_cf(static_cast<cf_node*>(n));
			}
			if (n->is_container())
				run_on(static_cast<container_node*>(n));
		}
		prev_node = n;
	}
}

void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {

	alu_node *last = NULL;
	alu_group_node *prev_g = NULL;
	bool add_nop = false;
	if (prev_node && prev_node->is_alu_group()) {
		prev_g = static_cast<alu_group_node*>(prev_node);
	}

	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		alu_node *n = static_cast<alu_node*>(*I);
		unsigned slot = n->bc.slot;
		value *d = n->dst.empty() ? NULL : n->dst[0];

		if (d && d->is_special_reg()) {
			assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit());
			d = NULL;
		}

		sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);

		if (d) {
			assert(fdst.chan() == slot || slot == SLOT_TRANS);
		}

		if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman()))
			n->bc.dst_gpr = fdst.sel();
		n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;

		if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
			n->bc.dst_rel = 1;
			update_ngpr(d->array->gpr.sel() + d->array->array_size -1);
		} else {
			n->bc.dst_rel = 0;
		}

		n->bc.write_mask = d != NULL;
		n->bc.last = 0;

		if (n->bc.op_ptr->flags & AF_PRED) {
			n->bc.update_pred = (n->dst[1] != NULL);
			n->bc.update_exec_mask = (n->dst[2] != NULL);
		}

		// FIXME handle predication here
		n->bc.pred_sel = PRED_SEL_OFF;

		update_ngpr(n->bc.dst_gpr);

		add_nop |= finalize_alu_src(g, n, prev_g);

		last = n;
	}

	if (add_nop) {
		if (sh.get_ctx().r6xx_gpr_index_workaround) {
			insert_rv6xx_load_ar_workaround(g);
		}
	}
	last->bc.last = 1;
}

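/* Example of the hazard reported through the return value below (registers
 * are hypothetical, for illustration only):
 *
 *   previous group:  MOV  R2.x, ...           ; writes R2
 *   current group:   MOV  R5.x, R2[AR].x      ; reads R2 via relative index
 *
 * Either direction counts: a relatively indexed source whose base GPR was
 * written by the previous group, or a plain source whose GPR the previous
 * group wrote with dst_rel set. finalize_alu_group() turns the returned flag
 * into a separating NOP group on parts with r6xx_gpr_index_workaround.
 */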
bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
	vvec &sv = a->src;
	bool add_nop = false;
	FBC_DUMP(
		sblog << "finalize_alu_src: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	unsigned si = 0;

	for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
		value *v = *I;
		assert(v);

		bc_alu_src &src = a->bc.src[si];
		sel_chan sc;
		src.rel = 0;

		sel_chan gpr;

		switch (v->kind) {
		case VLK_REL_REG:
			sc = v->get_final_gpr();
			src.sel = sc.sel();
			src.chan = sc.chan();
			if (!v->rel->is_const()) {
				src.rel = 1;
				update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
				if (prev && !add_nop) {
					for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
						alu_node *pn = static_cast<alu_node*>(*pI);
						if (pn->bc.dst_gpr == src.sel) {
							add_nop = true;
							break;
						}
					}
				}
			} else
				src.rel = 0;

			break;
		case VLK_REG:
			gpr = v->get_final_gpr();
			src.sel = gpr.sel();
			src.chan = gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_TEMP:
			src.sel = v->gpr.sel();
			src.chan = v->gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_UNDEF:
		case VLK_CONST: {
			literal lv = v->literal_value;
			src.chan = 0;

			if (lv == literal(0))
				src.sel = ALU_SRC_0;
			else if (lv == literal(0.5f))
				src.sel = ALU_SRC_0_5;
			else if (lv == literal(1.0f))
				src.sel = ALU_SRC_1;
			else if (lv == literal(1))
				src.sel = ALU_SRC_1_INT;
			else if (lv == literal(-1))
				src.sel = ALU_SRC_M_1_INT;
			else {
				src.sel = ALU_SRC_LITERAL;
				src.chan = g->literal_chan(lv);
				src.value = lv;
			}
			break;
		}
		case VLK_KCACHE: {
			cf_node *clause = static_cast<cf_node*>(g->parent);
			assert(clause->is_alu_clause());
			sel_chan k = translate_kcache(clause, v);

			assert(k && "kcache translation failed");

			src.sel = k.sel();
			src.chan = k.chan();
			break;
		}
		case VLK_PARAM:
		case VLK_SPECIAL_CONST:
			src.sel = v->select.sel();
			src.chan = v->select.chan();
			break;
		default:
			assert(!"unknown value kind");
			break;
		}
		if (prev && !add_nop) {
			for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
				alu_node *pn = static_cast<alu_node*>(*pI);
				if (pn->bc.dst_rel) {
					if (pn->bc.dst_gpr == src.sel) {
						add_nop = true;
						break;
					}
				}
			}
		}
	}

	while (si < 3) {
		a->bc.src[si++].sel = 0;
	}
	return add_nop;
}

void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
{
	int reg = -1;

	for (unsigned chan = 0; chan < 4; ++chan) {

		dst.bc.dst_sel[chan] = SEL_MASK;

		unsigned sel = SEL_MASK;

		value *v = src.src[arg_start + chan];

		if (!v || v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand  " << chan << " ";
				dump::dump_op(&src);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand  " << chan << " ";
				dump::dump_op(&src);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand  " << chan << " ";
			dump::dump_op(&src);
			sblog << "\n";
			abort();
		}

		dst.bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	dst.bc.src_gpr = reg >= 0 ? reg : 0;
}

void bc_finalizer::emit_set_grad(fetch_node* f) {

	assert(f->src.size() == 12 || f->src.size() == 13);
	unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };

	unsigned arg_start = 0;

	for (unsigned op = 0; op < 2; ++op) {
		fetch_node *n = sh.create_fetch();
		n->bc.set_op(ops[op]);

		arg_start += 4;

		copy_fetch_src(*n, *f, arg_start);

		f->insert_before(n);
	}
}

void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
	assert(f.src.size() == 8);

	fetch_node *n = sh.create_fetch();

	n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);

	copy_fetch_src(*n, f, 4);

	f.insert_before(n);
}

void bc_finalizer::finalize_fetch(fetch_node* f) {

	int reg = -1;

	// src

	unsigned src_count = 4;

	unsigned flags = f->bc.op_ptr->flags;

	if (flags & FF_VTX) {
		src_count = 1;
	} else if (flags & FF_USEGRAD) {
		emit_set_grad(f);
	} else if (flags & FF_USE_TEXTURE_OFFSETS) {
		emit_set_texture_offsets(*f);
	}

	for (unsigned chan = 0; chan < src_count; ++chan) {

		unsigned sel = f->bc.src_sel[chan];

		if (sel > SEL_W)
			continue;

		value *v = f->src[chan];

		if (v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand  " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand  " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand  " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

		f->bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.src_gpr = reg >= 0 ? reg : 0;

	// dst

	reg = -1;

	unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};

	for (unsigned chan = 0; chan < 4; ++chan) {

		unsigned sel = f->bc.dst_sel[chan];

		if (sel == SEL_MASK)
			continue;

		value *v = f->dst[chan];
		if (!v)
			continue;

		if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch dst operand  " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			dst_swz[vchan] = sel;

		} else {
			sblog << "invalid fetch dst operand  " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}
	}

	for (unsigned i = 0; i < 4; ++i)
		f->bc.dst_sel[i] = dst_swz[i];

	assert(reg >= 0);

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.dst_gpr = reg >= 0 ? reg : 0;
}

void bc_finalizer::finalize_cf(cf_node* c) {

	unsigned flags = c->bc.op_ptr->flags;

	c->bc.end_of_program = 0;
	last_cf = c;

	if (flags & CF_EXP) {
		c->bc.set_op(CF_OP_EXPORT);
		last_export[c->bc.type] = c;

		int reg = -1;

		for (unsigned chan = 0; chan < 4; ++chan) {

			unsigned sel = c->bc.sel[chan];

			if (sel > SEL_W)
				continue;

			value *v = c->src[chan];

			if (v->is_undef()) {
				sel = SEL_MASK;
			} else if (v->is_const()) {
				literal l = v->literal_value;
				if (l == literal(0))
					sel = SEL_0;
				else if (l == literal(1.0f))
					sel = SEL_1;
				else {
					sblog << "invalid export constant operand  " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

			} else if (v->is_any_gpr()) {
				unsigned vreg = v->gpr.sel();
				unsigned vchan = v->gpr.chan();

				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid export source operand  " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

				sel = vchan;

			} else {
				sblog << "invalid export source operand  " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			c->bc.sel[chan] = sel;
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;

	} else if (flags & CF_MEM) {

		int reg = -1;
		unsigned mask = 0;

		for (unsigned chan = 0; chan < 4; ++chan) {
			value *v = c->src[chan];
			if (!v || v->is_undef())
				continue;

			if (!v->is_any_gpr() || v->gpr.chan() != chan) {
				sblog << "invalid source operand  " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}
			unsigned vreg = v->gpr.sel();
			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid source operand  " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			mask |= (1 << chan);
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;
		c->bc.comp_mask = mask;

		if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {

			reg = -1;

			for (unsigned chan = 0; chan < 4; ++chan) {
				value *v = c->src[4 + chan];
				if (!v || v->is_undef())
					continue;

				if (!v->is_any_gpr() || v->gpr.chan() != chan) {
					sblog << "invalid source operand  " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
				unsigned vreg = v->gpr.sel();
				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid source operand  " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
			}

			assert(reg >= 0);

			if (reg >= 0)
				update_ngpr(reg);

			c->bc.index_gpr = reg >= 0 ? reg : 0;
		}
	} else if (flags & CF_CALL) {
		update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
	}
}

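/* Worked example for the translation below (values are illustrative):
 * constants are grouped into lines of 16 (line = sel >> 4). Suppose kcache
 * set 1 of the clause is locked as { bank = 0, addr = 2, mode = KC_LOCK_2 }
 * and v refers to bank 0, sel = 0x2c (line 2). The line is covered by set 1,
 * so the translated select is
 *
 *   kc_base[1] + (sel - (kc.addr << 4)) = 160 + (0x2c - 0x20) = 172
 */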
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
	unsigned sel = v->select.kcache_sel();
	unsigned bank = v->select.kcache_bank();
	unsigned chan = v->select.chan();
	static const unsigned kc_base[] = {128, 160, 256, 288};

	sel &= 4095;

	unsigned line = sel >> 4;

	for (unsigned k = 0; k < 4; ++k) {
		bc_kcache &kc = alu->bc.kc[k];

		if (kc.mode == KC_LOCK_NONE)
			break;

		if (kc.bank == bank && (kc.addr == line ||
				(kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {

			sel = kc_base[k] + (sel - (kc.addr << 4));

			return sel_chan(sel, chan);
		}
	}

	assert(!"kcache translation error");
	return 0;
}

void bc_finalizer::update_ngpr(unsigned gpr) {
	if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
		ngpr = gpr + 1;
}

unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
                                       unsigned &ifs, unsigned add) {
	unsigned stack_elements = add;
	bool has_non_wqm_push = (add != 0);
	region_node *r = n->is_region() ?
			static_cast<region_node*>(n) : n->get_parent_region();

	loops = 0;
	ifs = 0;

	while (r) {
		if (r->is_loop()) {
			++loops;
		} else {
			++ifs;
			has_non_wqm_push = true;
		}
		r = r->get_parent_region();
	}
	stack_elements += (loops * ctx.stack_entry_size) + ifs;

	// reserve additional elements in some cases
	switch (ctx.hw_class) {
	case HW_CLASS_R600:
	case HW_CLASS_R700:
		// If any non-WQM push is invoked, 2 elements should be reserved.
		if (has_non_wqm_push)
			stack_elements += 2;
		break;
	case HW_CLASS_CAYMAN:
		// If any stack operation is invoked, 2 elements should be reserved.
		if (stack_elements)
			stack_elements += 2;
		break;
	case HW_CLASS_EVERGREEN:
		// According to the docs we need to reserve 1 element for each of the
		// following cases:
		//   1) non-WQM push is used with WQM/LOOP frames on stack
		//   2) ALU_ELSE_AFTER is used at the point of max stack usage
		// NOTE:
		// The conditions above were found to be insufficient; there are other
		// cases where stack space must be reserved as well, so we always
		// reserve 1 element whenever there is a non-WQM push on the stack.
		// Condition 2 is ignored for now because we don't use that instruction.
		if (has_non_wqm_push)
			++stack_elements;
		break;
	case HW_CLASS_UNKNOWN:
		assert(0);
	}
	return stack_elements;
}

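/* Worked example (assuming stack_entry_size == 4; the exact value depends on
 * the chip class): for an if region nested inside one loop, get_stack_depth()
 * returns loops = 1, ifs = 1, i.e. 1*4 + 1 = 5 elements, plus 1 reserved on
 * Evergreen for the non-WQM push = 6. update_nstack() then converts elements
 * to entries with the fixed divisor of 4: (6 + 3) >> 2 = 2 stack entries.
 */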
void bc_finalizer::update_nstack(region_node* r, unsigned add) {
	unsigned loops = 0;
	unsigned ifs = 0;
	unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;

	// XXX all chips expect this value to be computed using 4 as entry size,
	// not the real entry size
	unsigned stack_entries = (elems + 3) >> 2;

	if (nstack < stack_entries)
		nstack = stack_entries;
}

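/* The peepholes below, sketched on a CF listing (illustration only):
 *
 *   ALU_PUSH_BEFORE (flagged NF_ALU_STACK_WORKAROUND)
 *                          =>  PUSH (jump -> ALU clause) ; ALU
 *   ALU ; POP              =>  ALU_POP_AFTER
 *   JUMP -> following cf   =>  removed
 */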
void bc_finalizer::cf_peephole() {
	if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
		for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
				I = N) {
			N = I; ++N;
			cf_node *c = static_cast<cf_node*>(*I);

			if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
					(c->flags & NF_ALU_STACK_WORKAROUND)) {
				cf_node *push = sh.create_cf(CF_OP_PUSH);
				c->insert_before(push);
				push->jump(c);
				c->bc.set_op(CF_OP_ALU);
			}
		}
	}

	for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
			I = N) {
		N = I; ++N;

		cf_node *c = static_cast<cf_node*>(*I);

		if (c->jump_after_target) {
			c->jump_target = static_cast<cf_node*>(c->jump_target->next);
			c->jump_after_target = false;
		}

		if (c->is_cf_op(CF_OP_POP)) {
			node *p = c->prev;
			if (p->is_alu_clause()) {
				cf_node *a = static_cast<cf_node*>(p);

				if (a->bc.op == CF_OP_ALU) {
					a->bc.set_op(CF_OP_ALU_POP_AFTER);
					c->remove();
				}
			}
		} else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
			// if JUMP is immediately followed by its jump target,
			// then JUMP is useless and we can eliminate it
			c->remove();
		}
	}
}

} // namespace r600_sb