/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define BCP_DEBUG 0

#if BCP_DEBUG
#define BCP_DUMP(q) do { q } while (0)
#else
#define BCP_DUMP(q)
#endif

#include "r600_pipe.h"
#include "r600_shader.h"
#include "eg_sq.h" // CM_V_SQ_MOVA_DST_CF_IDX0/1

#include <stack>

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
#include "util/macros.h"

namespace r600_sb {

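// Top-level decode entry point: determines the shader target from the
// gallium shader type, creates the shader object, and walks the CF
// instructions of the raw bytecode via decode_shader().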
int bc_parser::decode() {

	dw = bc->bytecode;
	bc_ndw = bc->ndw;
	max_cf = 0;

	dec = new bc_decoder(ctx, dw, bc_ndw);

	shader_target t = TARGET_UNKNOWN;

	if (pshader) {
		switch (bc->type) {
		case PIPE_SHADER_FRAGMENT: t = TARGET_PS; break;
		case PIPE_SHADER_VERTEX:
			t = pshader->vs_as_ls ? TARGET_LS : (pshader->vs_as_es ? TARGET_ES : TARGET_VS);
			break;
		case PIPE_SHADER_GEOMETRY: t = TARGET_GS; break;
		case PIPE_SHADER_COMPUTE: t = TARGET_COMPUTE; break;
		case PIPE_SHADER_TESS_CTRL: t = TARGET_HS; break;
		case PIPE_SHADER_TESS_EVAL: t = pshader->tes_as_es ? TARGET_ES : TARGET_VS; break;
		default: assert(!"unknown shader target"); return -1; break;
		}
	} else {
		if (bc->type == PIPE_SHADER_COMPUTE)
			t = TARGET_COMPUTE;
		else
			t = TARGET_FETCH;
	}

	sh = new shader(ctx, t, bc->debug_id);
	sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE);

	int r = decode_shader();

	delete dec;

	sh->ngpr = bc->ngpr;
	sh->nstack = bc->nstack;

	return r;
}

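// Walk the CF instructions (two dwords each, so the CF id is i >> 1) until
// an end-of-program marker is seen and every branch target recorded in
// max_cf has been decoded.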
int bc_parser::decode_shader() {
	int r = 0;
	unsigned i = 0;
	bool eop = false;

	sh->init();

	do {
		eop = false;
		if ((r = decode_cf(i, eop)))
			return r;

	} while (!eop || (i >> 1) < max_cf);

	return 0;
}

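// Second phase after decode(): register declarations and turn the decoded
// clauses into the IR used by the optimizer passes.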
int bc_parser::prepare() {
	int r = 0;
	if ((r = parse_decls()))
		return r;
	if ((r = prepare_ir()))
		return r;
	return 0;
}

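// Register shader inputs and GPR arrays (needed for relative addressing)
// with the shader, using the r600_pipe_shader data when it's available.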
int bc_parser::parse_decls() {

	if (!pshader) {
		if (gpr_reladdr)
			sh->add_gpr_array(0, bc->ngpr, 0x0F);

		// compute shaders have some values preloaded in R0, R1
		sh->add_input(0 /* GPR */, true /* preloaded */, 0x0F /* mask */);
		sh->add_input(1 /* GPR */, true /* preloaded */, 0x0F /* mask */);
		return 0;
	}

	if (pshader->indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER))) {

		assert(pshader->num_arrays);

		if (pshader->num_arrays) {
			for (unsigned i = 0; i < pshader->num_arrays; ++i) {
				r600_shader_array &a = pshader->arrays[i];
				sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
			}
		} else {
			sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F);
		}
	}

	// GS inputs can add indirect addressing
	if (sh->target == TARGET_GS) {
		if (pshader->num_arrays) {
			for (unsigned i = 0; i < pshader->num_arrays; ++i) {
				r600_shader_array &a = pshader->arrays[i];
				sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
			}
		}
	}

	if (sh->target == TARGET_VS || sh->target == TARGET_ES || sh->target == TARGET_HS)
		sh->add_input(0, 1, 0x0F);
	else if (sh->target == TARGET_GS) {
		sh->add_input(0, 1, 0x0F);
		sh->add_input(1, 1, 0x0F);
	}

	bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN
			&& sh->target == TARGET_PS;

	bool ij_interpolators[6];
	memset(ij_interpolators, 0, sizeof(ij_interpolators));

	for (unsigned i = 0; i < pshader->ninput; ++i) {
		r600_shader_io & in = pshader->input[i];
		bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid);
		sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F);
		if (ps_interp && in.spi_sid) {
			int k = eg_get_interpolator_index(in.interpolate, in.interpolate_location);
			if (k >= 0)
				ij_interpolators[k] |= true;
		}
	}

	if (ps_interp) {
		/* add the egcm ij interpolators to live inputs */
		unsigned num_ij = 0;
		for (unsigned i = 0; i < ARRAY_SIZE(ij_interpolators); i++) {
			num_ij += ij_interpolators[i];
		}

		unsigned mask = (1 << (2 * num_ij)) - 1;
		unsigned gpr = 0;

		while (mask) {
			sh->add_input(gpr, true, mask & 0x0F);
			++gpr;
			mask >>= 4;
		}
	}

	return 0;
}

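// Decode a single CF instruction at dword offset i, record it in cf_map
// under its CF id, and dispatch to the ALU/fetch clause decoders. Branch
// targets raise max_cf so decode_shader() keeps going until every reachable
// CF instruction has been decoded.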
int bc_parser::decode_cf(unsigned &i, bool &eop) {

	int r;

	cf_node *cf = sh->create_cf();
	sh->root->push_back(cf);

	unsigned id = i >> 1;

	cf->bc.id = id;

	if (cf_map.size() < id + 1)
		cf_map.resize(id + 1);

	cf_map[id] = cf;

	if ((r = dec->decode_cf(i, cf->bc)))
		return r;

	cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags;

	if (flags & CF_ALU) {
		if ((r = decode_alu_clause(cf)))
			return r;
	} else if (flags & CF_FETCH) {
		if ((r = decode_fetch_clause(cf)))
			return r;
	} else if (flags & CF_EXP) {
		if (cf->bc.rw_rel)
			gpr_reladdr = true;
		assert(!cf->bc.rw_rel);
	} else if (flags & CF_MEM) {
		if (cf->bc.rw_rel)
			gpr_reladdr = true;
		assert(!cf->bc.rw_rel);
	} else if (flags & CF_BRANCH) {
		if (cf->bc.addr > max_cf)
			max_cf = cf->bc.addr;
	}

	eop = cf->bc.end_of_program || cf->bc.op == CF_OP_CF_END ||
			cf->bc.op == CF_OP_RET;
	return 0;
}

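// Decode the ALU groups of an ALU clause; cf->bc.count + 1 is the clause
// length in 64-bit instruction words, literal constants included.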
int bc_parser::decode_alu_clause(cf_node* cf) {
	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt;

	cf->subtype = NST_ALU_CLAUSE;

	cgroup = 0;
	memset(slots[0], 0, 5*sizeof(slots[0][0]));

	unsigned ng = 0;

	do {
		decode_alu_group(cf, i, gcnt);
		assert(gcnt <= cnt);
		cnt -= gcnt;
		ng++;
	} while (cnt);

	return 0;
}

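// Decode one ALU instruction group (up to five slots, terminated by the
// 'last' bit) and collect the literal constants that follow it; gcnt
// returns the number of 64-bit words consumed, literals included.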
int bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
	int r;
	alu_node *n;
	alu_group_node *g = sh->create_alu_group();

	cgroup = !cgroup;
	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));
	gcnt = 0;

	unsigned literal_mask = 0;

	do {
		n = sh->create_alu();
		g->push_back(n);

		if ((r = dec->decode_alu(i, n->bc)))
			return r;

		if (!sh->assign_slot(n, slots[cgroup])) {
			assert(!"alu slot assignment failed");
			return -1;
		}

		gcnt++;

	} while (gcnt <= 5 && !n->bc.last);

	assert(n->bc.last);

	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		n = static_cast<alu_node*>(*I);

		if (n->bc.dst_rel)
			gpr_reladdr = true;

		for (int k = 0; k < n->bc.op_ptr->src_count; ++k) {
			bc_alu_src &src = n->bc.src[k];
			if (src.rel)
				gpr_reladdr = true;
			if (src.sel == ALU_SRC_LITERAL) {
				literal_mask |= (1 << src.chan);
				src.value.u = dw[i + src.chan];
			}
		}
	}

	unsigned literal_ndw = 0;
	while (literal_mask) {
		g->literals.push_back(dw[i + literal_ndw]);
		literal_ndw += 1;
		literal_mask >>= 1;
	}

	literal_ndw = (literal_ndw + 1) & ~1u;

	i += literal_ndw;
	gcnt += literal_ndw >> 1;

	cf->push_back(g);
	return 0;
}

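// Convert each decoded ALU group of a clause into IR nodes.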
int bc_parser::prepare_alu_clause(cf_node* cf) {

	// loop over alu groups
	for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
		assert(I->subtype == NST_ALU_GROUP);
		alu_group_node *g = static_cast<alu_group_node*>(*I);
		prepare_alu_group(cf, g);
	}

	return 0;
}

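// Helpers tracking the most recent MOVA and the values written to the
// CF_IDX0/1 index registers, so that indexed resource/sampler accesses can
// carry their index value as an explicit source dependency.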
void bc_parser::save_set_cf_index(value *val, unsigned idx)
{
	assert(idx <= 1);
	assert(val);
	cf_index_value[idx] = val;
}
value *bc_parser::get_cf_index_value(unsigned idx)
{
	assert(idx <= 1);
	assert(cf_index_value[idx]);
	return cf_index_value[idx];
}
void bc_parser::save_mova(alu_node *mova)
{
	assert(mova);
	this->mova = mova;
}
alu_node *bc_parser::get_mova()
{
	assert(mova);
	return mova;
}

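// Build IR operands for each ALU instruction of a group: assign slots,
// resolve PV/PS references to the previous group's outputs, map kcache,
// GPR, and inline constant sources to values, and finally pack multislot
// (vector, or Cayman scalar) instructions into an alu_packed_node.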
int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {

	alu_node *n;

	cgroup = !cgroup;
	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));

	for (node_iterator I = g->begin(), E = g->end();
			I != E; ++I) {
		n = static_cast<alu_node*>(*I);
		bool ubo_indexing[2] = {};

		if (!sh->assign_slot(n, slots[cgroup])) {
			assert(!"alu slot assignment failed");
			return -1;
		}

		unsigned src_count = n->bc.op_ptr->src_count;

		if (ctx.alu_slots(n->bc.op) & AF_4SLOT)
			n->flags |= NF_ALU_4SLOT;

		n->src.resize(src_count);

		unsigned flags = n->bc.op_ptr->flags;

		if (flags & AF_PRED) {
			n->dst.resize(3);
			if (n->bc.update_pred)
				n->dst[1] = sh->get_special_value(SV_ALU_PRED);
			if (n->bc.update_exec_mask)
				n->dst[2] = sh->get_special_value(SV_EXEC_MASK);

			n->flags |= NF_DONT_HOIST;

		} else if (flags & AF_KILL) {

			n->dst.resize(2);
			n->dst[1] = sh->get_special_value(SV_VALID_MASK);
			sh->set_uses_kill();

			n->flags |= NF_DONT_HOIST | NF_DONT_MOVE |
					NF_DONT_KILL | NF_SCHEDULE_EARLY;

		} else {
			n->dst.resize(1);
		}

		if (n->bc.op == ALU_OP0_SET_CF_IDX0 || n->bc.op == ALU_OP0_SET_CF_IDX1) {
			// Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX
			// DCE will kill this op
			save_set_cf_index(get_mova()->src[0], n->bc.op == ALU_OP0_SET_CF_IDX1);
		} else if (flags & AF_MOVA) {

			n->dst[0] = sh->get_special_value(SV_AR_INDEX);
			save_mova(n);

			n->flags |= NF_DONT_HOIST;

		} else if (n->bc.op_ptr->src_count == 3 || n->bc.write_mask) {
			assert(!n->bc.dst_rel || n->bc.index_mode == INDEX_AR_X);

			value *v = sh->get_gpr_value(false, n->bc.dst_gpr, n->bc.dst_chan,
					n->bc.dst_rel);

			n->dst[0] = v;
		}

		if (n->bc.pred_sel) {
			sh->has_alu_predication = true;
			n->pred = sh->get_special_value(SV_ALU_PRED);
		}

		for (unsigned s = 0; s < src_count; ++s) {
			bc_alu_src &src = n->bc.src[s];

			if (src.sel == ALU_SRC_LITERAL) {
				n->src[s] = sh->get_const_value(src.value);
			} else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) {
				unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ?
						SLOT_TRANS : src.chan;

				// XXX shouldn't happen but llvm backend uses PS on cayman
				if (prev_slot == SLOT_TRANS && ctx.is_cayman())
					prev_slot = SLOT_X;

				alu_node *prev_alu = slots[pgroup][prev_slot];

				assert(prev_alu);

				if (!prev_alu->dst[0]) {
					value * t = sh->create_temp_value();
					prev_alu->dst[0] = t;
				}

				value *d = prev_alu->dst[0];

				if (d->is_rel()) {
					d = sh->get_gpr_value(true, prev_alu->bc.dst_gpr,
					                      prev_alu->bc.dst_chan,
					                      prev_alu->bc.dst_rel);
				}

				n->src[s] = d;
			} else if (ctx.is_kcache_sel(src.sel)) {
				unsigned sel = src.sel, kc_addr;
				unsigned kc_set = ((sel >> 7) & 2) + ((sel >> 5) & 1);

				bc_kcache &kc = cf->bc.kc[kc_set];
				kc_addr = (kc.addr << 4) + (sel & 0x1F);
				n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan, (alu_kcache_index_mode)kc.index_mode);

				if (kc.index_mode != KC_INDEX_NONE) {
					assert(kc.index_mode != KC_LOCK_LOOP);
					ubo_indexing[kc.index_mode - KC_INDEX_0] = true;
				}
			} else if (src.sel < MAX_GPR) {
				value *v = sh->get_gpr_value(true, src.sel, src.chan, src.rel);

				n->src[s] = v;

			} else if (src.sel >= ALU_SRC_PARAM_OFFSET) {
				// using slot for value channel because in fact the slot
				// determines the channel that is loaded by INTERP_LOAD_P0
				// (and maybe some others).
				// otherwise GVN will consider INTERP_LOAD_P0s with the same
				// param index as equal instructions and leave only one of them
				n->src[s] = sh->get_special_ro_value(sel_chan(src.sel,
				                                              n->bc.slot));
			} else {
				switch (src.sel) {
				case ALU_SRC_0:
					n->src[s] = sh->get_const_value(0);
					break;
				case ALU_SRC_0_5:
					n->src[s] = sh->get_const_value(0.5f);
					break;
				case ALU_SRC_1:
					n->src[s] = sh->get_const_value(1.0f);
					break;
				case ALU_SRC_1_INT:
					n->src[s] = sh->get_const_value(1);
					break;
				case ALU_SRC_M_1_INT:
					n->src[s] = sh->get_const_value(-1);
					break;
				default:
					n->src[s] = sh->get_special_ro_value(src.sel);
					break;
				}
			}
		}

		// add UBO index values if any as dependencies
		if (ubo_indexing[0]) {
			n->src.push_back(get_cf_index_value(0));
		}
		if (ubo_indexing[1]) {
			n->src.push_back(get_cf_index_value(1));
		}

		if ((n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1) &&
		    ctx.is_cayman())
			// Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX
			save_set_cf_index(n->src[0], n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1);
	}

	// pack multislot instructions into alu_packed_node

	alu_packed_node *p = NULL;
	for (node_iterator N, I = g->begin(), E = g->end(); I != E; I = N) {
		N = I + 1;
		alu_node *a = static_cast<alu_node*>(*I);
		unsigned sflags = a->bc.slot_flags;

		if (sflags == AF_4V || (ctx.is_cayman() && sflags == AF_S)) {
			if (!p)
				p = sh->create_alu_packed();

			a->remove();
			p->push_back(a);
		}
	}

	if (p) {
		g->push_front(p);

		if (p->count() == 3 && ctx.is_cayman()) {
			// cayman's scalar instruction that can use 3 or 4 slots

			// FIXME for simplicity we'll always add 4th slot,
			// but probably we might want to always remove 4th slot and make
			// sure that regalloc won't choose 'w' component for dst

			alu_node *f = static_cast<alu_node*>(p->first);
			alu_node *a = sh->create_alu();
			a->src = f->src;
			a->dst.resize(f->dst.size());
			a->bc = f->bc;
			a->bc.slot = SLOT_W;
			p->push_back(a);
		}
	}

	return 0;
}

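// Decode the texture/vertex fetch instructions of a fetch clause.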
int bc_parser::decode_fetch_clause(cf_node* cf) {
	int r;
	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;

	cf->subtype = NST_TEX_CLAUSE;

	while (cnt--) {
		fetch_node *n = sh->create_fetch();
		cf->push_back(n);
		if ((r = dec->decode_fetch(i, n->bc)))
			return r;
		if (n->bc.src_rel || n->bc.dst_rel)
			gpr_reladdr = true;
	}
	return 0;
}

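// Build IR operands for fetch instructions. Sources of SET_GRADIENTS_* and
// SET_TEXTURE_OFFSETS are captured and folded into the instructions that
// consume them; the set instructions themselves are re-emitted later by
// bc_finalizer.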
int bc_parser::prepare_fetch_clause(cf_node *cf) {

	vvec grad_v, grad_h, texture_offsets;

	for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {

		fetch_node *n = static_cast<fetch_node*>(*I);
		assert(n->is_valid());

		unsigned flags = n->bc.op_ptr->flags;

		unsigned vtx = flags & FF_VTX;
		unsigned num_src = vtx ? ctx.vtx_src_num : 4;

		n->dst.resize(4);

		if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) {
			sh->uses_gradients = true;
		}

		if (flags & (FF_SETGRAD | FF_SET_TEXTURE_OFFSETS)) {

			vvec *grad = NULL;

			switch (n->bc.op) {
				case FETCH_OP_SET_GRADIENTS_V:
					grad = &grad_v;
					break;
				case FETCH_OP_SET_GRADIENTS_H:
					grad = &grad_h;
					break;
				case FETCH_OP_SET_TEXTURE_OFFSETS:
					grad = &texture_offsets;
					break;
				default:
					assert(!"unexpected SET_GRAD instruction");
					return -1;
			}

			if (grad->empty())
				grad->resize(4);

			for(unsigned s = 0; s < 4; ++s) {
				unsigned sw = n->bc.src_sel[s];
				if (sw <= SEL_W)
					(*grad)[s] = sh->get_gpr_value(true, n->bc.src_gpr,
					                               sw, false);
				else if (sw == SEL_0)
					(*grad)[s] = sh->get_const_value(0.0f);
				else if (sw == SEL_1)
					(*grad)[s] = sh->get_const_value(1.0f);
			}
		} else {
			// Fold source values for instructions with hidden target values
			// into the instructions using them. The set instructions are
			// later re-emitted by bc_finalizer.
			if (flags & FF_USEGRAD) {
				n->src.resize(12);
				std::copy(grad_v.begin(), grad_v.end(), n->src.begin() + 4);
				std::copy(grad_h.begin(), grad_h.end(), n->src.begin() + 8);
			} else if (flags & FF_USE_TEXTURE_OFFSETS) {
				n->src.resize(8);
				std::copy(texture_offsets.begin(), texture_offsets.end(), n->src.begin() + 4);
			} else {
				n->src.resize(4);
			}

			for(int s = 0; s < 4; ++s) {
				if (n->bc.dst_sel[s] != SEL_MASK)
					n->dst[s] = sh->get_gpr_value(false, n->bc.dst_gpr, s, false);
				// NOTE: it doesn't matter here which components of the result we
				// are using, but original n->bc.dst_sel should be taken into
				// account when building the bytecode
			}
			for(unsigned s = 0; s < num_src; ++s) {
				if (n->bc.src_sel[s] <= SEL_W)
					n->src[s] = sh->get_gpr_value(true, n->bc.src_gpr,
					                              n->bc.src_sel[s], false);
			}

			// Scheduler will emit the appropriate instructions to set CF_IDX0/1
			if (n->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) {
				n->src.push_back(get_cf_index_value(n->bc.sampler_index_mode == V_SQ_CF_INDEX_1));
			}
			if (n->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) {
				n->src.push_back(get_cf_index_value(n->bc.resource_index_mode == V_SQ_CF_INDEX_1));
			}
		}
	}

	return 0;
}

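// Structure the flat CF list into IR: clauses are prepared, loops and
// conditionals become region/repeat/depart/if nodes, and burst exports and
// memory writes are unrolled into single CF nodes.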
int bc_parser::prepare_ir() {

	for(id_cf_map::iterator I = cf_map.begin(), E = cf_map.end(); I != E; ++I) {
		cf_node *c = *I;

		if (!c)
			continue;

		unsigned flags = c->bc.op_ptr->flags;

		if (flags & CF_ALU) {
			prepare_alu_clause(c);
		} else if (flags & CF_FETCH) {
			prepare_fetch_clause(c);
		} else if (c->bc.op == CF_OP_CALL_FS) {
			sh->init_call_fs(c);
			c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
		} else if (flags & CF_LOOP_START) {
			prepare_loop(c);
		} else if (c->bc.op == CF_OP_JUMP) {
			prepare_if(c);
		} else if (c->bc.op == CF_OP_LOOP_END) {
			loop_stack.pop();
		} else if (c->bc.op == CF_OP_LOOP_CONTINUE) {
			assert(!loop_stack.empty());
			repeat_node *rep = sh->create_repeat(loop_stack.top());
			if (c->parent->first != c)
				rep->move(c->parent->first, c);
			c->replace_with(rep);
			sh->simplify_dep_rep(rep);
		} else if (c->bc.op == CF_OP_LOOP_BREAK) {
			assert(!loop_stack.empty());
			depart_node *dep = sh->create_depart(loop_stack.top());
			if (c->parent->first != c)
				dep->move(c->parent->first, c);
			c->replace_with(dep);
			sh->simplify_dep_rep(dep);
		} else if (flags & CF_EXP) {

			// unroll burst exports

			assert(c->bc.op == CF_OP_EXPORT || c->bc.op == CF_OP_EXPORT_DONE);

			c->bc.set_op(CF_OP_EXPORT);

			unsigned burst_count = c->bc.burst_count;
			unsigned eop = c->bc.end_of_program;

			c->bc.end_of_program = 0;
			c->bc.burst_count = 0;

			do {
				c->src.resize(4);

				for(int s = 0; s < 4; ++s) {
					switch (c->bc.sel[s]) {
					case SEL_0:
						c->src[s] = sh->get_const_value(0.0f);
						break;
					case SEL_1:
						c->src[s] = sh->get_const_value(1.0f);
						break;
					case SEL_MASK:
						break;
					default:
						if (c->bc.sel[s] <= SEL_W)
							c->src[s] = sh->get_gpr_value(true, c->bc.rw_gpr,
									c->bc.sel[s], false);
						else
							assert(!"invalid src_sel for export");
					}
				}

				if (!burst_count--)
					break;

				cf_node *cf_next = sh->create_cf();
				cf_next->bc = c->bc;
				++cf_next->bc.rw_gpr;
				++cf_next->bc.array_base;

				c->insert_after(cf_next);
				c = cf_next;

			} while (1);

			c->bc.end_of_program = eop;
		} else if (flags & CF_MEM) {

			unsigned burst_count = c->bc.burst_count;
			unsigned eop = c->bc.end_of_program;

			c->bc.end_of_program = 0;
			c->bc.burst_count = 0;

			do {

				c->src.resize(4);

				for(int s = 0; s < 4; ++s) {
					if (c->bc.comp_mask & (1 << s))
						c->src[s] =
								sh->get_gpr_value(true, c->bc.rw_gpr, s, false);
				}

				if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) { // indexed write
					c->src.resize(8);
					for(int s = 0; s < 3; ++s) {
						c->src[4 + s] =
							sh->get_gpr_value(true, c->bc.index_gpr, s, false);
					}

					// FIXME probably we can relax it a bit
					c->flags |= NF_DONT_HOIST | NF_DONT_MOVE;
				}

				if (flags & CF_EMIT) {
					// Instruction implicitly depends on prior [EMIT_][CUT]_VERTEX
					c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
					c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
					if (sh->target == TARGET_ES) {
						// For ES shaders this is an export
						c->flags |= NF_DONT_KILL;
					}
				}

				if (!burst_count--)
					break;

				cf_node *cf_next = sh->create_cf();
				cf_next->bc = c->bc;
				++cf_next->bc.rw_gpr;

				// FIXME is it correct?
				cf_next->bc.array_base += cf_next->bc.elem_size + 1;

				c->insert_after(cf_next);
				c = cf_next;
			} while (1);

			c->bc.end_of_program = eop;

		} else if (flags & CF_EMIT) {
			/* quick peephole */
			cf_node *prev = static_cast<cf_node *>(c->prev);
			if (c->bc.op == CF_OP_CUT_VERTEX &&
				prev && prev->is_valid() &&
				prev->bc.op == CF_OP_EMIT_VERTEX &&
				c->bc.count == prev->bc.count) {
				prev->bc.set_op(CF_OP_EMIT_CUT_VERTEX);
				prev->bc.end_of_program = c->bc.end_of_program;
				c->remove();
			}
			else {
				c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE;

				c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
				c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
			}
		}
	}

	assert(loop_stack.empty());
	return 0;
}

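// LOOP_START: wrap everything up to the matching LOOP_END in a region node
// with a repeat node, and push the region on the loop stack so that
// LOOP_BREAK/LOOP_CONTINUE can reference it.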
int bc_parser::prepare_loop(cf_node* c) {
	assert(c->bc.addr-1 < cf_map.size());

	cf_node *end = cf_map[c->bc.addr - 1];
	assert(end->bc.op == CF_OP_LOOP_END);
	assert(c->parent == end->parent);

	region_node *reg = sh->create_region();
	repeat_node *rep = sh->create_repeat(reg);

	reg->push_back(rep);
	c->insert_before(reg);
	rep->move(c, end->next);

	reg->src_loop = true;

	loop_stack.push(reg);
	return 0;
}

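// JUMP: reconstruct an if (with optional else) from the jump/else targets
// as nested depart nodes under an if node conditioned on the exec mask.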
int bc_parser::prepare_if(cf_node* c) {
	assert(c->bc.addr-1 < cf_map.size());
	cf_node *c_else = NULL, *end = cf_map[c->bc.addr];

	if (!end)
		return 0; // not quite sure how this happens, malformed input?

	BCP_DUMP(
		sblog << "parsing JUMP @" << c->bc.id;
		sblog << "\n";
	);

	if (end->bc.op == CF_OP_ELSE) {
		BCP_DUMP(
			sblog << "  found ELSE : ";
			dump::dump_op(end);
			sblog << "\n";
		);

		c_else = end;
		end = cf_map[c_else->bc.addr];
	} else {
		BCP_DUMP(
			sblog << "  no else\n";
		);

		c_else = end;
	}

	if (c_else->parent != c->parent)
		c_else = NULL;

	if (end && end->parent != c->parent)
		end = NULL;

	region_node *reg = sh->create_region();

	depart_node *dep2 = sh->create_depart(reg);
	depart_node *dep = sh->create_depart(reg);
	if_node *n_if = sh->create_if();

	c->insert_before(reg);

	if (c_else != end)
		dep->move(c_else, end);
	dep2->move(c, end);

	reg->push_back(dep);
	dep->push_front(n_if);
	n_if->push_back(dep2);

	n_if->cond = sh->get_special_value(SV_EXEC_MASK);

	return 0;
}


} // namespace r600_sb