/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

shader::shader(sb_context &sctx, shader_target t, unsigned id)
: ctx(sctx), next_temp_value_index(temp_regid_offset),
  prep_regs_count(), pred_sels(),
  regions(), inputs(), undef(), val_pool(sizeof(value)),
  pool(), all_nodes(), src_stats(), opt_stats(), errors(),
  optimized(), id(id),
  coal(*this), bbs(),
  target(t), vt(ex), ex(*this), root(),
  compute_interferences(),
  has_alu_predication(),
  uses_gradients(), safe_math(), ngpr(), nstack(), dce_flags() {}

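// Try to place an ALU instruction into one of the five slots (x/y/z/w/trans)
// of an instruction group. The preferred slot is the destination channel; on
// non-Cayman chips an instruction that cannot use (or cannot get) a vector
// slot falls back to the trans slot if it supports the scalar unit.
// Returns false if the chosen slot is already occupied.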
bool shader::assign_slot(alu_node* n, alu_node *slots[5]) {

	unsigned slot_flags = ctx.alu_slots(n->bc.op);
	unsigned slot = n->bc.dst_chan;

	if (!ctx.is_cayman() && (!(slot_flags & AF_V) || slots[slot]) &&
			(slot_flags & AF_S))
		slot = SLOT_TRANS;

	if (slots[slot])
		return false;

	n->bc.slot = slot;
	slots[slot] = n;
	return true;
}

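// For every component selected in comp_mask, fetch the value for GPR 'gpr'
// at that channel, pin it to its original register and channel, and append
// it to vec. If the value belongs to an indirectly addressable array, the
// whole array is pinned to its original location as well.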
void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask,
                            bool src) {
	unsigned chan = 0;
	while (comp_mask) {
		if (comp_mask & 1) {
			value *v = get_gpr_value(src, gpr, chan, false);
			v->flags |= (VLF_PIN_REG | VLF_PIN_CHAN);
			if (!v->is_rel()) {
				v->gpr = v->pin_gpr = v->select;
				v->fix();
			}
			if (v->array && !v->array->gpr) {
				// if pinned value can be accessed with indirect addressing
				// pin the entire array to its original location
				v->array->gpr = v->array->base_gpr;
			}
			vec.push_back(v);
		}
		comp_mask >>= 1;
		++chan;
	}
}

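// Create a CF node representing a clause of the given subtype (ALU, TEX,
// VTX or GDS) with the matching CF opcode and the barrier bit set.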
cf_node* shader::create_clause(node_subtype nst) {
	cf_node *n = create_cf();

	n->subtype = nst;

	switch (nst) {
	case NST_ALU_CLAUSE: n->bc.set_op(CF_OP_ALU); break;
	case NST_TEX_CLAUSE: n->bc.set_op(CF_OP_TEX); break;
	case NST_VTX_CLAUSE: n->bc.set_op(CF_OP_VTX); break;
	case NST_GDS_CLAUSE: n->bc.set_op(CF_OP_GDS); break;
	default: assert(!"invalid clause type"); break;
	}

	n->bc.barrier = 1;
	return n;
}

void shader::create_bbs() {
	create_bbs(root, bbs);
}

void shader::expand_bbs() {
	expand_bbs(bbs);
}

alu_node* shader::create_mov(value* dst, value* src) {
	alu_node *n = create_alu();
	n->bc.set_op(ALU_OP1_MOV);
	n->dst.push_back(dst);
	n->src.push_back(src);
	dst->def = n;

	return n;
}

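// Create a MOV that is known to be a pure copy: it is flagged so later
// passes treat it as a copy and don't hoist it, and if both values are
// ordinary shader GPRs an affinity edge with cost 'affcost' is added so the
// coalescer can try to give them the same register.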
alu_node* shader::create_copy_mov(value* dst, value* src, unsigned affcost) {
	alu_node *n = create_mov(dst, src);

	dst->assign_source(src);
	n->flags |= NF_COPY_MOV | NF_DONT_HOIST;

	if (affcost && dst->is_sgpr() && src->is_sgpr())
		coal.add_edge(src, dst, affcost);

	return n;
}

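// Look up or create the value identified by (kind, id, version). Version 0
// of the initially preallocated registers is served directly from the value
// pool; everything else is cached in the reg_values map.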
value* shader::get_value(value_kind kind, sel_chan id,
                         unsigned version) {
	if (version == 0 && kind == VLK_REG && id.sel() < prep_regs_count)
		return val_pool[id - 1];

	unsigned key = (kind << 28) | (version << 16) | id;
	value_map::iterator i = reg_values.find(key);
	if (i != reg_values.end()) {
		return i->second;
	}
	value *v = create_value(kind, id, version);
	reg_values.insert(std::make_pair(key, v));
	return v;
}

value* shader::get_special_value(unsigned sv_id, unsigned version) {
	sel_chan id(sv_id, 0);
	return get_value(VLK_SPECIAL_REG, id, version);
}

void shader::fill_array_values(gpr_array *a, vvec &vv) {
	unsigned sz = a->array_size;
	vv.resize(sz);
	for (unsigned i = 0; i < a->array_size; ++i) {
		vv[i] = get_gpr_value(true, a->base_gpr.sel() + i, a->base_gpr.chan(),
		                      false);
	}
}

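// Return the value for a GPR access. Relative (indirectly addressed)
// accesses get a fresh VLK_REL_REG value whose 'rel' operand is the AR index
// and whose mayuse/maydef vectors cover the whole array; direct accesses go
// through the regular value lookup (version 0 of preallocated registers
// comes straight from the pool).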
value* shader::get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel,
                             unsigned version) {
	sel_chan id(reg, chan);
	value *v;
	gpr_array *a = get_gpr_array(reg, chan);
	if (rel) {
		assert(a);
		v = create_value(VLK_REL_REG, id, 0);
		v->rel = get_special_value(SV_AR_INDEX);
		fill_array_values(a, v->muse);
		if (!src)
			fill_array_values(a, v->mdef);
	} else {
		if (version == 0 && reg < prep_regs_count)
			return (val_pool[id - 1]);

		v = get_value(VLK_REG, id, version);
	}

	v->array = a;
	v->pin_gpr = v->select;

	return v;
}

value* shader::create_temp_value() {
	sel_chan id(++next_temp_value_index, 0);
	return get_value(VLK_TEMP, id, 0);
}

value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan, alu_kcache_index_mode index_mode) {
	return get_ro_value(kcache_values, VLK_KCACHE,
			sel_chan(bank, index, chan, index_mode));
}

void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) {
	if (inputs.size() <= gpr)
		inputs.resize(gpr+1);

	shader_input &i = inputs[gpr];
	i.preloaded = preloaded;
	i.comp_mask = comp_mask;

	if (preloaded) {
		add_pinned_gpr_values(root->dst, gpr, comp_mask, true);
	}
}

void shader::init() {
	assert(!root);
	root = create_container();
}

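// Set up the register interface of the CALL_FS CF node for VS/ES/LS shaders:
// inputs that are not preloaded become destinations of the call (the fetch
// shader writes them), while preloaded inputs are listed as its sources,
// presumably so they are kept live and pinned across the call.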
void shader::init_call_fs(cf_node* cf) {
	unsigned gpr = 0;

	assert(target == TARGET_LS || target == TARGET_VS || target == TARGET_ES);

	for(inputs_vec::const_iterator I = inputs.begin(),
			E = inputs.end(); I != E; ++I, ++gpr) {
		if (!I->preloaded)
			add_pinned_gpr_values(cf->dst, gpr, I->comp_mask, false);
		else
			add_pinned_gpr_values(cf->src, gpr, I->comp_mask, true);
	}
}

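// Mark every value in the set as undefined by pointing its gvn_source at the
// canonical undef value (registering that value with the value table on
// first use).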
void shader::set_undef(val_set& s) {
	value *undefined = get_undef_value();
	if (!undefined->gvn_source)
		vt.add_value(undefined);

	val_set &vs = s;

	for (val_set::iterator I = vs.begin(*this), E = vs.end(*this); I != E; ++I) {
		value *v = *I;

		assert(!v->is_readonly() && !v->is_rel());

		v->gvn_source = undefined->gvn_source;
	}
}

value* shader::create_value(value_kind k, sel_chan regid, unsigned ver) {
	value *v = val_pool.create(k, regid, ver);
	return v;
}

value* shader::get_undef_value() {
	if (!undef)
		undef = create_value(VLK_UNDEF, 0, 0);
	return undef;
}

node* shader::create_node(node_type nt, node_subtype nst, node_flags flags) {
	node *n = new (pool.allocate(sizeof(node))) node(nt, nst, flags);
	all_nodes.push_back(n);
	return n;
}

alu_node* shader::create_alu() {
	alu_node* n = new (pool.allocate(sizeof(alu_node))) alu_node();
	all_nodes.push_back(n);
	return n;
}

alu_group_node* shader::create_alu_group() {
	alu_group_node* n =
			new (pool.allocate(sizeof(alu_group_node))) alu_group_node();
	all_nodes.push_back(n);
	return n;
}

alu_packed_node* shader::create_alu_packed() {
	alu_packed_node* n =
			new (pool.allocate(sizeof(alu_packed_node))) alu_packed_node();
	all_nodes.push_back(n);
	return n;
}

cf_node* shader::create_cf() {
	cf_node* n = new (pool.allocate(sizeof(cf_node))) cf_node();
	n->bc.barrier = 1;
	all_nodes.push_back(n);
	return n;
}

fetch_node* shader::create_fetch() {
	fetch_node* n = new (pool.allocate(sizeof(fetch_node))) fetch_node();
	all_nodes.push_back(n);
	return n;
}

region_node* shader::create_region() {
	region_node *n = new (pool.allocate(sizeof(region_node)))
			region_node(regions.size());
	regions.push_back(n);
	all_nodes.push_back(n);
	return n;
}

depart_node* shader::create_depart(region_node* target) {
	depart_node* n = new (pool.allocate(sizeof(depart_node)))
			depart_node(target, target->departs.size());
	target->departs.push_back(n);
	all_nodes.push_back(n);
	return n;
}

repeat_node* shader::create_repeat(region_node* target) {
	repeat_node* n = new (pool.allocate(sizeof(repeat_node)))
			repeat_node(target, target->repeats.size() + 1);
	target->repeats.push_back(n);
	all_nodes.push_back(n);
	return n;
}

container_node* shader::create_container(node_type nt, node_subtype nst,
		                                 node_flags flags) {
	container_node *n = new (pool.allocate(sizeof(container_node)))
			container_node(nt, nst, flags);
	all_nodes.push_back(n);
	return n;
}

if_node* shader::create_if() {
	if_node* n = new (pool.allocate(sizeof(if_node))) if_node();
	all_nodes.push_back(n);
	return n;
}

bb_node* shader::create_bb(unsigned id, unsigned loop_level) {
	bb_node* n = new (pool.allocate(sizeof(bb_node))) bb_node(id, loop_level);
	all_nodes.push_back(n);
	return n;
}

value* shader::get_special_ro_value(unsigned sel) {
	return get_ro_value(special_ro_values, VLK_PARAM, sel);
}

value* shader::get_const_value(const literal &v) {
	value *val = get_ro_value(const_values, VLK_CONST, v);
	val->literal_value = v;
	return val;
}

shader::~shader() {
	for (node_vec::iterator I = all_nodes.begin(), E = all_nodes.end();
			I != E; ++I)
		(*I)->~node();

	for (gpr_array_vec::iterator I = gpr_arrays.begin(), E = gpr_arrays.end();
			I != E; ++I) {
		delete *I;
	}
}

void shader::dump_ir() {
	if (ctx.dump_pass)
		dump(*this).run();
}

value* shader::get_value_version(value* v, unsigned ver) {
	assert(!v->is_readonly() && !v->is_rel());
	value *vv = get_value(v->kind, v->select, ver);
	assert(vv);

	if (v->array) {
		vv->array = v->array;
	}

	return vv;
}

gpr_array* shader::get_gpr_array(unsigned reg, unsigned chan) {

	for (regarray_vec::iterator I = gpr_arrays.begin(),
			E = gpr_arrays.end(); I != E; ++I) {
		gpr_array* a = *I;
		unsigned achan = a->base_gpr.chan();
		unsigned areg = a->base_gpr.sel();
		if (achan == chan && (reg >= areg && reg < areg+a->array_size))
			return a;
	}
	return NULL;
}

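// Register an indexable GPR array of 'gpr_count' registers starting at
// 'gpr_start', creating one gpr_array per channel selected in comp_mask.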
void shader::add_gpr_array(unsigned gpr_start, unsigned gpr_count,
					   unsigned comp_mask) {
	unsigned chan = 0;
	while (comp_mask) {
		if (comp_mask & 1) {
			gpr_array *a = new gpr_array(
					sel_chan(gpr_start, chan), gpr_count);

			SB_DUMP_PASS( sblog << "add_gpr_array: @" << a->base_gpr
			         << " [" << a->array_size << "]\n";
			);

			gpr_arrays.push_back(a);
		}
		comp_mask >>= 1;
		++chan;
	}
}

value* shader::get_pred_sel(int sel) {
	assert(sel == 0 || sel == 1);
	if (!pred_sels[sel])
		pred_sels[sel] = get_const_value(sel);

	return pred_sels[sel];
}

cf_node* shader::create_cf(unsigned op) {
	cf_node *c = create_cf();
	c->bc.set_op(op);
	c->bc.barrier = 1;
	return c;
}

std::string shader::get_full_target_name() {
	std::string s = get_shader_target_name();
	s += "/";
	s += ctx.get_hw_chip_name();
	s += "/";
	s += ctx.get_hw_class_name();
	return s;
}

const char* shader::get_shader_target_name() {
	switch (target) {
		case TARGET_VS: return "VS";
		case TARGET_ES: return "ES";
		case TARGET_PS: return "PS";
		case TARGET_GS: return "GS";
		case TARGET_HS: return "HS";
		case TARGET_LS: return "LS";
		case TARGET_COMPUTE: return "COMPUTE";
		case TARGET_FETCH: return "FETCH";
		default:
			return "INVALID_TARGET";
	}
}

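// Called for a depart/repeat node 'dr': if its parent container is itself a
// repeat or depart, expand that parent back into its target region, then cut
// away the nodes following 'dr' in its parent, which are unreachable after
// the unconditional control transfer.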
void shader::simplify_dep_rep(node* dr) {
	container_node *p = dr->parent;
	if (p->is_repeat()) {
		repeat_node *r = static_cast<repeat_node*>(p);
		r->target->expand_repeat(r);
	} else if (p->is_depart()) {
		depart_node *d = static_cast<depart_node*>(p);
		d->target->expand_depart(d);
	}
	if (dr->next)
		dr->parent->cut(dr->next, NULL);
}

// FIXME this is used in some places as the max non-temp gpr,
// (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead.
unsigned shader::first_temp_gpr() {
	return MAX_GPR - ctx.alu_temp_gprs;
}

unsigned shader::num_nontemp_gpr() {
	return MAX_GPR - 2 * ctx.alu_temp_gprs;
}

void shader::set_uses_kill() {
	if (root->src.empty())
		root->src.resize(1);

	if (!root->src[0])
		root->src[0] = get_special_value(SV_VALID_MASK);
}

alu_node* shader::clone(alu_node* n) {
	alu_node *c = create_alu();

	// FIXME: this may be wrong with indirect operands
	c->src = n->src;
	c->dst = n->dst;

	c->bc = n->bc;
	c->pred = n->pred;

	return c;
}

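// Collect per-shader statistics (GPR and stack usage plus instruction counts
// from the IR tree) into src_stats or opt_stats and accumulate them into the
// context-wide totals. Only active when statistics dumping is enabled.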
void shader::collect_stats(bool opt) {
	if (!sb_context::dump_stat)
		return;

	shader_stats &s = opt ? opt_stats : src_stats;

	s.shaders = 1;
	s.ngpr = ngpr;
	s.nstack = nstack;
	s.collect(root);

	if (opt)
		ctx.opt_stats.accumulate(s);
	else
		ctx.src_stats.accumulate(s);
}

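// Look up a read-only value (constant, kcache or special param) in the given
// map, creating and caching it on first use.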
value* shader::get_ro_value(value_map& vm, value_kind vk, unsigned key) {
	value_map::iterator I = vm.find(key);
	if (I != vm.end())
		return I->second;
	value *v = create_value(vk, key, 0);
	v->flags = VLF_READONLY;
	vm.insert(std::make_pair(key, v));
	return v;
}

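// Build the basic-block list for container n: runs of plain operation nodes
// (NT_OP) are wrapped into bb_node containers (except when the run ends at a
// repeat/depart/if node), nested containers are visited recursively with the
// loop level incremented inside loop regions, and traversal of a container
// stops after a depart node.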
void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) {

	bool inside_bb = false;
	bool last_inside_bb = true;
	node_iterator bb_start(n->begin()), I(bb_start), E(n->end());

	for (; I != E; ++I) {
		node *k = *I;
		inside_bb = k->type == NT_OP;

		if (inside_bb && !last_inside_bb)
			bb_start = I;
		else if (!inside_bb) {
			if (last_inside_bb
					&& I->type != NT_REPEAT
					&& I->type != NT_DEPART
					&& I->type != NT_IF) {
				bb_node *bb = create_bb(bbs.size(), loop_level);
				bbs.push_back(bb);
				n->insert_node_before(*bb_start, bb);
				if (bb_start != I)
					bb->move(bb_start, I);
			}

			if (k->is_container()) {

				bool loop = false;
				if (k->type == NT_REGION) {
					loop = static_cast<region_node*>(k)->is_loop();
				}

				create_bbs(static_cast<container_node*>(k), bbs,
				           loop_level + loop);
			}
		}

		if (k->type == NT_DEPART)
			return;

		last_inside_bb = inside_bb;
	}

	if (last_inside_bb) {
		bb_node *bb = create_bb(bbs.size(), loop_level);
		bbs.push_back(bb);
		if (n->empty())
			n->push_back(bb);
		else {
			n->insert_node_before(*bb_start, bb);
			if (bb_start != n->end())
				bb->move(bb_start, n->end());
		}
	} else {
		if (n->last && n->last->type == NT_IF) {
			bb_node *bb = create_bb(bbs.size(), loop_level);
			bbs.push_back(bb);
			n->push_back(bb);
		}
	}
}

void shader::expand_bbs(bbs_vec &bbs) {

	for (bbs_vec::iterator I = bbs.begin(), E = bbs.end(); I != E; ++I) {
		bb_node *b = *I;
		b->expand();
	}
}

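// Map an instruction node to its scheduling queue: ALU-like nodes go to
// SQ_ALU, fetch instructions are split between SQ_VTX (vertex fetch on
// r600), SQ_GDS and SQ_TEX, and CF instructions go to SQ_CF.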
sched_queue_id shader::get_queue_id(node* n) {
	switch (n->subtype) {
		case NST_ALU_INST:
		case NST_ALU_PACKED_INST:
		case NST_COPY:
		case NST_PSI:
			return SQ_ALU;
		case NST_FETCH_INST: {
			fetch_node *f = static_cast<fetch_node*>(n);
			if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX))
				return SQ_VTX;
			if (f->bc.op_ptr->flags & FF_GDS)
				return SQ_GDS;
			return SQ_TEX;
		}
		case NST_CF_INST:
			return SQ_CF;
		default:
			assert(0);
			return SQ_NUM;
	}
}

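// Recursively count ALU instructions, fetch instructions, ALU groups and the
// various clause/CF containers in the IR subtree rooted at n.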
void shader_stats::collect(node *n) {
	if (n->is_alu_inst())
		++alu;
	else if (n->is_fetch_inst())
		++fetch;
	else if (n->is_container()) {
		container_node *c = static_cast<container_node*>(n);

		if (n->is_alu_group())
			++alu_groups;
		else if (n->is_alu_clause())
			++alu_clauses;
		else if (n->is_fetch_clause())
			++fetch_clauses;
		else if (n->is_cf_inst())
			++cf;

		if (!c->empty()) {
			for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
				collect(*I);
			}
		}
	}
}

void shader_stats::accumulate(shader_stats& s) {
	++shaders;
	ndw += s.ndw;
	ngpr += s.ngpr;
	nstack += s.nstack;

	alu += s.alu;
	alu_groups += s.alu_groups;
	alu_clauses += s.alu_clauses;
	fetch += s.fetch;
	fetch_clauses += s.fetch_clauses;
	cf += s.cf;
}

void shader_stats::dump() {
	sblog << "dw:" << ndw << ", gpr:" << ngpr << ", stk:" << nstack
			<< ", alu groups:" << alu_groups << ", alu clauses: " << alu_clauses
			<< ", alu:" << alu << ", fetch:" << fetch
			<< ", fetch clauses:" << fetch_clauses
			<< ", cf:" << cf;

	if (shaders > 1)
		sblog << ", shaders:" << shaders;

	sblog << "\n";
}

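// Print the relative change from d1 to d2 as a percentage; prints "N/A" when
// the baseline is zero but the new value is not.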
static void print_diff(unsigned d1, unsigned d2) {
	if (d1)
		sblog << ((int)d2 - (int)d1) * 100 / (int)d1 << "%";
	else if (d2)
		sblog << "N/A";
	else
		sblog << "0%";
}

void shader_stats::dump_diff(shader_stats& s) {
	sblog << "dw:"; print_diff(ndw, s.ndw);
	sblog << ", gpr:"; print_diff(ngpr, s.ngpr);
	sblog << ", stk:"; print_diff(nstack, s.nstack);
	sblog << ", alu groups:"; print_diff(alu_groups, s.alu_groups);
	sblog << ", alu clauses: "; print_diff(alu_clauses, s.alu_clauses);
	sblog << ", alu:"; print_diff(alu, s.alu);
	sblog << ", fetch:"; print_diff(fetch, s.fetch);
	sblog << ", fetch clauses:"; print_diff(fetch_clauses, s.fetch_clauses);
	sblog << ", cf:"; print_diff(cf, s.cf);
	sblog << "\n";
}

} // namespace r600_sb