• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
27 #define RA_DEBUG 0
28 
29 #if RA_DEBUG
30 #define RA_DUMP(q) do { q } while (0)
31 #else
32 #define RA_DUMP(q)
33 #endif
34 
35 #include "sb_shader.h"
36 #include "sb_pass.h"
37 
38 namespace r600_sb {
39 
run()40 int ra_coalesce::run() {
41 	return sh.coal.run();
42 }
43 
add_edge(value * a,value * b,unsigned cost)44 void coalescer::add_edge(value* a, value* b, unsigned cost) {
45 	assert(a->is_sgpr() && b->is_sgpr());
46 	edges.insert(new ra_edge(a,b, cost));
47 }
48 
create_chunk(value * v)49 void coalescer::create_chunk(value *v) {
50 
51 	assert(v->is_sgpr());
52 
53 	ra_chunk *c = new ra_chunk();
54 
55 	c->values.push_back(v);
56 
57 	if (v->is_chan_pinned())
58 		c->flags |= RCF_PIN_CHAN;
59 	if (v->is_reg_pinned()) {
60 		c->flags |= RCF_PIN_REG;
61 	}
62 
63 	c->pin = v->pin_gpr;
64 
65 	RA_DUMP(
66 		sblog << "create_chunk: ";
67 		dump_chunk(c);
68 	);
69 
70 	all_chunks.push_back(c);
71 	v->chunk = c;
72 
73 }
74 
unify_chunks(ra_edge * e)75 void coalescer::unify_chunks(ra_edge *e) {
76 	ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
77 
78 	RA_DUMP(
79 		sblog << "unify_chunks: ";
80 		dump_chunk(c1);
81 		dump_chunk(c2);
82 	);
83 
84 	if (c2->is_chan_pinned() && !c1->is_chan_pinned()) {
85 		c1->flags |= RCF_PIN_CHAN;
86 		c1->pin = sel_chan(c1->pin.sel(), c2->pin.chan());
87 	}
88 
89 	if (c2->is_reg_pinned() && !c1->is_reg_pinned()) {
90 		c1->flags |= RCF_PIN_REG;
91 		c1->pin = sel_chan(c2->pin.sel(), c1->pin.chan());
92 	}
93 
94 	c1->values.reserve(c1->values.size() + c2->values.size());
95 
96 	for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
97 			++I) {
98 		(*I)->chunk = c1;
99 		c1->values.push_back(*I);
100 	}
101 
102 	chunk_vec::iterator F = std::find(all_chunks.begin(), all_chunks.end(), c2);
103 	assert(F != all_chunks.end());
104 
105 	all_chunks.erase(F);
106 
107 	c1->cost += c2->cost + e->cost;
108 	delete c2;
109 }
110 
chunks_interference(ra_chunk * c1,ra_chunk * c2)111 bool coalescer::chunks_interference(ra_chunk *c1, ra_chunk *c2) {
112 	unsigned pin_flags = (c1->flags & c2->flags) &
113 			(RCF_PIN_CHAN | RCF_PIN_REG);
114 
115 	if ((pin_flags & RCF_PIN_CHAN) &&
116 			c1->pin.chan() != c2->pin.chan())
117 		return true;
118 
119 	if ((pin_flags & RCF_PIN_REG) &&
120 			c1->pin.sel() != c2->pin.sel())
121 		return true;
122 
123 	for (vvec::iterator I = c1->values.begin(), E = c1->values.end(); I != E;
124 			++I) {
125 		value *v1 = *I;
126 
127 		for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
128 				++I) {
129 			value *v2 = *I;
130 
131 			if (!v1->v_equal(v2) && v1->interferences.contains(v2))
132 				return true;
133 		}
134 	}
135 	return false;
136 }
137 
build_chunks()138 void coalescer::build_chunks() {
139 
140 	for (edge_queue::iterator I = edges.begin(), E = edges.end();
141 			I != E; ++I) {
142 
143 		ra_edge *e = *I;
144 
145 		if (!e->a->chunk)
146 			create_chunk(e->a);
147 
148 		if (!e->b->chunk)
149 			create_chunk(e->b);
150 
151 		ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
152 
153 		if (c1 == c2) {
154 			c1->cost += e->cost;
155 		} else if (!chunks_interference(c1, c2))
156 			unify_chunks(e);
157 	}
158 }
159 
create_constraint(constraint_kind kind)160 ra_constraint* coalescer::create_constraint(constraint_kind kind) {
161 	ra_constraint *c = new ra_constraint(kind);
162 	all_constraints.push_back(c);
163 	return c;
164 }
165 
dump_edges()166 void coalescer::dump_edges() {
167 	sblog << "######## affinity edges\n";
168 
169 	for (edge_queue::iterator I = edges.begin(), E = edges.end();
170 			I != E; ++I) {
171 		ra_edge* e = *I;
172 		sblog << "  ra_edge ";
173 		dump::dump_val(e->a);
174 		sblog << " <-> ";
175 		dump::dump_val(e->b);
176 		sblog << "   cost = " << e->cost << "\n";
177 	}
178 }
179 
dump_chunks()180 void coalescer::dump_chunks() {
181 	sblog << "######## chunks\n";
182 
183 	for (chunk_vec::iterator I = all_chunks.begin(), E = all_chunks.end();
184 			I != E; ++I) {
185 		ra_chunk* c = *I;
186 		dump_chunk(c);
187 	}
188 }
189 
190 
dump_constraint_queue()191 void coalescer::dump_constraint_queue() {
192 	sblog << "######## constraints\n";
193 
194 	for (constraint_queue::iterator I = constraints.begin(),
195 			E = constraints.end(); I != E; ++I) {
196 		ra_constraint* c = *I;
197 		dump_constraint(c);
198 	}
199 }
200 
dump_chunk(ra_chunk * c)201 void coalescer::dump_chunk(ra_chunk* c) {
202 	sblog << "  ra_chunk cost = " << c->cost << "  :  ";
203 	dump::dump_vec(c->values);
204 
205 	if (c->flags & RCF_PIN_REG)
206 		sblog << "   REG = " << c->pin.sel();
207 
208 	if (c->flags & RCF_PIN_CHAN)
209 		sblog << "   CHAN = " << c->pin.chan();
210 
211 	sblog << (c->flags & RCF_GLOBAL ? "  GLOBAL" : "");
212 
213 	sblog << "\n";
214 }
215 
dump_constraint(ra_constraint * c)216 void coalescer::dump_constraint(ra_constraint* c) {
217 	sblog << "  ra_constraint: ";
218 	switch (c->kind) {
219 		case CK_PACKED_BS: sblog << "PACKED_BS"; break;
220 		case CK_PHI: sblog << "PHI"; break;
221 		case CK_SAME_REG: sblog << "SAME_REG"; break;
222 		default: sblog << "UNKNOWN_KIND"; assert(0); break;
223 	}
224 
225 	sblog << "  cost = " << c->cost << "  : ";
226 	dump::dump_vec(c->values);
227 
228 	sblog << "\n";
229 }
230 
get_chunk_interferences(ra_chunk * c,val_set & s)231 void coalescer::get_chunk_interferences(ra_chunk *c, val_set &s) {
232 
233 	for (vvec::iterator I = c->values.begin(), E = c->values.end(); I != E;
234 			++I) {
235 		value *v = *I;
236 		s.add_set(v->interferences);
237 	}
238 	s.remove_vec(c->values);
239 }
240 
build_chunk_queue()241 void coalescer::build_chunk_queue() {
242 	for (chunk_vec::iterator I = all_chunks.begin(),
243 			E = all_chunks.end(); I != E; ++I) {
244 		ra_chunk *c = *I;
245 
246 		if (!c->is_fixed())
247 			chunks.insert(c);
248 	}
249 }
250 
build_constraint_queue()251 void coalescer::build_constraint_queue() {
252 	for (constraint_vec::iterator I = all_constraints.begin(),
253 			E = all_constraints.end(); I != E; ++I) {
254 		ra_constraint *c = *I;
255 		unsigned cost = 0;
256 
257 		if (c->values.empty() || !c->values.front()->is_sgpr())
258 			continue;
259 
260 		if (c->kind != CK_SAME_REG)
261 			continue;
262 
263 		for (vvec::iterator I = c->values.begin(), E = c->values.end();
264 				I != E; ++I) {
265 			value *v = *I;
266 			if (!v->chunk)
267 				create_chunk(v);
268 			else
269 				cost += v->chunk->cost;
270 		}
271 		c->cost = cost;
272 		constraints.insert(c);
273 	}
274 }
275 
color_chunks()276 int coalescer::color_chunks() {
277 
278 	for (chunk_queue::iterator I = chunks.begin(), E = chunks.end();
279 			I != E; ++I) {
280 		ra_chunk *c = *I;
281 		if (c->is_fixed() || c->values.size() == 1)
282 			continue;
283 
284 		sb_bitset rb;
285 		val_set interf;
286 
287 		get_chunk_interferences(c, interf);
288 
289 		RA_DUMP(
290 			sblog << "color_chunks: ";
291 			dump_chunk(c);
292 			sblog << "\n interferences: ";
293 			dump::dump_set(sh,interf);
294 			sblog << "\n";
295 		);
296 
297 		init_reg_bitset(rb, interf);
298 
299 		unsigned pass = c->is_reg_pinned() ? 0 : 1;
300 
301 		unsigned cs = c->is_chan_pinned() ? c->pin.chan() : 0;
302 		unsigned ce = c->is_chan_pinned() ? cs + 1 : 4;
303 
304 		unsigned color = 0;
305 
306 		while (pass < 2) {
307 
308 			unsigned rs, re;
309 
310 			if (pass == 0) {
311 				rs = c->pin.sel();
312 				re = rs + 1;
313 			} else {
314 				rs = 0;
315 				re = sh.num_nontemp_gpr();
316 			}
317 
318 			for (unsigned reg = rs; reg < re; ++reg) {
319 				for (unsigned chan = cs; chan < ce; ++chan) {
320 					unsigned bit = sel_chan(reg, chan);
321 					if (bit >= rb.size() || !rb.get(bit)) {
322 						color = bit;
323 						break;
324 					}
325 				}
326 				if (color)
327 					break;
328 			}
329 
330 			if (color)
331 				break;
332 
333 			++pass;
334 		}
335 
336 		if (!color) {
337 			fprintf(stderr, "r600/SB: unable to color registers\n");
338 			return -1;
339 		}
340 		color_chunk(c, color);
341 	}
342 	return 0;
343 }
344 
init_reg_bitset(sb_bitset & bs,val_set & vs)345 void coalescer::init_reg_bitset(sb_bitset &bs, val_set &vs) {
346 
347 	for (val_set::iterator I = vs.begin(sh), E = vs.end(sh); I != E; ++I) {
348 		value *v = *I;
349 
350 		if (!v->is_any_gpr())
351 			continue;
352 
353 		unsigned gpr = v->get_final_gpr();
354 		if (!gpr)
355 			continue;
356 
357 		if (gpr) {
358 			if (gpr >= bs.size())
359 				bs.resize(gpr + 64);
360 			bs.set(gpr, 1);
361 		}
362 	}
363 }
364 
color_chunk(ra_chunk * c,sel_chan color)365 void coalescer::color_chunk(ra_chunk *c, sel_chan color) {
366 
367 	vvec vv = c->values;
368 
369 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E;
370 			++I) {
371 		value *v = *I;
372 
373 		if (v->is_reg_pinned() && v->pin_gpr.sel() != color.sel()) {
374 			detach_value(v);
375 			continue;
376 		}
377 
378 		if (v->is_chan_pinned() && v->pin_gpr.chan() != color.chan()) {
379 			detach_value(v);
380 			continue;
381 		}
382 
383 		v->gpr = color;
384 
385 		if (v->constraint && v->constraint->kind == CK_PHI)
386 			v->fix();
387 
388 
389 		RA_DUMP(
390 			sblog << " assigned " << color << " to ";
391 			dump::dump_val(v);
392 			sblog << "\n";
393 		);
394 	}
395 
396 	c->pin = color;
397 
398 	if (c->is_reg_pinned()) {
399 		c->fix();
400 	}
401 }
402 
~coalescer()403 coalescer::~coalescer() {
404 
405 	// FIXME use pool allocator ??
406 
407 	for (constraint_vec::iterator I = all_constraints.begin(),
408 			E = all_constraints.end(); I != E; ++I) {
409 		delete (*I);
410 	}
411 
412 	for (chunk_vec::iterator I = all_chunks.begin(),
413 			E = all_chunks.end(); I != E; ++I) {
414 		delete (*I);
415 	}
416 
417 	for (edge_queue::iterator I = edges.begin(), E = edges.end();
418 			I != E; ++I) {
419 		delete (*I);
420 	}
421 }
422 
run()423 int coalescer::run() {
424 	int r;
425 
426 	RA_DUMP( dump_edges(); );
427 
428 	build_chunks();
429 	RA_DUMP( dump_chunks(); );
430 
431 	build_constraint_queue();
432 	RA_DUMP( dump_constraint_queue(); );
433 
434 	if ((r = color_constraints()))
435 		return r;
436 
437 	build_chunk_queue();
438 	return color_chunks();
439 }
440 
color_phi_constraint(ra_constraint * c)441 void coalescer::color_phi_constraint(ra_constraint* c) {
442 }
443 
detach_value(value * v)444 ra_chunk* coalescer::detach_value(value *v) {
445 
446 	vvec::iterator F = std::find(v->chunk->values.begin(),
447 	                             v->chunk->values.end(), v);
448 
449 	assert(F != v->chunk->values.end());
450 	v->chunk->values.erase(F);
451 	create_chunk(v);
452 
453 	if (v->is_reg_pinned()) {
454 		v->chunk->fix();
455 	}
456 
457 	RA_DUMP(
458 		sblog << "           detached : ";
459 		dump_chunk(v->chunk);
460 	);
461 
462 	return v->chunk;
463 
464 }
465 
color_reg_constraint(ra_constraint * c)466 int coalescer::color_reg_constraint(ra_constraint *c) {
467 	unsigned k, cnt = c->values.size();
468 	vvec & cv = c->values;
469 
470 	ra_chunk *ch[4];
471 	unsigned swz[4] = {0, 1, 2, 3};
472 	val_set interf[4];
473 	sb_bitset rb[4];
474 
475 	bool reg_pinned = false;
476 	unsigned pin_reg = ~0;
477 
478 	unsigned chan_mask = 0;
479 
480 	k = 0;
481 	for (vvec::iterator I = cv.begin(), E = cv.end(); I != E; ++I, ++k) {
482 		value *v = *I;
483 
484 		if (!v->chunk)
485 			create_chunk(v);
486 
487 		ch[k] = v->chunk;
488 
489 		if (v->chunk->is_chan_pinned()) {
490 			unsigned chan = 1 << v->chunk->pin.chan();
491 
492 			if (chan & chan_mask) { // channel already in use
493 				ch[k] = detach_value(v);
494 				assert(!ch[k]->is_chan_pinned());
495 			} else {
496 				chan_mask |= chan;
497 			}
498 		}
499 
500 		if (v->chunk->is_reg_pinned()) {
501 			if (!reg_pinned) {
502 				reg_pinned = true;
503 				pin_reg = v->chunk->pin.sel();
504 			}
505 		}
506 
507 		get_chunk_interferences(ch[k], interf[k]);
508 		init_reg_bitset(rb[k], interf[k]);
509 	}
510 
511 	unsigned start_reg, end_reg;
512 
513 	start_reg = 0;
514 	end_reg = sh.num_nontemp_gpr();
515 
516 	unsigned min_reg = end_reg;
517 	unsigned min_swz[4];
518 	unsigned i, pass = reg_pinned ? 0 : 1;
519 
520 	bool done = false;
521 
522 	while (pass < 2) {
523 
524 		unsigned rs, re;
525 
526 		if (pass == 0) {
527 			re = pin_reg + 1;
528 			rs = pin_reg;
529 		} else {
530 			re = end_reg;
531 			rs = start_reg;
532 		}
533 
534 		min_reg = re;
535 
536 		// cycle on swizzle combinations
537 		do {
538 			for (i = 0; i < cnt; ++i) {
539 				if (ch[i]->flags & RCF_PIN_CHAN)
540 					if (ch[i]->pin.chan() != swz[i])
541 						break;
542 			}
543 			if (i != cnt)
544 				continue;
545 
546 			// looking for minimal reg number such that the constrained chunks
547 			// may be colored with the current swizzle combination
548 			for (unsigned reg = rs; reg < min_reg; ++reg) {
549 				for (i = 0; i < cnt; ++i) {
550 					unsigned bit = sel_chan(reg, swz[i]);
551 					if (bit < rb[i].size() && rb[i].get(bit))
552 						break;
553 				}
554 				if (i == cnt) {
555 					done = true;
556 					min_reg = reg;
557 					std::copy(swz, swz + 4, min_swz);
558 					break;
559 				}
560 			}
561 
562 			if (pass == 0 && done)
563 				break;
564 
565 		} while (std::next_permutation(swz, swz + 4));
566 
567 		if (!done && pass) {
568 			sblog << "sb: ra_coalesce - out of registers\n";
569 			return -1;
570 		}
571 
572 		if (pass == 0 && done)
573 			break;
574 
575 		++pass;
576 	};
577 
578 	assert(done);
579 
580 	RA_DUMP(
581 	sblog << "min reg = " << min_reg << "   min_swz = "
582 			<< min_swz[0] << min_swz[1] << min_swz[2] << min_swz[3] << "\n";
583 	);
584 
585 	for (i = 0; i < cnt; ++i) {
586 		sel_chan color(min_reg, min_swz[i]);
587 		ra_chunk *cc = ch[i];
588 
589 		if (cc->is_fixed()) {
590 			if (cc->pin != color)
591 				cc = detach_value(cv[i]);
592 			else
593 				continue;
594 		}
595 
596 		color_chunk(cc, color);
597 		cc->fix();
598 		cc->set_prealloc();
599 	}
600 
601 	return 0;
602 }
603 
color_constraints()604 int coalescer::color_constraints() {
605 	int r;
606 
607 	for (constraint_queue::iterator I = constraints.begin(),
608 			E = constraints.end(); I != E; ++I) {
609 
610 		ra_constraint *c = *I;
611 
612 		RA_DUMP(
613 			sblog << "color_constraints: ";
614 			dump_constraint(c);
615 		);
616 
617 		if (c->kind == CK_SAME_REG) {
618 			if ((r = color_reg_constraint(c)))
619 				return r;
620 		} else if (c->kind == CK_PHI)
621 			color_phi_constraint(c);
622 	}
623 	return 0;
624 }
625 
626 } // namespace r600_sb
627