1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #define RA_DEBUG 0
28
29 #if RA_DEBUG
30 #define RA_DUMP(q) do { q } while (0)
31 #else
32 #define RA_DUMP(q)
33 #endif
34
35 #include "sb_shader.h"
36 #include "sb_pass.h"
37
38 namespace r600_sb {
39
run()40 int ra_coalesce::run() {
41 return sh.coal.run();
42 }
43
add_edge(value * a,value * b,unsigned cost)44 void coalescer::add_edge(value* a, value* b, unsigned cost) {
45 assert(a->is_sgpr() && b->is_sgpr());
46 edges.insert(new ra_edge(a,b, cost));
47 }
48
create_chunk(value * v)49 void coalescer::create_chunk(value *v) {
50
51 assert(v->is_sgpr());
52
53 ra_chunk *c = new ra_chunk();
54
55 c->values.push_back(v);
56
57 if (v->is_chan_pinned())
58 c->flags |= RCF_PIN_CHAN;
59 if (v->is_reg_pinned()) {
60 c->flags |= RCF_PIN_REG;
61 }
62
63 c->pin = v->pin_gpr;
64
65 RA_DUMP(
66 sblog << "create_chunk: ";
67 dump_chunk(c);
68 );
69
70 all_chunks.push_back(c);
71 v->chunk = c;
72
73 }
74
unify_chunks(ra_edge * e)75 void coalescer::unify_chunks(ra_edge *e) {
76 ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
77
78 RA_DUMP(
79 sblog << "unify_chunks: ";
80 dump_chunk(c1);
81 dump_chunk(c2);
82 );
83
84 if (c2->is_chan_pinned() && !c1->is_chan_pinned()) {
85 c1->flags |= RCF_PIN_CHAN;
86 c1->pin = sel_chan(c1->pin.sel(), c2->pin.chan());
87 }
88
89 if (c2->is_reg_pinned() && !c1->is_reg_pinned()) {
90 c1->flags |= RCF_PIN_REG;
91 c1->pin = sel_chan(c2->pin.sel(), c1->pin.chan());
92 }
93
94 c1->values.reserve(c1->values.size() + c2->values.size());
95
96 for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
97 ++I) {
98 (*I)->chunk = c1;
99 c1->values.push_back(*I);
100 }
101
102 chunk_vec::iterator F = std::find(all_chunks.begin(), all_chunks.end(), c2);
103 assert(F != all_chunks.end());
104
105 all_chunks.erase(F);
106
107 c1->cost += c2->cost + e->cost;
108 delete c2;
109 }
110
chunks_interference(ra_chunk * c1,ra_chunk * c2)111 bool coalescer::chunks_interference(ra_chunk *c1, ra_chunk *c2) {
112 unsigned pin_flags = (c1->flags & c2->flags) &
113 (RCF_PIN_CHAN | RCF_PIN_REG);
114
115 if ((pin_flags & RCF_PIN_CHAN) &&
116 c1->pin.chan() != c2->pin.chan())
117 return true;
118
119 if ((pin_flags & RCF_PIN_REG) &&
120 c1->pin.sel() != c2->pin.sel())
121 return true;
122
123 for (vvec::iterator I = c1->values.begin(), E = c1->values.end(); I != E;
124 ++I) {
125 value *v1 = *I;
126
127 for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
128 ++I) {
129 value *v2 = *I;
130
131 if (!v1->v_equal(v2) && v1->interferences.contains(v2))
132 return true;
133 }
134 }
135 return false;
136 }
137
build_chunks()138 void coalescer::build_chunks() {
139
140 for (edge_queue::iterator I = edges.begin(), E = edges.end();
141 I != E; ++I) {
142
143 ra_edge *e = *I;
144
145 if (!e->a->chunk)
146 create_chunk(e->a);
147
148 if (!e->b->chunk)
149 create_chunk(e->b);
150
151 ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
152
153 if (c1 == c2) {
154 c1->cost += e->cost;
155 } else if (!chunks_interference(c1, c2))
156 unify_chunks(e);
157 }
158 }
159
create_constraint(constraint_kind kind)160 ra_constraint* coalescer::create_constraint(constraint_kind kind) {
161 ra_constraint *c = new ra_constraint(kind);
162 all_constraints.push_back(c);
163 return c;
164 }
165
dump_edges()166 void coalescer::dump_edges() {
167 sblog << "######## affinity edges\n";
168
169 for (edge_queue::iterator I = edges.begin(), E = edges.end();
170 I != E; ++I) {
171 ra_edge* e = *I;
172 sblog << " ra_edge ";
173 dump::dump_val(e->a);
174 sblog << " <-> ";
175 dump::dump_val(e->b);
176 sblog << " cost = " << e->cost << "\n";
177 }
178 }
179
dump_chunks()180 void coalescer::dump_chunks() {
181 sblog << "######## chunks\n";
182
183 for (chunk_vec::iterator I = all_chunks.begin(), E = all_chunks.end();
184 I != E; ++I) {
185 ra_chunk* c = *I;
186 dump_chunk(c);
187 }
188 }
189
190
dump_constraint_queue()191 void coalescer::dump_constraint_queue() {
192 sblog << "######## constraints\n";
193
194 for (constraint_queue::iterator I = constraints.begin(),
195 E = constraints.end(); I != E; ++I) {
196 ra_constraint* c = *I;
197 dump_constraint(c);
198 }
199 }
200
dump_chunk(ra_chunk * c)201 void coalescer::dump_chunk(ra_chunk* c) {
202 sblog << " ra_chunk cost = " << c->cost << " : ";
203 dump::dump_vec(c->values);
204
205 if (c->flags & RCF_PIN_REG)
206 sblog << " REG = " << c->pin.sel();
207
208 if (c->flags & RCF_PIN_CHAN)
209 sblog << " CHAN = " << c->pin.chan();
210
211 sblog << (c->flags & RCF_GLOBAL ? " GLOBAL" : "");
212
213 sblog << "\n";
214 }
215
dump_constraint(ra_constraint * c)216 void coalescer::dump_constraint(ra_constraint* c) {
217 sblog << " ra_constraint: ";
218 switch (c->kind) {
219 case CK_PACKED_BS: sblog << "PACKED_BS"; break;
220 case CK_PHI: sblog << "PHI"; break;
221 case CK_SAME_REG: sblog << "SAME_REG"; break;
222 default: sblog << "UNKNOWN_KIND"; assert(0); break;
223 }
224
225 sblog << " cost = " << c->cost << " : ";
226 dump::dump_vec(c->values);
227
228 sblog << "\n";
229 }
230
get_chunk_interferences(ra_chunk * c,val_set & s)231 void coalescer::get_chunk_interferences(ra_chunk *c, val_set &s) {
232
233 for (vvec::iterator I = c->values.begin(), E = c->values.end(); I != E;
234 ++I) {
235 value *v = *I;
236 s.add_set(v->interferences);
237 }
238 s.remove_vec(c->values);
239 }
240
build_chunk_queue()241 void coalescer::build_chunk_queue() {
242 for (chunk_vec::iterator I = all_chunks.begin(),
243 E = all_chunks.end(); I != E; ++I) {
244 ra_chunk *c = *I;
245
246 if (!c->is_fixed())
247 chunks.insert(c);
248 }
249 }
250
build_constraint_queue()251 void coalescer::build_constraint_queue() {
252 for (constraint_vec::iterator I = all_constraints.begin(),
253 E = all_constraints.end(); I != E; ++I) {
254 ra_constraint *c = *I;
255 unsigned cost = 0;
256
257 if (c->values.empty() || !c->values.front()->is_sgpr())
258 continue;
259
260 if (c->kind != CK_SAME_REG)
261 continue;
262
263 for (vvec::iterator I = c->values.begin(), E = c->values.end();
264 I != E; ++I) {
265 value *v = *I;
266 if (!v->chunk)
267 create_chunk(v);
268 else
269 cost += v->chunk->cost;
270 }
271 c->cost = cost;
272 constraints.insert(c);
273 }
274 }
275
color_chunks()276 void coalescer::color_chunks() {
277
278 for (chunk_queue::iterator I = chunks.begin(), E = chunks.end();
279 I != E; ++I) {
280 ra_chunk *c = *I;
281 if (c->is_fixed() || c->values.size() == 1)
282 continue;
283
284 sb_bitset rb;
285 val_set interf;
286
287 get_chunk_interferences(c, interf);
288
289 RA_DUMP(
290 sblog << "color_chunks: ";
291 dump_chunk(c);
292 sblog << "\n interferences: ";
293 dump::dump_set(sh,interf);
294 sblog << "\n";
295 );
296
297 init_reg_bitset(rb, interf);
298
299 unsigned pass = c->is_reg_pinned() ? 0 : 1;
300
301 unsigned cs = c->is_chan_pinned() ? c->pin.chan() : 0;
302 unsigned ce = c->is_chan_pinned() ? cs + 1 : 4;
303
304 unsigned color = 0;
305
306 while (pass < 2) {
307
308 unsigned rs, re;
309
310 if (pass == 0) {
311 rs = c->pin.sel();
312 re = rs + 1;
313 } else {
314 rs = 0;
315 re = sh.num_nontemp_gpr();
316 }
317
318 for (unsigned reg = rs; reg < re; ++reg) {
319 for (unsigned chan = cs; chan < ce; ++chan) {
320 unsigned bit = sel_chan(reg, chan);
321 if (bit >= rb.size() || !rb.get(bit)) {
322 color = bit;
323 break;
324 }
325 }
326 if (color)
327 break;
328 }
329
330 if (color)
331 break;
332
333 ++pass;
334 }
335
336 assert(color);
337 color_chunk(c, color);
338 }
339 }
340
init_reg_bitset(sb_bitset & bs,val_set & vs)341 void coalescer::init_reg_bitset(sb_bitset &bs, val_set &vs) {
342
343 for (val_set::iterator I = vs.begin(sh), E = vs.end(sh); I != E; ++I) {
344 value *v = *I;
345
346 if (!v->is_any_gpr())
347 continue;
348
349 unsigned gpr = v->get_final_gpr();
350 if (!gpr)
351 continue;
352
353 if (gpr) {
354 if (gpr >= bs.size())
355 bs.resize(gpr + 64);
356 bs.set(gpr, 1);
357 }
358 }
359 }
360
color_chunk(ra_chunk * c,sel_chan color)361 void coalescer::color_chunk(ra_chunk *c, sel_chan color) {
362
363 vvec vv = c->values;
364
365 for (vvec::iterator I = vv.begin(), E = vv.end(); I != E;
366 ++I) {
367 value *v = *I;
368
369 if (v->is_reg_pinned() && v->pin_gpr.sel() != color.sel()) {
370 detach_value(v);
371 continue;
372 }
373
374 if (v->is_chan_pinned() && v->pin_gpr.chan() != color.chan()) {
375 detach_value(v);
376 continue;
377 }
378
379 v->gpr = color;
380
381 if (v->constraint && v->constraint->kind == CK_PHI)
382 v->fix();
383
384
385 RA_DUMP(
386 sblog << " assigned " << color << " to ";
387 dump::dump_val(v);
388 sblog << "\n";
389 );
390 }
391
392 c->pin = color;
393
394 if (c->is_reg_pinned()) {
395 c->fix();
396 }
397 }
398
~coalescer()399 coalescer::~coalescer() {
400
401 // FIXME use pool allocator ??
402
403 for (constraint_vec::iterator I = all_constraints.begin(),
404 E = all_constraints.end(); I != E; ++I) {
405 delete (*I);
406 }
407
408 for (chunk_vec::iterator I = all_chunks.begin(),
409 E = all_chunks.end(); I != E; ++I) {
410 delete (*I);
411 }
412
413 for (edge_queue::iterator I = edges.begin(), E = edges.end();
414 I != E; ++I) {
415 delete (*I);
416 }
417 }
418
run()419 int coalescer::run() {
420 int r;
421
422 RA_DUMP( dump_edges(); );
423
424 build_chunks();
425 RA_DUMP( dump_chunks(); );
426
427 build_constraint_queue();
428 RA_DUMP( dump_constraint_queue(); );
429
430 if ((r = color_constraints()))
431 return r;
432
433 build_chunk_queue();
434 color_chunks();
435
436 return 0;
437 }
438
color_phi_constraint(ra_constraint * c)439 void coalescer::color_phi_constraint(ra_constraint* c) {
440 }
441
detach_value(value * v)442 ra_chunk* coalescer::detach_value(value *v) {
443
444 vvec::iterator F = std::find(v->chunk->values.begin(),
445 v->chunk->values.end(), v);
446
447 assert(F != v->chunk->values.end());
448 v->chunk->values.erase(F);
449 create_chunk(v);
450
451 if (v->is_reg_pinned()) {
452 v->chunk->fix();
453 }
454
455 RA_DUMP(
456 sblog << " detached : ";
457 dump_chunk(v->chunk);
458 );
459
460 return v->chunk;
461
462 }
463
color_reg_constraint(ra_constraint * c)464 int coalescer::color_reg_constraint(ra_constraint *c) {
465 unsigned k, cnt = c->values.size();
466 vvec & cv = c->values;
467
468 ra_chunk *ch[4];
469 unsigned swz[4] = {0, 1, 2, 3};
470 val_set interf[4];
471 sb_bitset rb[4];
472
473 bool reg_pinned = false;
474 unsigned pin_reg = ~0;
475
476 unsigned chan_mask = 0;
477
478 k = 0;
479 for (vvec::iterator I = cv.begin(), E = cv.end(); I != E; ++I, ++k) {
480 value *v = *I;
481
482 if (!v->chunk)
483 create_chunk(v);
484
485 ch[k] = v->chunk;
486
487 if (v->chunk->is_chan_pinned()) {
488 unsigned chan = 1 << v->chunk->pin.chan();
489
490 if (chan & chan_mask) { // channel already in use
491 ch[k] = detach_value(v);
492 assert(!ch[k]->is_chan_pinned());
493 } else {
494 chan_mask |= chan;
495 }
496 }
497
498 if (v->chunk->is_reg_pinned()) {
499 if (!reg_pinned) {
500 reg_pinned = true;
501 pin_reg = v->chunk->pin.sel();
502 }
503 }
504
505 get_chunk_interferences(ch[k], interf[k]);
506 init_reg_bitset(rb[k], interf[k]);
507 }
508
509 unsigned start_reg, end_reg;
510
511 start_reg = 0;
512 end_reg = sh.num_nontemp_gpr();
513
514 unsigned min_reg = end_reg;
515 unsigned min_swz[4];
516 unsigned i, pass = reg_pinned ? 0 : 1;
517
518 bool done = false;
519
520 while (pass < 2) {
521
522 unsigned rs, re;
523
524 if (pass == 0) {
525 re = pin_reg + 1;
526 rs = pin_reg;
527 } else {
528 re = end_reg;
529 rs = start_reg;
530 }
531
532 min_reg = re;
533
534 // cycle on swizzle combinations
535 do {
536 for (i = 0; i < cnt; ++i) {
537 if (ch[i]->flags & RCF_PIN_CHAN)
538 if (ch[i]->pin.chan() != swz[i])
539 break;
540 }
541 if (i != cnt)
542 continue;
543
544 // looking for minimal reg number such that the constrained chunks
545 // may be colored with the current swizzle combination
546 for (unsigned reg = rs; reg < min_reg; ++reg) {
547 for (i = 0; i < cnt; ++i) {
548 unsigned bit = sel_chan(reg, swz[i]);
549 if (bit < rb[i].size() && rb[i].get(bit))
550 break;
551 }
552 if (i == cnt) {
553 done = true;
554 min_reg = reg;
555 std::copy(swz, swz + 4, min_swz);
556 break;
557 }
558 }
559
560 if (pass == 0 && done)
561 break;
562
563 } while (std::next_permutation(swz, swz + 4));
564
565 if (!done && pass) {
566 sblog << "sb: ra_coalesce - out of registers\n";
567 return -1;
568 }
569
570 if (pass == 0 && done)
571 break;
572
573 ++pass;
574 };
575
576 assert(done);
577
578 RA_DUMP(
579 sblog << "min reg = " << min_reg << " min_swz = "
580 << min_swz[0] << min_swz[1] << min_swz[2] << min_swz[3] << "\n";
581 );
582
583 for (i = 0; i < cnt; ++i) {
584 sel_chan color(min_reg, min_swz[i]);
585 ra_chunk *cc = ch[i];
586
587 if (cc->is_fixed()) {
588 if (cc->pin != color)
589 cc = detach_value(cv[i]);
590 else
591 continue;
592 }
593
594 color_chunk(cc, color);
595 cc->fix();
596 cc->set_prealloc();
597 }
598
599 return 0;
600 }
601
color_constraints()602 int coalescer::color_constraints() {
603 int r;
604
605 for (constraint_queue::iterator I = constraints.begin(),
606 E = constraints.end(); I != E; ++I) {
607
608 ra_constraint *c = *I;
609
610 RA_DUMP(
611 sblog << "color_constraints: ";
612 dump_constraint(c);
613 );
614
615 if (c->kind == CK_SAME_REG) {
616 if ((r = color_reg_constraint(c)))
617 return r;
618 } else if (c->kind == CK_PHI)
619 color_phi_constraint(c);
620 }
621 return 0;
622 }
623
624 } // namespace r600_sb
625