/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define VT_DEBUG 0

#if VT_DEBUG
#define VT_DUMP(q) do { q } while (0)
#else
#define VT_DUMP(q)
#endif

#include <cstring>

#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

static const char * chans = "xyzw01?_";

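/* Pretty-printer for values: prints the kind-specific name (GPR, special
 * register, kcache constant, literal, param, temp, relative access), then
 * the version, liveness/allocation flags ("||" global, "F" fixed, "P"
 * preallocated) and, when already assigned, the final hardware GPR location
 * as "@R<sel>.<chan>". Dead values are wrapped in "{}".
 */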
sb_ostream& operator << (sb_ostream &o, value &v) {

	bool dead = v.flags & VLF_DEAD;

	if (dead)
		o << "{";

	switch (v.kind) {
	case VLK_SPECIAL_REG: {
		switch (v.select.sel()) {
		case SV_AR_INDEX: o << "AR"; break;
		case SV_ALU_PRED: o << "PR"; break;
		case SV_EXEC_MASK: o << "EM"; break;
		case SV_VALID_MASK: o << "VM"; break;
		case SV_GEOMETRY_EMIT: o << "GEOMETRY_EMIT"; break;
		case SV_LDS_RW: o << "LDS_RW"; break;
		case SV_LDS_OQA: o << "LDS_OQA"; break;
		case SV_LDS_OQB: o << "LDS_OQB"; break;
		case SV_SCRATCH: o << "SCRATCH"; break;
		default: o << "???specialreg"; break;
		}
		break;
	}

	case VLK_REG:
		o << "R" << v.select.sel() << "."
			<< chans[v.select.chan()];

		break;
	case VLK_KCACHE: {
		o << "C" << v.select.sel() << "." << chans[v.select.chan()];
	}
		break;
	case VLK_CONST:
		o << v.literal_value.f << "|";
		o.print_zw_hex(v.literal_value.u, 8);
		break;
	case VLK_PARAM:
		o << "Param" << (v.select.sel() - ALU_SRC_PARAM_OFFSET)
			<< chans[v.select.chan()];
		break;
	case VLK_TEMP:
		o << "t" << v.select.sel() - shader::temp_regid_offset;
		break;
	case VLK_REL_REG:

		o << "A" << v.select;
		o << "[";
		o << *v.rel;
		o << "]";

		o << "_" << v.uid;

		break;
	case VLK_UNDEF:
		o << "undef";
		break;
	default:
		o << v.kind << "?????";
		break;
	}

	if (v.version)
		o << "." << v.version;

	if (dead)
		o << "}";

	if (v.is_global())
		o << "||";
	if (v.is_fixed())
		o << "F";
	if (v.is_prealloc())
		o << "P";

	sel_chan g;

	if (v.is_rel()) {
		g = v.array->gpr;
	} else {
		g = v.gpr;
	}

	if (g) {
		o << "@R" << g.sel() << "." << chans[g.chan()];
	}

	return o;
}

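/* Insert a value into the GVN hash table. If the defining expression can be
 * folded, or an equivalent value is already present in the same hash bucket,
 * the value's gvn_source is redirected to the existing representative;
 * otherwise the value becomes its own representative.
 */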
void value_table::add_value(value* v) {

	if (v->gvn_source) {
		return;
	}

	VT_DUMP(
		sblog << "gvn add_value ";
		dump::dump_val(v);
	);

	value_hash hash = v->hash();
	vt_item & vti = hashtable[hash & size_mask];
	vti.push_back(v);
	++cnt;

	if (v->def && ex.try_fold(v)) {
		VT_DUMP(
			sblog << " folded: ";
			dump::dump_val(v->gvn_source);
			sblog << "\n";
		);
		return;
	}

	int n = 0;
	for (vt_item::iterator I = vti.begin(), E = vti.end(); I != E; ++I, ++n) {
		value *c = *I;

		if (c == v)
			break;

		if (expr_equal(c, v)) {
			v->gvn_source = c->gvn_source;

			VT_DUMP(
				sblog << " found : equal to ";
				dump::dump_val(v->gvn_source);
				sblog << "\n";
			);
			return;
		}
	}

	v->gvn_source = v;
	VT_DUMP(
		sblog << " added new\n";
	);
}

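/* Hash used by the GVN table, cached in 'ghash'. Relative accesses combine
 * the address value, the raw select and the array hash; values with a
 * defining node use that node's hash; everything else falls back to the
 * object address with the low bit set (presumably so a cached hash is never
 * zero, since zero means "not yet computed").
 */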
value_hash value::hash() {
	if (ghash)
		return ghash;
	if (is_rel())
		ghash = rel_hash();
	else if (def)
		ghash = def->hash();
	else
		ghash = ((uintptr_t)this) | 1;

	return ghash;
}

value_hash value::rel_hash() {
	value_hash h = rel ? rel->hash() : 0;
	h |= select << 10;
	h |= array->hash();
	return h;
}

bool value_table::expr_equal(value* l, value* r) {
	return ex.equal(l, r);
}

void value_table::get_values(vvec& v) {
	v.resize(cnt);

	vvec::iterator T = v.begin();

	for (vt_table::iterator I = hashtable.begin(), E = hashtable.end();
			I != E; ++I) {
		T = std::copy(I->begin(), I->end(), T);
	}
}

void value::add_use(node* n) {
	if (0) {
		sblog << "add_use ";
		dump::dump_val(this);
		sblog << " => ";
		dump::dump_op(n);
	}
	uses.push_back(n);
}

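/* Predicate for std::find_if: matches use-list entries against a given node
 * by comparing node hashes rather than pointers.
 */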
struct use_node_comp {
	explicit use_node_comp(const node *n) : n(n) {}
	bool operator() (const node *o) {
		return o->hash() == n->hash();
	}

private:
	const node *n;
};

void value::remove_use(const node *n) {
	uselist::iterator it =
		std::find_if(uses.begin(), uses.end(), use_node_comp(n));

	if (it != uses.end())
	{
		// We only ever had a pointer, so don't delete it here
		uses.erase(it);
	}
}

unsigned value::use_count() {
	return uses.size();
}

bool value::is_global() {
	if (chunk)
		return chunk->is_global();
	return flags & VLF_GLOBAL;
}

void value::set_global() {
	assert(is_sgpr());
	flags |= VLF_GLOBAL;
	if (chunk)
		chunk->set_global();
}

void value::set_prealloc() {
	assert(is_sgpr());
	flags |= VLF_PREALLOC;
	if (chunk)
		chunk->set_prealloc();
}

bool value::is_fixed() {
	if (array && array->gpr)
		return true;
	if (chunk && chunk->is_fixed())
		return true;
	return flags & VLF_FIXED;
}

void value::fix() {
	if (chunk)
		chunk->fix();
	flags |= VLF_FIXED;
}

bool value::is_prealloc() {
	if (chunk)
		return chunk->is_prealloc();
	return flags & VLF_PREALLOC;
}

void value::delete_uses() {
	// We only ever had pointers, so don't delete them here
	uses.erase(uses.begin(), uses.end());
}

bool value::no_reladdr_conflict_with(value *src)
{
	/* If the source register is not relative, it cannot create a relative
	 * access conflict. */
	if (!src->is_rel())
		return true;

	/* If the destination is AR then we accept the copy propagation, because
	 * the scheduler re-creates the address loading operation anyway and will
	 * signal interference (and fail the propagation) if a conflicting address
	 * register load is present.
	 */
	if (gvalue()->is_AR())
		return true;

	/* For all nodes that use this value, test whether the operation uses
	 * another relative access with a different address value. If one is
	 * found, signal a conflict.
	 */
	for (uselist::const_iterator N = uses.begin(); N != uses.end(); ++N) {
		for (vvec::const_iterator V = (*N)->src.begin(); V != (*N)->src.end(); ++V) {
			if (*V) {
				value *v = (*V)->gvalue();
				if (v != src && v->is_rel() && v->rel != src->rel)
					return false;
			}
		}
		for (vvec::const_iterator V = (*N)->dst.begin(); V != (*N)->dst.end(); ++V) {
			if (*V) {
				value *v = (*V)->gvalue();
				if (v && v != src && v->is_rel() && (v->rel != src->rel))
					return false;
			}
		}
	}
	return true;
}

void ra_constraint::update_values() {
	for (vvec::iterator I = values.begin(), E = values.end(); I != E; ++I) {
		assert(!(*I)->constraint);
		(*I)->constraint = this;
	}
}

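/* Simple bump allocator: the requested size is rounded up to SB_POOL_ALIGN
 * and carved from the current block; when the current block cannot hold the
 * request, a new block of block_size bytes is appended. Memory is only
 * released as a whole via free_all().
 */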
void* sb_pool::allocate(unsigned sz) {
	sz = (sz + SB_POOL_ALIGN - 1) & ~(SB_POOL_ALIGN - 1);
	assert (sz < (block_size >> 6) && "too big allocation size for sb_pool");

	unsigned offset = total_size % block_size;
	unsigned capacity = block_size * blocks.size();

	if (total_size + sz > capacity) {
		total_size = capacity;
		void * nb = malloc(block_size);
		blocks.push_back(nb);
		offset = 0;
	}

	total_size += sz;
	return ((char*)blocks.back() + offset);
}

void sb_pool::free_all() {
	for (block_vector::iterator I = blocks.begin(), E = blocks.end(); I != E;
			++I) {
		free(*I);
	}
}

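/* Values are placement-new'ed into pool storage, so they are never freed
 * individually; delete_all() walks the pool blocks and runs the destructor
 * on each aligned slot that has been handed out.
 */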
value* sb_value_pool::create(value_kind k, sel_chan regid,
                             unsigned ver) {
	void* np = allocate(aligned_elt_size);
	value *v = new (np) value(size(), k, regid, ver);
	return v;
}

void sb_value_pool::delete_all() {
	unsigned bcnt = blocks.size();
	unsigned toffset = 0;
	for (unsigned b = 0; b < bcnt; ++b) {
		char *bstart = (char*)blocks[b];
		for (unsigned offset = 0; offset < block_size;
				offset += aligned_elt_size) {
			((value*)(bstart + offset))->~value();
			toffset += aligned_elt_size;
			if (toffset >= total_size)
				return;
		}
	}
}

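/* sb_bitset stores bits in a vector of 'basetype' words: bit id lives in
 * word (id / bt_bits) at position (id % bt_bits).
 */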
bool sb_bitset::get(unsigned id) {
	assert(id < bit_size);
	unsigned w = id / bt_bits;
	unsigned b = id % bt_bits;
	return (data[w] >> b) & 1;
}

void sb_bitset::set(unsigned id, bool bit) {
	assert(id < bit_size);
	unsigned w = id / bt_bits;
	unsigned b = id % bt_bits;
	if (w >= data.size())
		data.resize(w + 1);

	if (bit)
		data[w] |= (1 << b);
	else
		data[w] &= ~(1 << b);
}

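/* Like set(), but reports whether the stored bit actually changed; used by
 * sb_value_set::add_val()/remove_val() to report set modifications.
 */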
inline bool sb_bitset::set_chk(unsigned id, bool bit) {
	assert(id < bit_size);
	unsigned w = id / bt_bits;
	unsigned b = id % bt_bits;
	basetype d = data[w];
	basetype dn = (d & ~(1 << b)) | (bit << b);
	bool r = (d != dn);
	data[w] = r ? dn : data[w];
	return r;
}

void sb_bitset::clear() {
	std::fill(data.begin(), data.end(), 0);
}

void sb_bitset::resize(unsigned size) {
	unsigned cur_data_size = data.size();
	unsigned new_data_size = (size + bt_bits - 1) / bt_bits;

	if (new_data_size != cur_data_size)
		data.resize(new_data_size);

	// make sure that new bits in the existing word are cleared
	if (cur_data_size && size > bit_size && bit_size % bt_bits) {
		basetype clear_mask = (~(basetype)0u) << (bit_size % bt_bits);
		data[cur_data_size - 1] &= ~clear_mask;
	}

	bit_size = size;
}

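/* Returns the index of the first set bit at or after 'start', or bit_size
 * if no set bit is found.
 */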
unsigned sb_bitset::find_bit(unsigned start) {
	assert(start < bit_size);
	unsigned w = start / bt_bits;
	unsigned b = start % bt_bits;
	unsigned sz = data.size();

	while (w < sz) {
		basetype d = data[w] >> b;
		if (d != 0) {
			unsigned pos = __builtin_ctz(d) + b + w * bt_bits;
			return pos;
		}

		b = 0;
		++w;
	}

	return bit_size;
}

sb_value_set::iterator::iterator(shader& sh, sb_value_set* s, unsigned nb)
	: vp(sh.get_value_pool()), s(s), nb(nb) {}

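/* Computes the union with s2 and returns true if this set was changed as a
 * result.
 */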
bool sb_value_set::add_set_checked(sb_value_set& s2) {
	if (bs.size() < s2.bs.size())
		bs.resize(s2.bs.size());
	sb_bitset nbs = bs | s2.bs;
	if (bs != nbs) {
		bs.swap(nbs);
		return true;
	}
	return false;
}

void r600_sb::sb_value_set::remove_set(sb_value_set& s2) {
	bs.mask(s2.bs);
}

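/* Value uids are 1-based, so a value occupies bit (uid - 1) in the bitset.
 * The bitset is grown with some slack (uid + 32) to limit the number of
 * resizes. Returns true if the set changed.
 */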
bool sb_value_set::add_val(value* v) {
	assert(v);
	if (bs.size() < v->uid)
		bs.resize(v->uid + 32);

	return bs.set_chk(v->uid - 1, 1);
}

bool sb_value_set::remove_vec(vvec& vv) {
	bool modified = false;
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		if (*I)
			modified |= remove_val(*I);
	}
	return modified;
}

void sb_value_set::clear() {
	bs.clear();
}

bool sb_value_set::remove_val(value* v) {
	assert(v);
	if (bs.size() < v->uid)
		return false;
	return bs.set_chk(v->uid - 1, 0);
}

bool r600_sb::sb_value_set::add_vec(vvec& vv) {
	bool modified = false;
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (v)
			modified |= add_val(v);
	}
	return modified;
}

bool r600_sb::sb_value_set::contains(value* v) {
	unsigned b = v->uid - 1;
	if (b < bs.size())
		return bs.get(b);
	else
		return false;
}

bool sb_value_set::empty() {
	return bs.size() == 0 || bs.find_bit(0) == bs.size();
}

void sb_bitset::swap(sb_bitset& bs2) {
	std::swap(data, bs2.data);
	std::swap(bit_size, bs2.bit_size);
}

bool sb_bitset::operator ==(const sb_bitset& bs2) {
	if (bit_size != bs2.bit_size)
		return false;

	for (unsigned i = 0, c = data.size(); i < c; ++i) {
		if (data[i] != bs2.data[i])
			return false;
	}
	return true;
}

sb_bitset& sb_bitset::operator &=(const sb_bitset& bs2) {
	if (bit_size > bs2.bit_size) {
		resize(bs2.bit_size);
	}

	for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c;
			++i) {
		data[i] &= bs2.data[i];
	}
	return *this;
}

sb_bitset& sb_bitset::mask(const sb_bitset& bs2) {
	if (bit_size < bs2.bit_size) {
		resize(bs2.bit_size);
	}

	for (unsigned i = 0, c = data.size(); i < c;
			++i) {
		data[i] &= ~bs2.data[i];
	}
	return *this;
}

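/* Verifies a CK_SAME_REG constraint: every non-null value must have a GPR
 * assigned, all of them must share the same GPR index, and channel-pinned
 * values must sit on their pinned channel.
 */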
bool ra_constraint::check() {
	assert(kind == CK_SAME_REG);

	unsigned reg = 0;

	for (vvec::iterator I = values.begin(), E = values.end(); I != E; ++I) {
		value *v = *I;
		if (!v)
			continue;

		if (!v->gpr)
			return false;

		if (reg == 0)
			reg = v->gpr.sel() + 1;
		else if (reg != v->gpr.sel() + 1)
			return false;

		if (v->is_chan_pinned()) {
			if (v->pin_gpr.chan() != v->gpr.chan())
				return false;
		}
	}
	return true;
}

bool gpr_array::is_dead() {
	return false;
}

} // namespace r600_sb