/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

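/* Set VT_DEBUG to 1 to enable the VT_DUMP() tracing below (value table /
 * GVN diagnostics printed to sblog). */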
#define VT_DEBUG 0

#if VT_DEBUG
#define VT_DUMP(q) do { q } while (0)
#else
#define VT_DUMP(q)
#endif

#include <algorithm> /* std::copy, std::fill, std::find_if, std::min */
#include <cstring>

#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

static const char * chans = "xyzw01?_";

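/* Pretty-printer for values. Examples of the notation it produces:
 *   R2.x.3    - GPR value: register 2, channel x, version 3
 *   C0.y      - kcache (constant buffer) value
 *   t5        - temp value
 *   {R2.x.3}  - braces mark a dead value
 *   ||, F, P  - global / fixed / preallocated flags
 *   @R1.y     - final GPR location assigned by register allocation
 */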
sb_ostream& operator << (sb_ostream &o, value &v) {

	bool dead = v.flags & VLF_DEAD;

	if (dead)
		o << "{";

	switch (v.kind) {
	case VLK_SPECIAL_REG: {
		switch (v.select.sel()) {
		case SV_AR_INDEX: o << "AR"; break;
		case SV_ALU_PRED: o << "PR"; break;
		case SV_EXEC_MASK: o << "EM"; break;
		case SV_VALID_MASK: o << "VM"; break;
		case SV_GEOMETRY_EMIT: o << "GEOMETRY_EMIT"; break;
		case SV_LDS_RW: o << "LDS_RW"; break;
		case SV_LDS_OQA: o << "LDS_OQA"; break;
		case SV_LDS_OQB: o << "LDS_OQB"; break;
		default: o << "???specialreg"; break;
		}
		break;
	}

	case VLK_REG:
		o << "R" << v.select.sel() << "."
			<< chans[v.select.chan()];
		break;
	case VLK_KCACHE:
		o << "C" << v.select.sel() << "." << chans[v.select.chan()];
		break;
	case VLK_CONST:
		o << v.literal_value.f << "|";
		o.print_zw_hex(v.literal_value.u, 8);
		break;
	case VLK_PARAM:
		o << "Param" << (v.select.sel() - ALU_SRC_PARAM_OFFSET)
			<< chans[v.select.chan()];
		break;
	case VLK_TEMP:
		o << "t" << v.select.sel() - shader::temp_regid_offset;
		break;
	case VLK_REL_REG:
		o << "A" << v.select;
		o << "[";
		o << *v.rel;
		o << "]";
		o << "_" << v.uid;
		break;
	case VLK_UNDEF:
		o << "undef";
		break;
	default:
		o << v.kind << "?????";
		break;
	}

	if (v.version)
		o << "." << v.version;

	if (dead)
		o << "}";

	if (v.is_global())
		o << "||";
	if (v.is_fixed())
		o << "F";
	if (v.is_prealloc())
		o << "P";

	sel_chan g;

	if (v.is_rel()) {
		g = v.array->gpr;
	} else {
		g = v.gpr;
	}

	if (g) {
		o << "@R" << g.sel() << "." << chans[g.chan()];
	}

	return o;
}

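/* Register a value with the GVN value table. If the defining expression can
 * be constant-folded, or an equivalent expression is already in the table,
 * the value's gvn_source is pointed at the existing equivalent; otherwise
 * the value becomes its own gvn_source. */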
void value_table::add_value(value* v) {

	if (v->gvn_source) {
		return;
	}

	VT_DUMP(
		sblog << "gvn add_value ";
		dump::dump_val(v);
	);

	value_hash hash = v->hash();
	vt_item & vti = hashtable[hash & size_mask];
	vti.push_back(v);
	++cnt;

	if (v->def && ex.try_fold(v)) {
		VT_DUMP(
			sblog << " folded: ";
			dump::dump_val(v->gvn_source);
			sblog << "\n";
		);
		return;
	}

	int n = 0;
	for (vt_item::iterator I = vti.begin(), E = vti.end(); I != E; ++I, ++n) {
		value *c = *I;

		if (c == v)
			break;

		if (expr_equal(c, v)) {
			v->gvn_source = c->gvn_source;

			VT_DUMP(
				sblog << " found : equal to ";
				dump::dump_val(v->gvn_source);
				sblog << "\n";
			);
			return;
		}
	}

	v->gvn_source = v;
	VT_DUMP(
		sblog << " added new\n";
	);
}

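/* Memoized hash used by the GVN table: relative (indirectly addressed)
 * values hash via rel_hash(), defined values take the hash of their
 * defining node, and anything else falls back to pointer identity. The
 * "| 1" keeps the result nonzero, since 0 means "not computed yet". */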
value_hash value::hash() {
	if (ghash)
		return ghash;
	if (is_rel())
		ghash = rel_hash();
	else if (def)
		ghash = def->hash();
	else
		ghash = ((uintptr_t)this) | 1;

	return ghash;
}

value_hash value::rel_hash() {
	value_hash h = rel ? rel->hash() : 0;
	h |= select << 10;
	h |= array->hash();
	return h;
}

bool value_table::expr_equal(value* l, value* r) {
	return ex.equal(l, r);
}

void value_table::get_values(vvec& v) {
	v.resize(cnt);

	vvec::iterator T = v.begin();

	for (vt_table::iterator I = hashtable.begin(), E = hashtable.end();
			I != E; ++I) {
		T = std::copy(I->begin(), I->end(), T);
	}
}

void value::add_use(node* n) {
	if (0) {
		sblog << "add_use ";
		dump::dump_val(this);
		sblog << " => ";
		dump::dump_op(n);
	}
	uses.push_back(n);
}

struct use_node_comp {
	explicit use_node_comp(const node *n) : n(n) {}
	bool operator() (const node *o) {
		return o->hash() == n->hash();
	}

private:
	const node *n;
};

void value::remove_use(const node *n) {
	uselist::iterator it =
		std::find_if(uses.begin(), uses.end(), use_node_comp(n));

	if (it != uses.end()) {
		// We only ever had a pointer, so don't delete it here
		uses.erase(it);
	}
}

unsigned value::use_count() {
	return uses.size();
}

bool value::is_global() {
	if (chunk)
		return chunk->is_global();
	return flags & VLF_GLOBAL;
}

void value::set_global() {
	assert(is_sgpr());
	flags |= VLF_GLOBAL;
	if (chunk)
		chunk->set_global();
}

void value::set_prealloc() {
	assert(is_sgpr());
	flags |= VLF_PREALLOC;
	if (chunk)
		chunk->set_prealloc();
}

bool value::is_fixed() {
	if (array && array->gpr)
		return true;
	if (chunk && chunk->is_fixed())
		return true;
	return flags & VLF_FIXED;
}

void value::fix() {
	if (chunk)
		chunk->fix();
	flags |= VLF_FIXED;
}

bool value::is_prealloc() {
	if (chunk)
		return chunk->is_prealloc();
	return flags & VLF_PREALLOC;
}

void value::delete_uses() {
	// We only ever had pointers, so don't delete them here
	uses.clear();
}

void ra_constraint::update_values() {
	for (vvec::iterator I = values.begin(), E = values.end(); I != E; ++I) {
		assert(!(*I)->constraint);
		(*I)->constraint = this;
	}
}

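/* Simple bump allocator: round sz up to SB_POOL_ALIGN (defined elsewhere;
 * e.g. with an alignment of 8, a request for 13 bytes consumes 16) and
 * carve it out of the current block, starting a fresh malloc'd block when
 * the current one is full. Individual allocations are never freed; the
 * whole pool is released at once by free_all(). */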
void* sb_pool::allocate(unsigned sz) {
	sz = (sz + SB_POOL_ALIGN - 1) & ~(SB_POOL_ALIGN - 1);
	assert(sz < (block_size >> 6) && "too big allocation size for sb_pool");

	unsigned offset = total_size % block_size;
	unsigned capacity = block_size * blocks.size();

	if (total_size + sz > capacity) {
		total_size = capacity;
		void * nb = malloc(block_size);
		blocks.push_back(nb);
		offset = 0;
	}

	total_size += sz;
	return ((char*)blocks.back() + offset);
}

void sb_pool::free_all() {
	for (block_vector::iterator I = blocks.begin(), E = blocks.end(); I != E;
			++I) {
		free(*I);
	}
}

value* sb_value_pool::create(value_kind k, sel_chan regid,
                             unsigned ver) {
	void* np = allocate(aligned_elt_size);
	value *v = new (np) value(size(), k, regid, ver);
	return v;
}

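/* Values are placement-new'd into pool storage by create(), so the pool
 * cannot simply free its blocks: walk every element of every block and run
 * the destructors explicitly, stopping once total_size bytes are covered. */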
void sb_value_pool::delete_all() {
	unsigned bcnt = blocks.size();
	unsigned toffset = 0;
	for (unsigned b = 0; b < bcnt; ++b) {
		char *bstart = (char*)blocks[b];
		for (unsigned offset = 0; offset < block_size;
				offset += aligned_elt_size) {
			((value*)(bstart + offset))->~value();
			toffset += aligned_elt_size;
			if (toffset >= total_size)
				return;
		}
	}
}

bool sb_bitset::get(unsigned id) {
	assert(id < bit_size);
	unsigned w = id / bt_bits;
	unsigned b = id % bt_bits;
	return (data[w] >> b) & 1;
}

void sb_bitset::set(unsigned id, bool bit) {
	assert(id < bit_size);
	unsigned w = id / bt_bits;
	unsigned b = id % bt_bits;
	if (w >= data.size())
		data.resize(w + 1);

	// shift a basetype-wide 1 so the result is well-defined for any b
	if (bit)
		data[w] |= (basetype)1 << b;
	else
		data[w] &= ~((basetype)1 << b);
}

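/* Like set(), but returns true iff the bit actually changed. Written
 * branchlessly: build the new word, compare it with the old one, and store
 * only when they differ. sb_value_set relies on the return value to report
 * whether an add/remove modified the set. */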
inline bool sb_bitset::set_chk(unsigned id, bool bit) {
	assert(id < bit_size);
	unsigned w = id / bt_bits;
	unsigned b = id % bt_bits;
	basetype d = data[w];
	basetype dn = (d & ~((basetype)1 << b)) | ((basetype)bit << b);
	bool r = (d != dn);
	data[w] = r ? dn : data[w];
	return r;
}

void sb_bitset::clear() {
	std::fill(data.begin(), data.end(), 0);
}

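/* Resize the bitset to 'size' bits. Shrinking does not scrub the tail of
 * the last kept word, so when growing again those stale bits could leak
 * into comparisons and scans; clear them explicitly. Words added by
 * std::vector::resize() are already zero-initialized. */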
void sb_bitset::resize(unsigned size) {
	unsigned cur_data_size = data.size();
	unsigned new_data_size = (size + bt_bits - 1) / bt_bits;

	if (new_data_size != cur_data_size)
		data.resize(new_data_size);

	// make sure that new bits in the existing word are cleared
	if (cur_data_size && size > bit_size && bit_size % bt_bits) {
		basetype clear_mask = (~(basetype)0u) << (bit_size % bt_bits);
		data[cur_data_size - 1] &= ~clear_mask;
	}

	bit_size = size;
}

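/* Return the index of the first set bit at or after 'start', or bit_size
 * if there is none. Scans word by word and uses __builtin_ctz() (count
 * trailing zeros) to locate the lowest set bit of a nonzero word. */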
unsigned sb_bitset::find_bit(unsigned start) {
	assert(start < bit_size);
	unsigned w = start / bt_bits;
	unsigned b = start % bt_bits;
	unsigned sz = data.size();

	while (w < sz) {
		basetype d = data[w] >> b;
		if (d != 0) {
			unsigned pos = __builtin_ctz(d) + b + w * bt_bits;
			return pos;
		}

		b = 0;
		++w;
	}

	return bit_size;
}

sb_value_set::iterator::iterator(shader& sh, sb_value_set* s, unsigned nb)
	: vp(sh.get_value_pool()), s(s), nb(nb) {}

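/* Union this set with s2 and report whether anything was added. The
 * "checked" variant lets dataflow passes iterate to a fixed point, e.g.
 * keep merging sets until add_set_checked() returns false. */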
bool sb_value_set::add_set_checked(sb_value_set& s2) {
	if (bs.size() < s2.bs.size())
		bs.resize(s2.bs.size());
	sb_bitset nbs = bs | s2.bs;
	if (bs != nbs) {
		bs.swap(nbs);
		return true;
	}
	return false;
}

void r600_sb::sb_value_set::remove_set(sb_value_set& s2) {
	bs.mask(s2.bs);
}

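/* Membership is tracked by value uid; uids start at 1, so value v occupies
 * bit (uid - 1). When growing, 32 extra bits of slack are reserved to
 * amortize repeated resizes. Returns true if the value was not already in
 * the set. */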
bool sb_value_set::add_val(value* v) {
	assert(v);
	if (bs.size() < v->uid)
		bs.resize(v->uid + 32);

	return bs.set_chk(v->uid - 1, 1);
}

bool sb_value_set::remove_vec(vvec& vv) {
	bool modified = false;
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		if (*I)
			modified |= remove_val(*I);
	}
	return modified;
}

void sb_value_set::clear() {
	bs.clear();
}

bool sb_value_set::remove_val(value* v) {
	assert(v);
	if (bs.size() < v->uid)
		return false;
	return bs.set_chk(v->uid - 1, 0);
}

bool r600_sb::sb_value_set::add_vec(vvec& vv) {
	bool modified = false;
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (v)
			modified |= add_val(v);
	}
	return modified;
}

bool r600_sb::sb_value_set::contains(value* v) {
	unsigned b = v->uid - 1;
	if (b < bs.size())
		return bs.get(b);
	else
		return false;
}

bool sb_value_set::empty() {
	return bs.size() == 0 || bs.find_bit(0) == bs.size();
}

void sb_bitset::swap(sb_bitset& bs2) {
	std::swap(data, bs2.data);
	std::swap(bit_size, bs2.bit_size);
}

bool sb_bitset::operator ==(const sb_bitset& bs2) {
	if (bit_size != bs2.bit_size)
		return false;

	for (unsigned i = 0, c = data.size(); i < c; ++i) {
		if (data[i] != bs2.data[i])
			return false;
	}
	return true;
}

sb_bitset& sb_bitset::operator &=(const sb_bitset& bs2) {
	if (bit_size > bs2.bit_size) {
		resize(bs2.bit_size);
	}

	for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c;
			++i) {
		data[i] &= bs2.data[i];
	}
	return *this;
}

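/* Set difference in place: clear every bit of this set that is set in bs2
 * (i.e. this &= ~bs2), growing this set first if bs2 is larger. */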
sb_bitset& sb_bitset::mask(const sb_bitset& bs2) {
	if (bit_size < bs2.bit_size) {
		resize(bs2.bit_size);
	}

	// only walk words present in both vectors to avoid reading past bs2.data
	for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c;
			++i) {
		data[i] &= ~bs2.data[i];
	}
	return *this;
}

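/* Verify a CK_SAME_REG constraint after register allocation: every live
 * value in the group must have been assigned the same GPR index, and any
 * channel-pinned value must sit in its pinned channel. */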
bool ra_constraint::check() {
	assert(kind == CK_SAME_REG);

	unsigned reg = 0;

	for (vvec::iterator I = values.begin(), E = values.end(); I != E; ++I) {
		value *v = *I;
		if (!v)
			continue;

		if (!v->gpr)
			return false;

		if (reg == 0)
			reg = v->gpr.sel() + 1;
		else if (reg != v->gpr.sel() + 1)
			return false;

		if (v->is_chan_pinned()) {
			if (v->pin_gpr.chan() != v->gpr.chan())
				return false;
		}
	}
	return true;
}

bool gpr_array::is_dead() {
	return false;
}

} // namespace r600_sb