1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #define VT_DEBUG 0
28
29 #if VT_DEBUG
30 #define VT_DUMP(q) do { q } while (0)
31 #else
32 #define VT_DUMP(q)
33 #endif
34
35 #include <cstring>
36
37 #include "sb_shader.h"
38 #include "sb_pass.h"
39
40 namespace r600_sb {
41
// Printable names for the 8 possible channel encodings returned by chan()
// (x/y/z/w GPR channels plus the remaining encodings).
static const char * chans = "xyzw01?_";
43
operator <<(sb_ostream & o,value & v)44 sb_ostream& operator << (sb_ostream &o, value &v) {
45
46 bool dead = v.flags & VLF_DEAD;
47
48 if (dead)
49 o << "{";
50
51 switch (v.kind) {
52 case VLK_SPECIAL_REG: {
53 switch (v.select.sel()) {
54 case SV_AR_INDEX: o << "AR"; break;
55 case SV_ALU_PRED: o << "PR"; break;
56 case SV_EXEC_MASK: o << "EM"; break;
57 case SV_VALID_MASK: o << "VM"; break;
58 case SV_GEOMETRY_EMIT: o << "GEOMETRY_EMIT"; break;
59 default: o << "???specialreg"; break;
60 }
61 break;
62 }
63
64 case VLK_REG:
65 o << "R" << v.select.sel() << "."
66 << chans[v.select.chan()];
67
68 break;
69 case VLK_KCACHE: {
70 o << "C" << v.select.sel() << "." << chans[v.select.chan()];
71 }
72 break;
73 case VLK_CONST:
74 o << v.literal_value.f << "|";
75 o.print_zw_hex(v.literal_value.u, 8);
76 break;
77 case VLK_PARAM:
78 o << "Param" << (v.select.sel() - ALU_SRC_PARAM_OFFSET)
79 << chans[v.select.chan()];
80 break;
81 case VLK_TEMP:
82 o << "t" << v.select.sel() - shader::temp_regid_offset;
83 break;
84 case VLK_REL_REG:
85
86 o << "A" << v.select;
87 o << "[";
88 o << *v.rel;
89 o << "]";
90
91 o << "_" << v.uid;
92
93 break;
94 case VLK_UNDEF:
95 o << "undef";
96 break;
97 default:
98 o << v.kind << "?????";
99 break;
100 }
101
102 if (v.version)
103 o << "." << v.version;
104
105 if (dead)
106 o << "}";
107
108 if (v.is_global())
109 o << "||";
110 if (v.is_fixed())
111 o << "F";
112 if (v.is_prealloc())
113 o << "P";
114
115 sel_chan g;
116
117 if (v.is_rel()) {
118 g = v.array->gpr;
119 } else {
120 g = v.gpr;
121 }
122
123 if (g) {
124 o << "@R" << g.sel() << "." << chans[g.chan()];
125 }
126
127 return o;
128 }
129
add_value(value * v)130 void value_table::add_value(value* v) {
131
132 if (v->gvn_source) {
133 return;
134 }
135
136 VT_DUMP(
137 sblog << "gvn add_value ";
138 dump::dump_val(v);
139 );
140
141 value_hash hash = v->hash();
142 vt_item & vti = hashtable[hash & size_mask];
143 vti.push_back(v);
144 ++cnt;
145
146 if (v->def && ex.try_fold(v)) {
147 VT_DUMP(
148 sblog << " folded: ";
149 dump::dump_val(v->gvn_source);
150 sblog << "\n";
151 );
152 return;
153 }
154
155 int n = 0;
156 for (vt_item::iterator I = vti.begin(), E = vti.end(); I != E; ++I, ++n) {
157 value *c = *I;
158
159 if (c == v)
160 break;
161
162 if (expr_equal(c, v)) {
163 v->gvn_source = c->gvn_source;
164
165 VT_DUMP(
166 sblog << " found : equal to ";
167 dump::dump_val(v->gvn_source);
168 sblog << "\n";
169 );
170 return;
171 }
172 }
173
174 v->gvn_source = v;
175 VT_DUMP(
176 sblog << " added new\n";
177 );
178 }
179
hash()180 value_hash value::hash() {
181 if (ghash)
182 return ghash;
183 if (is_rel())
184 ghash = rel_hash();
185 else if (def)
186 ghash = def->hash();
187 else
188 ghash = ((uintptr_t)this) | 1;
189
190 return ghash;
191 }
192
rel_hash()193 value_hash value::rel_hash() {
194 value_hash h = rel ? rel->hash() : 0;
195 h |= select << 10;
196 h |= array->hash();
197 return h;
198 }
199
// True if l and r compute the same expression; delegates to the shared
// expression handler.
bool value_table::expr_equal(value* l, value* r) {
	return ex.equal(l, r);
}
203
get_values(vvec & v)204 void value_table::get_values(vvec& v) {
205 v.resize(cnt);
206
207 vvec::iterator T = v.begin();
208
209 for(vt_table::iterator I = hashtable.begin(), E = hashtable.end();
210 I != E; ++I) {
211 T = std::copy(I->begin(), I->end(), T);
212 }
213 }
214
add_use(node * n,use_kind kind,int arg)215 void value::add_use(node* n, use_kind kind, int arg) {
216 if (0) {
217 sblog << "add_use ";
218 dump::dump_val(this);
219 sblog << " => ";
220 dump::dump_op(n);
221 sblog << " kind " << kind << " arg " << arg << "\n";
222 }
223 uses.push_back(new use_info(n, kind, arg));
224 }
225
226 struct use_node_comp {
use_node_compr600_sb::use_node_comp227 explicit use_node_comp(const node *n) : n(n) {}
operator ()r600_sb::use_node_comp228 bool operator() (const use_info *u) {
229 return u->op->hash() == n->hash();
230 }
231
232 private:
233 const node *n;
234 };
235
// Remove the first recorded use whose node matches n.
// NOTE(review): matching is done by node hash (see use_node_comp), and only
// the first match is erased.  The use_info was allocated with new in
// add_use and is not deleted here - TODO confirm ownership / possible leak.
void value::remove_use(const node *n) {
	uselist::iterator it =
		std::find_if(uses.begin(), uses.end(), use_node_comp(n));

	if (it != uses.end())
	{
		// TODO assert((*it)->kind == kind) ?
		// TODO assert((*it)->arg == arg) ?
		uses.erase(it);
	}
}
247
// Number of uses currently recorded for this value.
unsigned value::use_count() {
	return uses.size();
}
251
is_global()252 bool value::is_global() {
253 if (chunk)
254 return chunk->is_global();
255 return flags & VLF_GLOBAL;
256 }
257
// Mark this value as global; the flag is propagated to the coalesced
// chunk, if any.  Only valid for SGPR values.
void value::set_global() {
	assert(is_sgpr());
	flags |= VLF_GLOBAL;
	if (chunk)
		chunk->set_global();
}
264
// Mark this value as preallocated; the flag is propagated to the coalesced
// chunk, if any.  Only valid for SGPR values.
void value::set_prealloc() {
	assert(is_sgpr());
	flags |= VLF_PREALLOC;
	if (chunk)
		chunk->set_prealloc();
}
271
is_fixed()272 bool value::is_fixed() {
273 if (array && array->gpr)
274 return true;
275 if (chunk && chunk->is_fixed())
276 return true;
277 return flags & VLF_FIXED;
278 }
279
// Pin this value (and its coalesced chunk, if any) to its allocation.
void value::fix() {
	if (chunk)
		chunk->fix();
	flags |= VLF_FIXED;
}
285
is_prealloc()286 bool value::is_prealloc() {
287 if (chunk)
288 return chunk->is_prealloc();
289 return flags & VLF_PREALLOC;
290 }
291
delete_uses()292 void value::delete_uses() {
293 uses.erase(uses.begin(), uses.end());
294 }
295
update_values()296 void ra_constraint::update_values() {
297 for (vvec::iterator I = values.begin(), E = values.end(); I != E; ++I) {
298 assert(!(*I)->constraint);
299 (*I)->constraint = this;
300 }
301 }
302
// Bump allocator: carve sz bytes (rounded up to SB_POOL_ALIGN) out of the
// current block, malloc'ing a fresh block when the request does not fit in
// the remaining capacity.  When a new block is started, total_size is
// snapped to the old capacity, i.e. the unused tail of the previous block
// is abandoned.  Allocation size is limited to a small fraction of
// block_size so a request always fits in a single block.
void* sb_pool::allocate(unsigned sz) {
	sz = (sz + SB_POOL_ALIGN - 1) & ~(SB_POOL_ALIGN - 1);
	assert (sz < (block_size >> 6) && "too big allocation size for sb_pool");

	unsigned offset = total_size % block_size;
	unsigned capacity = block_size * blocks.size();

	if (total_size + sz > capacity) {
		total_size = capacity;
		void * nb = malloc(block_size);
		blocks.push_back(nb);
		offset = 0;
	}

	total_size += sz;
	return ((char*)blocks.back() + offset);
}
320
free_all()321 void sb_pool::free_all() {
322 for (block_vector::iterator I = blocks.begin(), E = blocks.end(); I != E;
323 ++I) {
324 free(*I);
325 }
326 }
327
// Allocate one element from the pool and placement-construct a value in
// it.  The value's id is the pool element count before this allocation.
value* sb_value_pool::create(value_kind k, sel_chan regid,
                             unsigned ver) {
	void* np = allocate(aligned_elt_size);
	value *v = new (np) value(size(), k, regid, ver);
	return v;
}
334
// Destroy every value constructed in the pool, walking the blocks in
// allocation order and stopping once total_size bytes have been covered.
// NOTE(review): assumes elements are packed back-to-back, i.e. block_size
// is an exact multiple of aligned_elt_size (otherwise an abandoned block
// tail would be destroyed as if it were a value) - TODO confirm.
void sb_value_pool::delete_all() {
	unsigned bcnt = blocks.size();
	unsigned toffset = 0;
	for (unsigned b = 0; b < bcnt; ++b) {
		char *bstart = (char*)blocks[b];
		for (unsigned offset = 0; offset < block_size;
				offset += aligned_elt_size) {
			((value*)(bstart + offset))->~value();
			toffset += aligned_elt_size;
			if (toffset >= total_size)
				return;
		}
	}
}
349
get(unsigned id)350 bool sb_bitset::get(unsigned id) {
351 assert(id < bit_size);
352 unsigned w = id / bt_bits;
353 unsigned b = id % bt_bits;
354 return (data[w] >> b) & 1;
355 }
356
set(unsigned id,bool bit)357 void sb_bitset::set(unsigned id, bool bit) {
358 assert(id < bit_size);
359 unsigned w = id / bt_bits;
360 unsigned b = id % bt_bits;
361 if (w >= data.size())
362 data.resize(w + 1);
363
364 if (bit)
365 data[w] |= (1 << b);
366 else
367 data[w] &= ~(1 << b);
368 }
369
set_chk(unsigned id,bool bit)370 inline bool sb_bitset::set_chk(unsigned id, bool bit) {
371 assert(id < bit_size);
372 unsigned w = id / bt_bits;
373 unsigned b = id % bt_bits;
374 basetype d = data[w];
375 basetype dn = (d & ~(1 << b)) | (bit << b);
376 bool r = (d != dn);
377 data[w] = r ? dn : data[w];
378 return r;
379 }
380
clear()381 void sb_bitset::clear() {
382 std::fill(data.begin(), data.end(), 0);
383 }
384
// Change the logical size to 'size' bits.  The word array is resized to
// match, and when growing, the stale bits above the old bit_size in the
// last pre-existing word are cleared so they read as zero (get()/find_bit()
// rely on this invariant).
void sb_bitset::resize(unsigned size) {
	unsigned cur_data_size = data.size();
	unsigned new_data_size = (size + bt_bits - 1) / bt_bits;


	if (new_data_size != cur_data_size)
		data.resize(new_data_size);

	// make sure that new bits in the existing word are cleared
	if (cur_data_size && size > bit_size && bit_size % bt_bits) {
		// clear_mask selects the bits at and above the old bit_size.
		basetype clear_mask = (~(basetype)0u) << (bit_size % bt_bits);
		data[cur_data_size - 1] &= ~clear_mask;
	}

	bit_size = size;
}
401
// Return the index of the first set bit at or after 'start', or bit_size
// when there is none.  Relies on set()/resize() keeping bits above
// bit_size zeroed.
unsigned sb_bitset::find_bit(unsigned start) {
	assert(start < bit_size);
	unsigned w = start / bt_bits;
	unsigned b = start % bt_bits;
	unsigned sz = data.size();

	while (w < sz) {
		basetype d = data[w] >> b;
		if (d != 0) {
			// __builtin_ctz yields the lowest set bit of the shifted word
			// (d != 0, so the result is well-defined).
			unsigned pos = __builtin_ctz(d) + b + w * bt_bits;
			return pos;
		}

		// Continue with the next word, starting at its lowest bit.
		b = 0;
		++w;
	}

	return bit_size;
}
421
// Iterator over a value set: bit positions are resolved to values through
// the shader's value pool; nb is the current bit index.
sb_value_set::iterator::iterator(shader& sh, sb_value_set* s, unsigned nb)
	: vp(sh.get_value_pool()), s(s), nb(nb) {}
424
add_set_checked(sb_value_set & s2)425 bool sb_value_set::add_set_checked(sb_value_set& s2) {
426 if (bs.size() < s2.bs.size())
427 bs.resize(s2.bs.size());
428 sb_bitset nbs = bs | s2.bs;
429 if (bs != nbs) {
430 bs.swap(nbs);
431 return true;
432 }
433 return false;
434 }
435
remove_set(sb_value_set & s2)436 void r600_sb::sb_value_set::remove_set(sb_value_set& s2) {
437 bs.mask(s2.bs);
438 }
439
// Insert v into the set; returns true if it was not already present.
// Values are keyed by uid - 1; the bitset grows with 32 bits of headroom
// to amortize resizes.
bool sb_value_set::add_val(value* v) {
	assert(v);
	if (bs.size() < v->uid)
		bs.resize(v->uid + 32);

	return bs.set_chk(v->uid - 1, 1);
}
447
remove_vec(vvec & vv)448 bool sb_value_set::remove_vec(vvec& vv) {
449 bool modified = false;
450 for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
451 if (*I)
452 modified |= remove_val(*I);
453 }
454 return modified;
455 }
456
// Empty the set (underlying bitset keeps its size, all bits cleared).
void sb_value_set::clear() {
	bs.clear();
}
460
remove_val(value * v)461 bool sb_value_set::remove_val(value* v) {
462 assert(v);
463 if (bs.size() < v->uid)
464 return false;
465 return bs.set_chk(v->uid - 1, 0);
466 }
467
add_vec(vvec & vv)468 bool r600_sb::sb_value_set::add_vec(vvec& vv) {
469 bool modified = false;
470 for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
471 value *v = *I;
472 if (v)
473 modified |= add_val(v);
474 }
475 return modified;
476 }
477
contains(value * v)478 bool r600_sb::sb_value_set::contains(value* v) {
479 unsigned b = v->uid - 1;
480 if (b < bs.size())
481 return bs.get(b);
482 else
483 return false;
484 }
485
empty()486 bool sb_value_set::empty() {
487 return bs.size() == 0 || bs.find_bit(0) == bs.size();
488 }
489
// O(1) exchange of contents (word storage and logical size) with bs2.
void sb_bitset::swap(sb_bitset& bs2) {
	std::swap(data, bs2.data);
	std::swap(bit_size, bs2.bit_size);
}
494
operator ==(const sb_bitset & bs2)495 bool sb_bitset::operator ==(const sb_bitset& bs2) {
496 if (bit_size != bs2.bit_size)
497 return false;
498
499 for (unsigned i = 0, c = data.size(); i < c; ++i) {
500 if (data[i] != bs2.data[i])
501 return false;
502 }
503 return true;
504 }
505
// this &= bs2.  If this set is wider it is first truncated to bs2's size,
// then words are ANDed pairwise; the loop is bounded by the shorter word
// array so no out-of-range access occurs.
sb_bitset& sb_bitset::operator &=(const sb_bitset& bs2) {
	if (bit_size > bs2.bit_size) {
		resize(bs2.bit_size);
	}

	for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c;
			++i) {
		data[i] &= bs2.data[i];
	}
	return *this;
}
517
// this &= ~bs2: clear every bit that is set in bs2.  This set grows to
// cover bs2 first.  The loop is now bounded by BOTH word counts: when this
// set is wider than bs2 (bit_size > bs2.bit_size on entry), the old bound
// of data.size() read bs2.data out of range.  Words of this set beyond
// bs2's length are untouched, which matches and-not semantics (bs2 is
// implicitly zero there).
sb_bitset& sb_bitset::mask(const sb_bitset& bs2) {
	if (bit_size < bs2.bit_size) {
		resize(bs2.bit_size);
	}

	for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c;
			++i) {
		data[i] &= ~bs2.data[i];
	}
	return *this;
}
529
check()530 bool ra_constraint::check() {
531 assert(kind == CK_SAME_REG);
532
533 unsigned reg = 0;
534
535 for (vvec::iterator I = values.begin(), E = values.end(); I != E; ++I) {
536 value *v = *I;
537 if (!v)
538 continue;
539
540 if (!v->gpr)
541 return false;
542
543 if (reg == 0)
544 reg = v->gpr.sel() + 1;
545 else if (reg != v->gpr.sel() + 1)
546 return false;
547
548 if (v->is_chan_pinned()) {
549 if (v->pin_gpr.chan() != v->gpr.chan())
550 return false;
551 }
552 }
553 return true;
554 }
555
// Array liveness is not tracked yet - conservatively report every array
// as live.  TODO: implement real liveness analysis for gpr arrays.
bool gpr_array::is_dead() {
	return false;
}
559
560 } // namespace r600_sb
561