1 /* 2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27 #ifndef R600_SB_IR_H_ 28 #define R600_SB_IR_H_ 29 30 #include <algorithm> 31 #include <stdint.h> 32 #include <vector> 33 #include <set> 34 #include <algorithm> 35 36 #include "sb_bc.h" 37 38 namespace r600_sb { 39 40 enum special_regs { 41 SV_ALU_PRED = 128, 42 SV_EXEC_MASK, 43 SV_AR_INDEX, 44 SV_VALID_MASK, 45 SV_GEOMETRY_EMIT, 46 SV_LDS_RW, 47 SV_LDS_OQA, 48 SV_LDS_OQB, 49 SV_SCRATCH 50 }; 51 52 class node; 53 class value; 54 class shader; 55 56 struct sel_chan 57 { 58 unsigned id; 59 idsel_chan60 sel_chan(unsigned id = 0) : id(id) {} sel_chansel_chan61 sel_chan(unsigned sel, unsigned chan) : id(((sel << 2) | chan) + 1) {} 62 selsel_chan63 unsigned sel() const { return sel(id); } chansel_chan64 unsigned chan() const {return chan(id); } 65 operator unsigned() const {return id;} 66 selsel_chan67 static unsigned sel(unsigned idx) { return (idx-1) >> 2; } chansel_chan68 static unsigned chan(unsigned idx) { return (idx-1) & 3; } 69 sel_chansel_chan70 sel_chan(unsigned bank, unsigned index, 71 unsigned chan, alu_kcache_index_mode index_mode) 72 : id(sel_chan((bank << 12) | index | ((unsigned)index_mode << 28), chan).id) {} kcache_index_modesel_chan73 unsigned kcache_index_mode() const { return sel() >> 28; } kcache_selsel_chan74 unsigned kcache_sel() const { return sel() & 0x0fffffffu; } kcache_banksel_chan75 unsigned kcache_bank() const { return kcache_sel() >> 12; } 76 }; 77 78 inline sb_ostream& operator <<(sb_ostream& o, sel_chan r) { 79 static const char * ch = "xyzw"; 80 o << r.sel() << "." << ch[r.chan()]; 81 return o; 82 } 83 84 typedef std::vector<value*> vvec; 85 86 class sb_pool { 87 protected: 88 static const unsigned SB_POOL_ALIGN = 8; 89 static const unsigned SB_POOL_DEFAULT_BLOCK_SIZE = (1 << 16); 90 91 typedef std::vector<void*> block_vector; 92 93 unsigned block_size; 94 block_vector blocks; 95 unsigned total_size; 96 97 public: 98 sb_pool(unsigned block_size = SB_POOL_DEFAULT_BLOCK_SIZE) block_size(block_size)99 : block_size(block_size), blocks(), total_size() {} 100 ~sb_pool()101 virtual ~sb_pool() { free_all(); } 102 103 void* allocate(unsigned sz); 104 105 protected: 106 void free_all(); 107 }; 108 109 template <typename V, typename Comp = std::less<V> > 110 class sb_set { 111 typedef std::vector<V> data_vector; 112 data_vector vec; 113 public: 114 115 typedef typename data_vector::iterator iterator; 116 typedef typename data_vector::const_iterator const_iterator; 117 sb_set()118 sb_set() : vec() {} ~sb_set()119 ~sb_set() { } 120 begin()121 iterator begin() { return vec.begin(); } end()122 iterator end() { return vec.end(); } begin()123 const_iterator begin() const { return vec.begin(); } end()124 const_iterator end() const { return vec.end(); } 125 add_set(const sb_set & s)126 void add_set(const sb_set& s) { 127 data_vector t; 128 t.reserve(vec.size() + s.vec.size()); 129 std::set_union(vec.begin(), vec.end(), s.vec.begin(), s.vec.end(), 130 std::inserter(t, t.begin()), Comp()); 131 vec.swap(t); 132 } 133 lower_bound(const V & v)134 iterator lower_bound(const V& v) { 135 return std::lower_bound(vec.begin(), vec.end(), v, Comp()); 136 } 137 insert(const V & v)138 std::pair<iterator, bool> insert(const V& v) { 139 iterator P = lower_bound(v); 140 if (P != vec.end() && is_equal(*P, v)) 141 return std::make_pair(P, false); 142 return std::make_pair(vec.insert(P, v), true); 143 } 144 erase(const V & v)145 unsigned erase(const V& v) { 146 iterator P = lower_bound(v); 147 if (P == vec.end() || !is_equal(*P, v)) 148 return 0; 149 vec.erase(P); 150 return 1; 151 } 152 clear()153 void clear() { vec.clear(); } 154 empty()155 bool empty() { return vec.empty(); } 156 is_equal(const V & v1,const V & v2)157 bool is_equal(const V& v1, const V& v2) { 158 return !Comp()(v1, v2) && !Comp()(v2, v1); 159 } 160 find(const V & v)161 iterator find(const V& v) { 162 iterator P = lower_bound(v); 163 return (P != vec.end() && is_equal(*P, v)) ? P : vec.end(); 164 } 165 size()166 unsigned size() { return vec.size(); } erase(iterator I)167 void erase(iterator I) { vec.erase(I); } 168 }; 169 170 template <typename K, typename V, typename KComp = std::less<K> > 171 class sb_map { 172 typedef std::pair<K, V> datatype; 173 174 struct Comp { operatorComp175 bool operator()(const datatype &v1, const datatype &v2) { 176 return KComp()(v1.first, v2.first); 177 } 178 }; 179 180 typedef sb_set<datatype, Comp> dataset; 181 182 dataset set; 183 184 public: 185 sb_map()186 sb_map() : set() {} 187 188 typedef typename dataset::iterator iterator; 189 begin()190 iterator begin() { return set.begin(); } end()191 iterator end() { return set.end(); } 192 clear()193 void clear() { set.clear(); } 194 195 V& operator[](const K& key) { 196 datatype P = std::make_pair(key, V()); 197 iterator F = set.find(P); 198 if (F == set.end()) { 199 return (*(set.insert(P).first)).second; 200 } else { 201 return (*F).second; 202 } 203 } 204 insert(const datatype & d)205 std::pair<iterator, bool> insert(const datatype& d) { 206 return set.insert(d); 207 } 208 find(const K & key)209 iterator find(const K& key) { 210 return set.find(std::make_pair(key, V())); 211 } 212 erase(const K & key)213 unsigned erase(const K& key) { 214 return set.erase(std::make_pair(key, V())); 215 } 216 erase(iterator I)217 void erase(iterator I) { 218 set.erase(I); 219 } 220 }; 221 222 class sb_bitset { 223 typedef uint32_t basetype; 224 static const unsigned bt_bits = sizeof(basetype) << 3; 225 std::vector<basetype> data; 226 unsigned bit_size; 227 228 public: 229 sb_bitset()230 sb_bitset() : data(), bit_size() {} 231 232 bool get(unsigned id); 233 void set(unsigned id, bool bit = true); 234 bool set_chk(unsigned id, bool bit = true); 235 236 void clear(); 237 void resize(unsigned size); 238 size()239 unsigned size() { return bit_size; } 240 241 unsigned find_bit(unsigned start = 0); 242 243 void swap(sb_bitset & bs2); 244 245 bool operator==(const sb_bitset &bs2); 246 bool operator!=(const sb_bitset &bs2) { return !(*this == bs2); } 247 248 sb_bitset& operator|=(const sb_bitset &bs2) { 249 if (bit_size < bs2.bit_size) { 250 resize(bs2.bit_size); 251 } 252 253 for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c; 254 ++i) { 255 data[i] |= bs2.data[i]; 256 } 257 return *this; 258 } 259 260 sb_bitset& operator&=(const sb_bitset &bs2); 261 sb_bitset& mask(const sb_bitset &bs2); 262 263 friend sb_bitset operator|(const sb_bitset &b1, const sb_bitset &b2) { 264 sb_bitset nbs(b1); 265 nbs |= b2; 266 return nbs; 267 } 268 }; 269 270 enum value_kind { 271 VLK_REG, 272 VLK_REL_REG, 273 VLK_SPECIAL_REG, 274 VLK_TEMP, 275 276 VLK_CONST, 277 VLK_KCACHE, 278 VLK_PARAM, 279 VLK_SPECIAL_CONST, 280 281 VLK_UNDEF 282 }; 283 284 285 286 class sb_value_pool : protected sb_pool { 287 unsigned aligned_elt_size; 288 289 public: 290 sb_value_pool(unsigned elt_size, unsigned block_elts = 256) 291 : sb_pool(block_elts * (aligned_elt_size = ((elt_size + 292 SB_POOL_ALIGN - 1) & ~(SB_POOL_ALIGN - 1)))) {} 293 ~sb_value_pool()294 virtual ~sb_value_pool() { delete_all(); } 295 296 value* create(value_kind k, sel_chan regid, unsigned ver); 297 298 value* operator[](unsigned id) { 299 unsigned offset = id * aligned_elt_size; 300 unsigned block_id; 301 if (offset < block_size) { 302 block_id = 0; 303 } else { 304 block_id = offset / block_size; 305 offset = offset % block_size; 306 } 307 return (value*)((char*)blocks[block_id] + offset); 308 } 309 size()310 unsigned size() { return total_size / aligned_elt_size; } 311 312 protected: 313 void delete_all(); 314 }; 315 316 317 318 319 320 class sb_value_set { 321 322 sb_bitset bs; 323 324 public: sb_value_set()325 sb_value_set() : bs() {} 326 327 class iterator { 328 sb_value_pool &vp; 329 sb_value_set *s; 330 unsigned nb; 331 public: 332 iterator(shader &sh, sb_value_set *s, unsigned nb = 0); 333 334 335 iterator& operator++() { 336 if (nb + 1 < s->bs.size()) 337 nb = s->bs.find_bit(nb + 1); 338 else 339 nb = s->bs.size(); 340 return *this; 341 } 342 bool operator !=(const iterator &i) { 343 return s != i.s || nb != i.nb; 344 } 345 bool operator ==(const iterator &i) { return !(*this != i); } 346 value* operator *() { 347 return vp[nb]; 348 } 349 350 351 }; 352 begin(shader & sh)353 iterator begin(shader &sh) { 354 return iterator(sh, this, bs.size() ? bs.find_bit(0) : 0); 355 } end(shader & sh)356 iterator end(shader &sh) { return iterator(sh, this, bs.size()); } 357 358 bool add_set_checked(sb_value_set & s2); 359 add_set(sb_value_set & s2)360 void add_set(sb_value_set & s2) { 361 if (bs.size() < s2.bs.size()) 362 bs.resize(s2.bs.size()); 363 bs |= s2.bs; 364 } 365 366 void remove_set(sb_value_set & s2); 367 368 bool add_vec(vvec &vv); 369 370 bool add_val(value *v); 371 bool contains(value *v); 372 373 bool remove_val(value *v); 374 375 bool remove_vec(vvec &vv); 376 377 void clear(); 378 379 bool empty(); 380 }; 381 382 typedef sb_value_set val_set; 383 384 struct gpr_array { 385 sel_chan base_gpr; // original gpr 386 sel_chan gpr; // assigned by regalloc 387 unsigned array_size; 388 gpr_arraygpr_array389 gpr_array(sel_chan base_gpr, unsigned array_size) : base_gpr(base_gpr), 390 array_size(array_size) {} 391 hashgpr_array392 unsigned hash() { return (base_gpr << 10) * array_size; } 393 394 val_set interferences; 395 vvec refs; 396 397 bool is_dead(); 398 399 }; 400 401 typedef std::vector<gpr_array*> regarray_vec; 402 403 enum value_flags { 404 VLF_UNDEF = (1 << 0), 405 VLF_READONLY = (1 << 1), 406 VLF_DEAD = (1 << 2), 407 408 VLF_PIN_REG = (1 << 3), 409 VLF_PIN_CHAN = (1 << 4), 410 411 // opposite to alu clause local value - goes through alu clause boundary 412 // (can't use temp gpr, can't recolor in the alu scheduler, etc) 413 VLF_GLOBAL = (1 << 5), 414 VLF_FIXED = (1 << 6), 415 VLF_PVPS = (1 << 7), 416 417 VLF_PREALLOC = (1 << 8) 418 }; 419 420 inline value_flags operator |(value_flags l, value_flags r) { 421 return (value_flags)((unsigned)l|(unsigned)r); 422 } 423 inline value_flags operator &(value_flags l, value_flags r) { 424 return (value_flags)((unsigned)l&(unsigned)r); 425 } 426 inline value_flags operator ~(value_flags l) { 427 return (value_flags)(~(unsigned)l); 428 } 429 inline value_flags& operator |=(value_flags &l, value_flags r) { 430 l = l | r; 431 return l; 432 } 433 inline value_flags& operator &=(value_flags &l, value_flags r) { 434 l = l & r; 435 return l; 436 } 437 438 sb_ostream& operator << (sb_ostream &o, value &v); 439 440 typedef uint32_t value_hash; 441 442 typedef std::list< node * > uselist; 443 444 enum constraint_kind { 445 CK_SAME_REG, 446 CK_PACKED_BS, 447 CK_PHI 448 }; 449 450 class shader; 451 class sb_value_pool; 452 struct ra_chunk; 453 class ra_constraint; 454 455 class value { 456 protected: 457 value(unsigned sh_id, value_kind k, sel_chan select, unsigned ver = 0) kind(k)458 : kind(k), flags(), 459 rel(), array(), 460 version(ver), select(select), pin_gpr(select), gpr(), 461 gvn_source(), ghash(), 462 def(), adef(), uses(), constraint(), chunk(), 463 literal_value(), uid(sh_id) {} 464 ~value()465 ~value() { delete_uses(); } 466 467 friend class sb_value_pool; 468 public: 469 value_kind kind; 470 value_flags flags; 471 472 vvec mdef; 473 vvec muse; 474 value *rel; 475 gpr_array *array; 476 477 unsigned version; 478 479 sel_chan select; 480 sel_chan pin_gpr; 481 sel_chan gpr; 482 483 value *gvn_source; 484 value_hash ghash; 485 486 node *def, *adef; 487 uselist uses; 488 489 ra_constraint *constraint; 490 ra_chunk *chunk; 491 492 literal literal_value; 493 is_const()494 bool is_const() { return kind == VLK_CONST || kind == VLK_UNDEF; } 495 is_AR()496 bool is_AR() { 497 return is_special_reg() && select == sel_chan(SV_AR_INDEX, 0); 498 } is_geometry_emit()499 bool is_geometry_emit() { 500 return is_special_reg() && select == sel_chan(SV_GEOMETRY_EMIT, 0); 501 } is_lds_access()502 bool is_lds_access() { 503 return is_special_reg() && select == sel_chan(SV_LDS_RW, 0); 504 } is_lds_oq()505 bool is_lds_oq() { 506 return is_special_reg() && (select == sel_chan(SV_LDS_OQA, 0) || select == sel_chan(SV_LDS_OQB, 0)); 507 } 508 any_def()509 node* any_def() { 510 assert(!(def && adef)); 511 return def ? def : adef; 512 } 513 gvalue()514 value* gvalue() { 515 value *v = this; 516 while (v->gvn_source && v != v->gvn_source) 517 // FIXME we really shouldn't have such chains 518 v = v->gvn_source; 519 return v; 520 } is_scratch()521 bool is_scratch() { 522 return is_special_reg() && select == sel_chan(SV_SCRATCH, 0); 523 } 524 is_float_0_or_1()525 bool is_float_0_or_1() { 526 value *v = gvalue(); 527 return v->is_const() && (v->literal_value == literal(0) 528 || v->literal_value == literal(1.0f)); 529 } 530 is_undef()531 bool is_undef() { return gvalue()->kind == VLK_UNDEF; } 532 is_any_gpr()533 bool is_any_gpr() { 534 return (kind == VLK_REG || kind == VLK_TEMP); 535 } 536 is_agpr()537 bool is_agpr() { 538 return array && is_any_gpr(); 539 } 540 541 // scalar gpr, as opposed to element of gpr array is_sgpr()542 bool is_sgpr() { 543 return !array && is_any_gpr(); 544 } 545 is_special_reg()546 bool is_special_reg() { return kind == VLK_SPECIAL_REG; } is_any_reg()547 bool is_any_reg() { return is_any_gpr() || is_special_reg(); } is_kcache()548 bool is_kcache() { return kind == VLK_KCACHE; } is_rel()549 bool is_rel() { return kind == VLK_REL_REG; } is_readonly()550 bool is_readonly() { return flags & VLF_READONLY; } 551 is_chan_pinned()552 bool is_chan_pinned() { return flags & VLF_PIN_CHAN; } is_reg_pinned()553 bool is_reg_pinned() { return flags & VLF_PIN_REG; } 554 555 bool is_global(); 556 void set_global(); 557 void set_prealloc(); 558 559 bool is_prealloc(); 560 561 bool is_fixed(); 562 void fix(); 563 is_dead()564 bool is_dead() { return flags & VLF_DEAD; } 565 get_const_value()566 literal & get_const_value() { 567 value *v = gvalue(); 568 assert(v->is_const()); 569 return v->literal_value; 570 } 571 572 // true if needs to be encoded as literal in alu is_literal()573 bool is_literal() { 574 return is_const() 575 && literal_value != literal(0) 576 && literal_value != literal(1) 577 && literal_value != literal(-1) 578 && literal_value != literal(0.5) 579 && literal_value != literal(1.0); 580 } 581 582 void add_use(node *n); 583 void remove_use(const node *n); 584 585 value_hash hash(); 586 value_hash rel_hash(); 587 assign_source(value * v)588 void assign_source(value *v) { 589 assert(!gvn_source || gvn_source == this); 590 gvn_source = v->gvalue(); 591 } 592 v_equal(value * v)593 bool v_equal(value *v) { return gvalue() == v->gvalue(); } 594 595 unsigned use_count(); 596 void delete_uses(); 597 get_final_gpr()598 sel_chan get_final_gpr() { 599 if (array && array->gpr) { 600 int reg_offset = select.sel() - array->base_gpr.sel(); 601 if (rel && rel->is_const()) 602 reg_offset += rel->get_const_value().i; 603 return array->gpr + (reg_offset << 2); 604 } else { 605 return gpr; 606 } 607 } 608 get_final_chan()609 unsigned get_final_chan() { 610 if (array) { 611 assert(array->gpr); 612 return array->gpr.chan(); 613 } else { 614 assert(gpr); 615 return gpr.chan(); 616 } 617 } 618 619 /* Check whether copy-propagation of src into this would create an access 620 * conflict with relative addressing, i.e. an operation that tries to access 621 * array elements with different address register values. 622 */ 623 bool no_reladdr_conflict_with(value *src); 624 625 val_set interferences; 626 unsigned uid; 627 }; 628 629 class expr_handler; 630 631 class value_table { 632 typedef std::vector<value*> vt_item; 633 typedef std::vector<vt_item> vt_table; 634 635 expr_handler &ex; 636 637 unsigned size_bits; 638 unsigned size; 639 unsigned size_mask; 640 641 vt_table hashtable; 642 643 unsigned cnt; 644 645 public: 646 647 value_table(expr_handler &ex, unsigned size_bits = 10) ex(ex)648 : ex(ex), size_bits(size_bits), size(1u << size_bits), 649 size_mask(size - 1), hashtable(size), cnt() {} 650 ~value_table()651 ~value_table() {} 652 653 void add_value(value* v); 654 655 bool expr_equal(value* l, value* r); 656 count()657 unsigned count() { return cnt; } 658 659 void get_values(vvec & v); 660 }; 661 662 class sb_context; 663 664 enum node_type { 665 NT_UNKNOWN, 666 NT_LIST, 667 NT_OP, 668 NT_REGION, 669 NT_REPEAT, 670 NT_DEPART, 671 NT_IF, 672 }; 673 674 enum node_subtype { 675 NST_UNKNOWN, 676 NST_LIST, 677 NST_ALU_GROUP, 678 NST_ALU_CLAUSE, 679 NST_ALU_INST, 680 NST_ALU_PACKED_INST, 681 NST_CF_INST, 682 NST_FETCH_INST, 683 NST_TEX_CLAUSE, 684 NST_VTX_CLAUSE, 685 NST_GDS_CLAUSE, 686 687 NST_BB, 688 689 NST_PHI, 690 NST_PSI, 691 NST_COPY, 692 693 NST_LOOP_PHI_CONTAINER, 694 NST_LOOP_CONTINUE, 695 NST_LOOP_BREAK 696 }; 697 698 enum node_flags { 699 NF_EMPTY = 0, 700 NF_DEAD = (1 << 0), 701 NF_REG_CONSTRAINT = (1 << 1), 702 NF_CHAN_CONSTRAINT = (1 << 2), 703 NF_ALU_4SLOT = (1 << 3), 704 NF_CONTAINER = (1 << 4), 705 706 NF_COPY_MOV = (1 << 5), 707 708 NF_DONT_KILL = (1 << 6), 709 NF_DONT_HOIST = (1 << 7), 710 NF_DONT_MOVE = (1 << 8), 711 712 // for KILLxx - we want to schedule them as early as possible 713 NF_SCHEDULE_EARLY = (1 << 9), 714 715 // for ALU_PUSH_BEFORE - when set, replace with PUSH + ALU 716 NF_ALU_STACK_WORKAROUND = (1 << 10), 717 NF_ALU_2SLOT = (1 << 11), 718 }; 719 720 inline node_flags operator |(node_flags l, node_flags r) { 721 return (node_flags)((unsigned)l|(unsigned)r); 722 } 723 inline node_flags& operator |=(node_flags &l, node_flags r) { 724 l = l | r; 725 return l; 726 } 727 728 inline node_flags& operator &=(node_flags &l, node_flags r) { 729 l = (node_flags)((unsigned)l & (unsigned)r); 730 return l; 731 } 732 733 inline node_flags operator ~(node_flags r) { 734 return (node_flags)~(unsigned)r; 735 } 736 737 struct node_stats { 738 unsigned alu_count; 739 unsigned alu_kill_count; 740 unsigned alu_copy_mov_count; 741 unsigned cf_count; 742 unsigned fetch_count; 743 unsigned region_count; 744 unsigned loop_count; 745 unsigned phi_count; 746 unsigned loop_phi_count; 747 unsigned depart_count; 748 unsigned repeat_count; 749 unsigned if_count; 750 bool uses_ar; 751 node_statsnode_stats752 node_stats() : alu_count(), alu_kill_count(), alu_copy_mov_count(), 753 cf_count(), fetch_count(), region_count(), 754 loop_count(), phi_count(), loop_phi_count(), depart_count(), 755 repeat_count(), if_count(), uses_ar(false) {} 756 757 void dump(); 758 }; 759 760 class shader; 761 762 class vpass; 763 764 class container_node; 765 class region_node; 766 767 class node { 768 769 protected: 770 node(node_type nt, node_subtype nst, node_flags flags = NF_EMPTY) prev()771 : prev(), next(), parent(), 772 type(nt), subtype(nst), flags(flags), 773 pred(), dst(), src() {} 774 ~node()775 virtual ~node() {}; 776 777 public: 778 node *prev, *next; 779 container_node *parent; 780 781 node_type type; 782 node_subtype subtype; 783 node_flags flags; 784 785 value *pred; 786 787 vvec dst; 788 vvec src; 789 is_valid()790 virtual bool is_valid() { return true; } 791 virtual bool accept(vpass &p, bool enter); 792 793 void insert_before(node *n); 794 void insert_after(node *n); 795 void replace_with(node *n); 796 void remove(); 797 798 virtual value_hash hash() const; 799 value_hash hash_src() const; 800 801 virtual bool fold_dispatch(expr_handler *ex); 802 is_container()803 bool is_container() { return flags & NF_CONTAINER; } 804 is_alu_packed()805 bool is_alu_packed() { return subtype == NST_ALU_PACKED_INST; } is_alu_inst()806 bool is_alu_inst() { return subtype == NST_ALU_INST; } is_alu_group()807 bool is_alu_group() { return subtype == NST_ALU_GROUP; } is_alu_clause()808 bool is_alu_clause() { return subtype == NST_ALU_CLAUSE; } 809 is_fetch_clause()810 bool is_fetch_clause() { 811 return subtype == NST_TEX_CLAUSE || subtype == NST_VTX_CLAUSE || subtype == NST_GDS_CLAUSE; 812 } 813 is_copy()814 bool is_copy() { return subtype == NST_COPY; } is_copy_mov()815 bool is_copy_mov() { return flags & NF_COPY_MOV; } is_any_alu()816 bool is_any_alu() { return is_alu_inst() || is_alu_packed() || is_copy(); } 817 is_fetch_inst()818 bool is_fetch_inst() { return subtype == NST_FETCH_INST; } is_cf_inst()819 bool is_cf_inst() { return subtype == NST_CF_INST; } 820 is_region()821 bool is_region() { return type == NT_REGION; } is_depart()822 bool is_depart() { return type == NT_DEPART; } is_repeat()823 bool is_repeat() { return type == NT_REPEAT; } is_if()824 bool is_if() { return type == NT_IF; } is_bb()825 bool is_bb() { return subtype == NST_BB; } 826 is_phi()827 bool is_phi() { return subtype == NST_PHI; } 828 is_dead()829 bool is_dead() { return flags & NF_DEAD; } 830 831 bool is_cf_op(unsigned op); 832 bool is_alu_op(unsigned op); 833 bool is_fetch_op(unsigned op); 834 835 unsigned cf_op_flags(); 836 unsigned alu_op_flags(); 837 unsigned alu_op_slot_flags(); 838 unsigned fetch_op_flags(); 839 840 bool is_mova(); 841 bool is_pred_set(); 842 vec_uses_ar(vvec & vv)843 bool vec_uses_ar(vvec &vv) { 844 for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { 845 value *v = *I; 846 if (v && v->rel && !v->rel->is_const()) 847 return true; 848 } 849 return false; 850 } 851 uses_ar()852 bool uses_ar() { 853 return vec_uses_ar(dst) || vec_uses_ar(src); 854 } 855 vec_uses_lds_oq(vvec & vv)856 bool vec_uses_lds_oq(vvec &vv) { 857 for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { 858 value *v = *I; 859 if (v && v->is_lds_oq()) 860 return true; 861 } 862 return false; 863 } 864 consumes_lds_oq()865 bool consumes_lds_oq() { 866 return vec_uses_lds_oq(src); 867 } 868 produces_lds_oq()869 bool produces_lds_oq() { 870 return vec_uses_lds_oq(dst); 871 } 872 873 region_node* get_parent_region(); 874 875 friend class shader; 876 }; 877 878 class container_node : public node { 879 public: 880 881 container_node(node_type nt = NT_LIST, node_subtype nst = NST_LIST, 882 node_flags flags = NF_EMPTY) 883 : node(nt, nst, flags | NF_CONTAINER), first(), last(), 884 live_after(), live_before() {} 885 886 // child items list 887 node *first, *last; 888 889 val_set live_after; 890 val_set live_before; 891 892 class iterator { 893 node *p; 894 public: p(pp)895 iterator(node *pp = NULL) : p(pp) {} 896 iterator & operator ++() { p = p->next; return *this;} 897 iterator & operator --() { p = p->prev; return *this;} 898 node* operator *() { return p; } 899 node* operator ->() { return p; } advance(int n)900 const iterator advance(int n) { 901 if (!n) return *this; 902 iterator I(p); 903 if (n > 0) while (n--) ++I; 904 else while (n++) --I; 905 return I; 906 } 907 const iterator operator +(int n) { return advance(n); } 908 const iterator operator -(int n) { return advance(-n); } 909 bool operator !=(const iterator &i) { return p != i.p; } 910 bool operator ==(const iterator &i) { return p == i.p; } 911 }; 912 913 class riterator { 914 iterator i; 915 public: i(p)916 riterator(node *p = NULL) : i(p) {} 917 riterator & operator ++() { --i; return *this;} 918 riterator & operator --() { ++i; return *this;} 919 node* operator *() { return *i; } 920 node* operator ->() { return *i; } 921 bool operator !=(const riterator &r) { return i != r.i; } 922 bool operator ==(const riterator &r) { return i == r.i; } 923 }; 924 begin()925 iterator begin() { return first; } end()926 iterator end() { return NULL; } rbegin()927 riterator rbegin() { return last; } rend()928 riterator rend() { return NULL; } 929 empty()930 bool empty() { assert(first != NULL || first == last); return !first; } 931 unsigned count(); 932 933 // used with node containers that represent scheduling queues 934 // ignores copies and takes into account alu_packed_node items 935 unsigned real_alu_count(); 936 937 void push_back(node *n); 938 void push_front(node *n); 939 940 void insert_node_before(node *s, node *n); 941 void insert_node_after(node *s, node *n); 942 943 void append_from(container_node *c); 944 945 // remove range [b..e) from some container and assign to this container 946 void move(iterator b, iterator e); 947 948 void expand(); 949 void expand(container_node *n); 950 void remove_node(node *n); 951 952 node *cut(iterator b, iterator e); 953 clear()954 void clear() { first = last = NULL; } 955 is_valid()956 virtual bool is_valid() { return true; } 957 virtual bool accept(vpass &p, bool enter); 958 virtual bool fold_dispatch(expr_handler *ex); 959 front()960 node* front() { return first; } back()961 node* back() { return last; } 962 963 void collect_stats(node_stats &s); 964 965 friend class shader; 966 967 968 }; 969 970 typedef container_node::iterator node_iterator; 971 typedef container_node::riterator node_riterator; 972 973 class alu_group_node : public container_node { 974 protected: alu_group_node()975 alu_group_node() : container_node(NT_LIST, NST_ALU_GROUP), literals() {} 976 public: 977 978 std::vector<literal> literals; 979 is_valid()980 virtual bool is_valid() { return subtype == NST_ALU_GROUP; } 981 virtual bool accept(vpass &p, bool enter); 982 983 literal_chan(literal l)984 unsigned literal_chan(literal l) { 985 std::vector<literal>::iterator F = 986 std::find(literals.begin(), literals.end(), l); 987 assert(F != literals.end()); 988 return F - literals.begin(); 989 } 990 991 friend class shader; 992 }; 993 994 class cf_node : public container_node { 995 protected: cf_node()996 cf_node() : container_node(NT_OP, NST_CF_INST), jump_target(), 997 jump_after_target() { memset(&bc, 0, sizeof(bc_cf)); }; 998 public: 999 bc_cf bc; 1000 1001 cf_node *jump_target; 1002 bool jump_after_target; 1003 is_valid()1004 virtual bool is_valid() { return subtype == NST_CF_INST; } 1005 virtual bool accept(vpass &p, bool enter); 1006 virtual bool fold_dispatch(expr_handler *ex); 1007 jump(cf_node * c)1008 void jump(cf_node *c) { jump_target = c; jump_after_target = false; } jump_after(cf_node * c)1009 void jump_after(cf_node *c) { jump_target = c; jump_after_target = true; } 1010 1011 friend class shader; 1012 }; 1013 1014 class alu_node : public node { 1015 protected: alu_node()1016 alu_node() : node(NT_OP, NST_ALU_INST) { } 1017 public: 1018 bc_alu bc; 1019 is_valid()1020 virtual bool is_valid() { return subtype == NST_ALU_INST; } 1021 virtual bool accept(vpass &p, bool enter); 1022 virtual bool fold_dispatch(expr_handler *ex); 1023 forced_bank_swizzle()1024 unsigned forced_bank_swizzle() { 1025 return ((bc.op_ptr->flags & AF_INTERP) && 1026 ((bc.slot_flags == AF_4V) || 1027 (bc.slot_flags == AF_2V))) ? VEC_210 : 0; 1028 } 1029 1030 // return param index + 1 if instruction references interpolation param, 1031 // otherwise 0 1032 unsigned interp_param(); 1033 1034 alu_group_node *get_alu_group_node(); 1035 1036 friend class shader; 1037 }; 1038 1039 // for multi-slot instrs - DOT/INTERP/... (maybe useful for 64bit pairs later) 1040 class alu_packed_node : public container_node { 1041 protected: alu_packed_node()1042 alu_packed_node() : container_node(NT_OP, NST_ALU_PACKED_INST) {} 1043 public: 1044 op_ptr()1045 const alu_op_info* op_ptr() { 1046 return static_cast<alu_node*>(first)->bc.op_ptr; 1047 } op()1048 unsigned op() { return static_cast<alu_node*>(first)->bc.op; } 1049 void init_args(bool repl); 1050 is_valid()1051 virtual bool is_valid() { return subtype == NST_ALU_PACKED_INST; } 1052 virtual bool accept(vpass &p, bool enter); 1053 virtual bool fold_dispatch(expr_handler *ex); 1054 1055 unsigned get_slot_mask(); 1056 void update_packed_items(sb_context &ctx); 1057 1058 friend class shader; 1059 }; 1060 1061 class fetch_node : public node { 1062 protected: fetch_node()1063 fetch_node() : node(NT_OP, NST_FETCH_INST) { memset(&bc, 0, sizeof(bc_fetch)); }; 1064 public: 1065 bc_fetch bc; 1066 is_valid()1067 virtual bool is_valid() { return subtype == NST_FETCH_INST; } 1068 virtual bool accept(vpass &p, bool enter); 1069 virtual bool fold_dispatch(expr_handler *ex); 1070 uses_grad()1071 bool uses_grad() { return bc.op_ptr->flags & FF_USEGRAD; } 1072 1073 friend class shader; 1074 }; 1075 1076 class region_node; 1077 1078 class repeat_node : public container_node { 1079 protected: repeat_node(region_node * target,unsigned id)1080 repeat_node(region_node *target, unsigned id) 1081 : container_node(NT_REPEAT, NST_LIST), target(target), rep_id(id) {} 1082 public: 1083 region_node *target; 1084 unsigned rep_id; 1085 1086 virtual bool accept(vpass &p, bool enter); 1087 1088 friend class shader; 1089 }; 1090 1091 class depart_node : public container_node { 1092 protected: depart_node(region_node * target,unsigned id)1093 depart_node(region_node *target, unsigned id) 1094 : container_node(NT_DEPART, NST_LIST), target(target), dep_id(id) {} 1095 public: 1096 region_node *target; 1097 unsigned dep_id; 1098 1099 virtual bool accept(vpass &p, bool enter); 1100 1101 friend class shader; 1102 }; 1103 1104 class if_node : public container_node { 1105 protected: if_node()1106 if_node() : container_node(NT_IF, NST_LIST), cond() {}; 1107 public: 1108 value *cond; // glued to pseudo output (dst[2]) of the PRED_SETxxx 1109 1110 virtual bool accept(vpass &p, bool enter); 1111 1112 friend class shader; 1113 }; 1114 1115 typedef std::vector<depart_node*> depart_vec; 1116 typedef std::vector<repeat_node*> repeat_vec; 1117 1118 class region_node : public container_node { 1119 protected: region_node(unsigned id)1120 region_node(unsigned id) : container_node(NT_REGION, NST_LIST), region_id(id), 1121 loop_phi(), phi(), vars_defined(), departs(), repeats(), src_loop() 1122 {} 1123 public: 1124 unsigned region_id; 1125 1126 container_node *loop_phi; 1127 container_node *phi; 1128 1129 val_set vars_defined; 1130 1131 depart_vec departs; 1132 repeat_vec repeats; 1133 1134 // true if region was created for loop in the parser, sometimes repeat_node 1135 // may be optimized away so we need to remember this information 1136 bool src_loop; 1137 1138 virtual bool accept(vpass &p, bool enter); 1139 dep_count()1140 unsigned dep_count() { return departs.size(); } rep_count()1141 unsigned rep_count() { return repeats.size() + 1; } 1142 is_loop()1143 bool is_loop() { return src_loop || !repeats.empty(); } 1144 get_entry_code_location()1145 container_node* get_entry_code_location() { 1146 node *p = first; 1147 while (p && (p->is_depart() || p->is_repeat())) 1148 p = static_cast<container_node*>(p)->first; 1149 1150 container_node *c = static_cast<container_node*>(p); 1151 if (c->is_bb()) 1152 return c; 1153 else 1154 return c->parent; 1155 } 1156 1157 void expand_depart(depart_node *d); 1158 void expand_repeat(repeat_node *r); 1159 1160 friend class shader; 1161 }; 1162 1163 class bb_node : public container_node { 1164 protected: bb_node(unsigned id,unsigned loop_level)1165 bb_node(unsigned id, unsigned loop_level) 1166 : container_node(NT_LIST, NST_BB), id(id), loop_level(loop_level) {} 1167 public: 1168 unsigned id; 1169 unsigned loop_level; 1170 1171 virtual bool accept(vpass &p, bool enter); 1172 1173 friend class shader; 1174 }; 1175 1176 1177 typedef std::vector<region_node*> regions_vec; 1178 typedef std::vector<bb_node*> bbs_vec; 1179 typedef std::list<node*> sched_queue; 1180 typedef sched_queue::iterator sq_iterator; 1181 typedef std::vector<node*> node_vec; 1182 typedef std::list<node*> node_list; 1183 typedef std::set<node*> node_set; 1184 1185 1186 1187 } // namespace r600_sb 1188 1189 #endif /* R600_SB_IR_H_ */ 1190