1 /* 2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27 #ifndef R600_SB_IR_H_ 28 #define R600_SB_IR_H_ 29 30 #include <algorithm> 31 #include <stdint.h> 32 #include <vector> 33 #include <set> 34 #include <algorithm> 35 36 #include "sb_bc.h" 37 38 namespace r600_sb { 39 40 enum special_regs { 41 SV_ALU_PRED = 128, 42 SV_EXEC_MASK, 43 SV_AR_INDEX, 44 SV_VALID_MASK, 45 SV_GEOMETRY_EMIT, 46 SV_LDS_RW, 47 SV_LDS_OQA, 48 SV_LDS_OQB, 49 }; 50 51 class node; 52 class value; 53 class shader; 54 55 struct sel_chan 56 { 57 unsigned id; 58 idsel_chan59 sel_chan(unsigned id = 0) : id(id) {} sel_chansel_chan60 sel_chan(unsigned sel, unsigned chan) : id(((sel << 2) | chan) + 1) {} 61 selsel_chan62 unsigned sel() const { return sel(id); } chansel_chan63 unsigned chan() const {return chan(id); } 64 operator unsigned() const {return id;} 65 selsel_chan66 static unsigned sel(unsigned idx) { return (idx-1) >> 2; } chansel_chan67 static unsigned chan(unsigned idx) { return (idx-1) & 3; } 68 sel_chansel_chan69 sel_chan(unsigned bank, unsigned index, 70 unsigned chan, alu_kcache_index_mode index_mode) 71 : id(sel_chan((bank << 12) | index | ((unsigned)index_mode << 28), chan).id) {} kcache_index_modesel_chan72 unsigned kcache_index_mode() const { return sel() >> 28; } kcache_selsel_chan73 unsigned kcache_sel() const { return sel() & 0x0fffffffu; } kcache_banksel_chan74 unsigned kcache_bank() const { return kcache_sel() >> 12; } 75 }; 76 77 inline sb_ostream& operator <<(sb_ostream& o, sel_chan r) { 78 static const char * ch = "xyzw"; 79 o << r.sel() << "." << ch[r.chan()]; 80 return o; 81 } 82 83 typedef std::vector<value*> vvec; 84 85 class sb_pool { 86 protected: 87 static const unsigned SB_POOL_ALIGN = 8; 88 static const unsigned SB_POOL_DEFAULT_BLOCK_SIZE = (1 << 16); 89 90 typedef std::vector<void*> block_vector; 91 92 unsigned block_size; 93 block_vector blocks; 94 unsigned total_size; 95 96 public: 97 sb_pool(unsigned block_size = SB_POOL_DEFAULT_BLOCK_SIZE) block_size(block_size)98 : block_size(block_size), blocks(), total_size() {} 99 ~sb_pool()100 virtual ~sb_pool() { free_all(); } 101 102 void* allocate(unsigned sz); 103 104 protected: 105 void free_all(); 106 }; 107 108 template <typename V, typename Comp = std::less<V> > 109 class sb_set { 110 typedef std::vector<V> data_vector; 111 data_vector vec; 112 public: 113 114 typedef typename data_vector::iterator iterator; 115 typedef typename data_vector::const_iterator const_iterator; 116 sb_set()117 sb_set() : vec() {} ~sb_set()118 ~sb_set() { } 119 begin()120 iterator begin() { return vec.begin(); } end()121 iterator end() { return vec.end(); } begin()122 const_iterator begin() const { return vec.begin(); } end()123 const_iterator end() const { return vec.end(); } 124 add_set(const sb_set & s)125 void add_set(const sb_set& s) { 126 data_vector t; 127 t.reserve(vec.size() + s.vec.size()); 128 std::set_union(vec.begin(), vec.end(), s.vec.begin(), s.vec.end(), 129 std::inserter(t, t.begin()), Comp()); 130 vec.swap(t); 131 } 132 lower_bound(const V & v)133 iterator lower_bound(const V& v) { 134 return std::lower_bound(vec.begin(), vec.end(), v, Comp()); 135 } 136 insert(const V & v)137 std::pair<iterator, bool> insert(const V& v) { 138 iterator P = lower_bound(v); 139 if (P != vec.end() && is_equal(*P, v)) 140 return std::make_pair(P, false); 141 return std::make_pair(vec.insert(P, v), true); 142 } 143 erase(const V & v)144 unsigned erase(const V& v) { 145 iterator P = lower_bound(v); 146 if (P == vec.end() || !is_equal(*P, v)) 147 return 0; 148 vec.erase(P); 149 return 1; 150 } 151 clear()152 void clear() { vec.clear(); } 153 empty()154 bool empty() { return vec.empty(); } 155 is_equal(const V & v1,const V & v2)156 bool is_equal(const V& v1, const V& v2) { 157 return !Comp()(v1, v2) && !Comp()(v2, v1); 158 } 159 find(const V & v)160 iterator find(const V& v) { 161 iterator P = lower_bound(v); 162 return (P != vec.end() && is_equal(*P, v)) ? P : vec.end(); 163 } 164 size()165 unsigned size() { return vec.size(); } erase(iterator I)166 void erase(iterator I) { vec.erase(I); } 167 }; 168 169 template <typename K, typename V, typename KComp = std::less<K> > 170 class sb_map { 171 typedef std::pair<K, V> datatype; 172 173 struct Comp { operatorComp174 bool operator()(const datatype &v1, const datatype &v2) { 175 return KComp()(v1.first, v2.first); 176 } 177 }; 178 179 typedef sb_set<datatype, Comp> dataset; 180 181 dataset set; 182 183 public: 184 sb_map()185 sb_map() : set() {} 186 187 typedef typename dataset::iterator iterator; 188 begin()189 iterator begin() { return set.begin(); } end()190 iterator end() { return set.end(); } 191 clear()192 void clear() { set.clear(); } 193 194 V& operator[](const K& key) { 195 datatype P = std::make_pair(key, V()); 196 iterator F = set.find(P); 197 if (F == set.end()) { 198 return (*(set.insert(P).first)).second; 199 } else { 200 return (*F).second; 201 } 202 } 203 insert(const datatype & d)204 std::pair<iterator, bool> insert(const datatype& d) { 205 return set.insert(d); 206 } 207 find(const K & key)208 iterator find(const K& key) { 209 return set.find(std::make_pair(key, V())); 210 } 211 erase(const K & key)212 unsigned erase(const K& key) { 213 return set.erase(std::make_pair(key, V())); 214 } 215 erase(iterator I)216 void erase(iterator I) { 217 set.erase(I); 218 } 219 }; 220 221 class sb_bitset { 222 typedef uint32_t basetype; 223 static const unsigned bt_bits = sizeof(basetype) << 3; 224 std::vector<basetype> data; 225 unsigned bit_size; 226 227 public: 228 sb_bitset()229 sb_bitset() : data(), bit_size() {} 230 231 bool get(unsigned id); 232 void set(unsigned id, bool bit = true); 233 bool set_chk(unsigned id, bool bit = true); 234 235 void clear(); 236 void resize(unsigned size); 237 size()238 unsigned size() { return bit_size; } 239 240 unsigned find_bit(unsigned start = 0); 241 242 void swap(sb_bitset & bs2); 243 244 bool operator==(const sb_bitset &bs2); 245 bool operator!=(const sb_bitset &bs2) { return !(*this == bs2); } 246 247 sb_bitset& operator|=(const sb_bitset &bs2) { 248 if (bit_size < bs2.bit_size) { 249 resize(bs2.bit_size); 250 } 251 252 for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c; 253 ++i) { 254 data[i] |= bs2.data[i]; 255 } 256 return *this; 257 } 258 259 sb_bitset& operator&=(const sb_bitset &bs2); 260 sb_bitset& mask(const sb_bitset &bs2); 261 262 friend sb_bitset operator|(const sb_bitset &b1, const sb_bitset &b2) { 263 sb_bitset nbs(b1); 264 nbs |= b2; 265 return nbs; 266 } 267 }; 268 269 enum value_kind { 270 VLK_REG, 271 VLK_REL_REG, 272 VLK_SPECIAL_REG, 273 VLK_TEMP, 274 275 VLK_CONST, 276 VLK_KCACHE, 277 VLK_PARAM, 278 VLK_SPECIAL_CONST, 279 280 VLK_UNDEF 281 }; 282 283 284 285 class sb_value_pool : protected sb_pool { 286 unsigned aligned_elt_size; 287 288 public: 289 sb_value_pool(unsigned elt_size, unsigned block_elts = 256) 290 : sb_pool(block_elts * (aligned_elt_size = ((elt_size + 291 SB_POOL_ALIGN - 1) & ~(SB_POOL_ALIGN - 1)))) {} 292 ~sb_value_pool()293 virtual ~sb_value_pool() { delete_all(); } 294 295 value* create(value_kind k, sel_chan regid, unsigned ver); 296 297 value* operator[](unsigned id) { 298 unsigned offset = id * aligned_elt_size; 299 unsigned block_id; 300 if (offset < block_size) { 301 block_id = 0; 302 } else { 303 block_id = offset / block_size; 304 offset = offset % block_size; 305 } 306 return (value*)((char*)blocks[block_id] + offset); 307 } 308 size()309 unsigned size() { return total_size / aligned_elt_size; } 310 311 protected: 312 void delete_all(); 313 }; 314 315 316 317 318 319 class sb_value_set { 320 321 sb_bitset bs; 322 323 public: sb_value_set()324 sb_value_set() : bs() {} 325 326 class iterator { 327 sb_value_pool &vp; 328 sb_value_set *s; 329 unsigned nb; 330 public: 331 iterator(shader &sh, sb_value_set *s, unsigned nb = 0); 332 333 334 iterator& operator++() { 335 if (nb + 1 < s->bs.size()) 336 nb = s->bs.find_bit(nb + 1); 337 else 338 nb = s->bs.size(); 339 return *this; 340 } 341 bool operator !=(const iterator &i) { 342 return s != i.s || nb != i.nb; 343 } 344 bool operator ==(const iterator &i) { return !(*this != i); } 345 value* operator *() { 346 return vp[nb]; 347 } 348 349 350 }; 351 begin(shader & sh)352 iterator begin(shader &sh) { 353 return iterator(sh, this, bs.size() ? bs.find_bit(0) : 0); 354 } end(shader & sh)355 iterator end(shader &sh) { return iterator(sh, this, bs.size()); } 356 357 bool add_set_checked(sb_value_set & s2); 358 add_set(sb_value_set & s2)359 void add_set(sb_value_set & s2) { 360 if (bs.size() < s2.bs.size()) 361 bs.resize(s2.bs.size()); 362 bs |= s2.bs; 363 } 364 365 void remove_set(sb_value_set & s2); 366 367 bool add_vec(vvec &vv); 368 369 bool add_val(value *v); 370 bool contains(value *v); 371 372 bool remove_val(value *v); 373 374 bool remove_vec(vvec &vv); 375 376 void clear(); 377 378 bool empty(); 379 }; 380 381 typedef sb_value_set val_set; 382 383 struct gpr_array { 384 sel_chan base_gpr; // original gpr 385 sel_chan gpr; // assigned by regalloc 386 unsigned array_size; 387 gpr_arraygpr_array388 gpr_array(sel_chan base_gpr, unsigned array_size) : base_gpr(base_gpr), 389 array_size(array_size) {} 390 hashgpr_array391 unsigned hash() { return (base_gpr << 10) * array_size; } 392 393 val_set interferences; 394 vvec refs; 395 396 bool is_dead(); 397 398 }; 399 400 typedef std::vector<gpr_array*> regarray_vec; 401 402 enum value_flags { 403 VLF_UNDEF = (1 << 0), 404 VLF_READONLY = (1 << 1), 405 VLF_DEAD = (1 << 2), 406 407 VLF_PIN_REG = (1 << 3), 408 VLF_PIN_CHAN = (1 << 4), 409 410 // opposite to alu clause local value - goes through alu clause boundary 411 // (can't use temp gpr, can't recolor in the alu scheduler, etc) 412 VLF_GLOBAL = (1 << 5), 413 VLF_FIXED = (1 << 6), 414 VLF_PVPS = (1 << 7), 415 416 VLF_PREALLOC = (1 << 8) 417 }; 418 419 inline value_flags operator |(value_flags l, value_flags r) { 420 return (value_flags)((unsigned)l|(unsigned)r); 421 } 422 inline value_flags operator &(value_flags l, value_flags r) { 423 return (value_flags)((unsigned)l&(unsigned)r); 424 } 425 inline value_flags operator ~(value_flags l) { 426 return (value_flags)(~(unsigned)l); 427 } 428 inline value_flags& operator |=(value_flags &l, value_flags r) { 429 l = l | r; 430 return l; 431 } 432 inline value_flags& operator &=(value_flags &l, value_flags r) { 433 l = l & r; 434 return l; 435 } 436 437 sb_ostream& operator << (sb_ostream &o, value &v); 438 439 typedef uint32_t value_hash; 440 441 typedef std::list< node * > uselist; 442 443 enum constraint_kind { 444 CK_SAME_REG, 445 CK_PACKED_BS, 446 CK_PHI 447 }; 448 449 class shader; 450 class sb_value_pool; 451 struct ra_chunk; 452 class ra_constraint; 453 454 class value { 455 protected: 456 value(unsigned sh_id, value_kind k, sel_chan select, unsigned ver = 0) kind(k)457 : kind(k), flags(), 458 rel(), array(), 459 version(ver), select(select), pin_gpr(select), gpr(), 460 gvn_source(), ghash(), 461 def(), adef(), uses(), constraint(), chunk(), 462 literal_value(), uid(sh_id) {} 463 ~value()464 ~value() { delete_uses(); } 465 466 friend class sb_value_pool; 467 public: 468 value_kind kind; 469 value_flags flags; 470 471 vvec mdef; 472 vvec muse; 473 value *rel; 474 gpr_array *array; 475 476 unsigned version; 477 478 sel_chan select; 479 sel_chan pin_gpr; 480 sel_chan gpr; 481 482 value *gvn_source; 483 value_hash ghash; 484 485 node *def, *adef; 486 uselist uses; 487 488 ra_constraint *constraint; 489 ra_chunk *chunk; 490 491 literal literal_value; 492 is_const()493 bool is_const() { return kind == VLK_CONST || kind == VLK_UNDEF; } 494 is_AR()495 bool is_AR() { 496 return is_special_reg() && select == sel_chan(SV_AR_INDEX, 0); 497 } is_geometry_emit()498 bool is_geometry_emit() { 499 return is_special_reg() && select == sel_chan(SV_GEOMETRY_EMIT, 0); 500 } is_lds_access()501 bool is_lds_access() { 502 return is_special_reg() && select == sel_chan(SV_LDS_RW, 0); 503 } is_lds_oq()504 bool is_lds_oq() { 505 return is_special_reg() && (select == sel_chan(SV_LDS_OQA, 0) || select == sel_chan(SV_LDS_OQB, 0)); 506 } 507 any_def()508 node* any_def() { 509 assert(!(def && adef)); 510 return def ? def : adef; 511 } 512 gvalue()513 value* gvalue() { 514 value *v = this; 515 while (v->gvn_source && v != v->gvn_source) 516 // FIXME we really shouldn't have such chains 517 v = v->gvn_source; 518 return v; 519 } 520 is_float_0_or_1()521 bool is_float_0_or_1() { 522 value *v = gvalue(); 523 return v->is_const() && (v->literal_value == literal(0) 524 || v->literal_value == literal(1.0f)); 525 } 526 is_undef()527 bool is_undef() { return gvalue()->kind == VLK_UNDEF; } 528 is_any_gpr()529 bool is_any_gpr() { 530 return (kind == VLK_REG || kind == VLK_TEMP); 531 } 532 is_agpr()533 bool is_agpr() { 534 return array && is_any_gpr(); 535 } 536 537 // scalar gpr, as opposed to element of gpr array is_sgpr()538 bool is_sgpr() { 539 return !array && is_any_gpr(); 540 } 541 is_special_reg()542 bool is_special_reg() { return kind == VLK_SPECIAL_REG; } is_any_reg()543 bool is_any_reg() { return is_any_gpr() || is_special_reg(); } is_kcache()544 bool is_kcache() { return kind == VLK_KCACHE; } is_rel()545 bool is_rel() { return kind == VLK_REL_REG; } is_readonly()546 bool is_readonly() { return flags & VLF_READONLY; } 547 is_chan_pinned()548 bool is_chan_pinned() { return flags & VLF_PIN_CHAN; } is_reg_pinned()549 bool is_reg_pinned() { return flags & VLF_PIN_REG; } 550 551 bool is_global(); 552 void set_global(); 553 void set_prealloc(); 554 555 bool is_prealloc(); 556 557 bool is_fixed(); 558 void fix(); 559 is_dead()560 bool is_dead() { return flags & VLF_DEAD; } 561 get_const_value()562 literal & get_const_value() { 563 value *v = gvalue(); 564 assert(v->is_const()); 565 return v->literal_value; 566 } 567 568 // true if needs to be encoded as literal in alu is_literal()569 bool is_literal() { 570 return is_const() 571 && literal_value != literal(0) 572 && literal_value != literal(1) 573 && literal_value != literal(-1) 574 && literal_value != literal(0.5) 575 && literal_value != literal(1.0); 576 } 577 578 void add_use(node *n); 579 void remove_use(const node *n); 580 581 value_hash hash(); 582 value_hash rel_hash(); 583 assign_source(value * v)584 void assign_source(value *v) { 585 assert(!gvn_source || gvn_source == this); 586 gvn_source = v->gvalue(); 587 } 588 v_equal(value * v)589 bool v_equal(value *v) { return gvalue() == v->gvalue(); } 590 591 unsigned use_count(); 592 void delete_uses(); 593 get_final_gpr()594 sel_chan get_final_gpr() { 595 if (array && array->gpr) { 596 int reg_offset = select.sel() - array->base_gpr.sel(); 597 if (rel && rel->is_const()) 598 reg_offset += rel->get_const_value().i; 599 return array->gpr + (reg_offset << 2); 600 } else { 601 return gpr; 602 } 603 } 604 get_final_chan()605 unsigned get_final_chan() { 606 if (array) { 607 assert(array->gpr); 608 return array->gpr.chan(); 609 } else { 610 assert(gpr); 611 return gpr.chan(); 612 } 613 } 614 615 val_set interferences; 616 unsigned uid; 617 }; 618 619 class expr_handler; 620 621 class value_table { 622 typedef std::vector<value*> vt_item; 623 typedef std::vector<vt_item> vt_table; 624 625 expr_handler &ex; 626 627 unsigned size_bits; 628 unsigned size; 629 unsigned size_mask; 630 631 vt_table hashtable; 632 633 unsigned cnt; 634 635 public: 636 637 value_table(expr_handler &ex, unsigned size_bits = 10) ex(ex)638 : ex(ex), size_bits(size_bits), size(1u << size_bits), 639 size_mask(size - 1), hashtable(size), cnt() {} 640 ~value_table()641 ~value_table() {} 642 643 void add_value(value* v); 644 645 bool expr_equal(value* l, value* r); 646 count()647 unsigned count() { return cnt; } 648 649 void get_values(vvec & v); 650 }; 651 652 class sb_context; 653 654 enum node_type { 655 NT_UNKNOWN, 656 NT_LIST, 657 NT_OP, 658 NT_REGION, 659 NT_REPEAT, 660 NT_DEPART, 661 NT_IF, 662 }; 663 664 enum node_subtype { 665 NST_UNKNOWN, 666 NST_LIST, 667 NST_ALU_GROUP, 668 NST_ALU_CLAUSE, 669 NST_ALU_INST, 670 NST_ALU_PACKED_INST, 671 NST_CF_INST, 672 NST_FETCH_INST, 673 NST_TEX_CLAUSE, 674 NST_VTX_CLAUSE, 675 NST_GDS_CLAUSE, 676 677 NST_BB, 678 679 NST_PHI, 680 NST_PSI, 681 NST_COPY, 682 683 NST_LOOP_PHI_CONTAINER, 684 NST_LOOP_CONTINUE, 685 NST_LOOP_BREAK 686 }; 687 688 enum node_flags { 689 NF_EMPTY = 0, 690 NF_DEAD = (1 << 0), 691 NF_REG_CONSTRAINT = (1 << 1), 692 NF_CHAN_CONSTRAINT = (1 << 2), 693 NF_ALU_4SLOT = (1 << 3), 694 NF_CONTAINER = (1 << 4), 695 696 NF_COPY_MOV = (1 << 5), 697 698 NF_DONT_KILL = (1 << 6), 699 NF_DONT_HOIST = (1 << 7), 700 NF_DONT_MOVE = (1 << 8), 701 702 // for KILLxx - we want to schedule them as early as possible 703 NF_SCHEDULE_EARLY = (1 << 9), 704 705 // for ALU_PUSH_BEFORE - when set, replace with PUSH + ALU 706 NF_ALU_STACK_WORKAROUND = (1 << 10) 707 }; 708 709 inline node_flags operator |(node_flags l, node_flags r) { 710 return (node_flags)((unsigned)l|(unsigned)r); 711 } 712 inline node_flags& operator |=(node_flags &l, node_flags r) { 713 l = l | r; 714 return l; 715 } 716 717 inline node_flags& operator &=(node_flags &l, node_flags r) { 718 l = (node_flags)((unsigned)l & (unsigned)r); 719 return l; 720 } 721 722 inline node_flags operator ~(node_flags r) { 723 return (node_flags)~(unsigned)r; 724 } 725 726 struct node_stats { 727 unsigned alu_count; 728 unsigned alu_kill_count; 729 unsigned alu_copy_mov_count; 730 unsigned cf_count; 731 unsigned fetch_count; 732 unsigned region_count; 733 unsigned loop_count; 734 unsigned phi_count; 735 unsigned loop_phi_count; 736 unsigned depart_count; 737 unsigned repeat_count; 738 unsigned if_count; 739 bool uses_ar; 740 node_statsnode_stats741 node_stats() : alu_count(), alu_kill_count(), alu_copy_mov_count(), 742 cf_count(), fetch_count(), region_count(), 743 loop_count(), phi_count(), loop_phi_count(), depart_count(), 744 repeat_count(), if_count(), uses_ar(false) {} 745 746 void dump(); 747 }; 748 749 class shader; 750 751 class vpass; 752 753 class container_node; 754 class region_node; 755 756 class node { 757 758 protected: 759 node(node_type nt, node_subtype nst, node_flags flags = NF_EMPTY) prev()760 : prev(), next(), parent(), 761 type(nt), subtype(nst), flags(flags), 762 pred(), dst(), src() {} 763 ~node()764 virtual ~node() {}; 765 766 public: 767 node *prev, *next; 768 container_node *parent; 769 770 node_type type; 771 node_subtype subtype; 772 node_flags flags; 773 774 value *pred; 775 776 vvec dst; 777 vvec src; 778 is_valid()779 virtual bool is_valid() { return true; } 780 virtual bool accept(vpass &p, bool enter); 781 782 void insert_before(node *n); 783 void insert_after(node *n); 784 void replace_with(node *n); 785 void remove(); 786 787 virtual value_hash hash() const; 788 value_hash hash_src() const; 789 790 virtual bool fold_dispatch(expr_handler *ex); 791 is_container()792 bool is_container() { return flags & NF_CONTAINER; } 793 is_alu_packed()794 bool is_alu_packed() { return subtype == NST_ALU_PACKED_INST; } is_alu_inst()795 bool is_alu_inst() { return subtype == NST_ALU_INST; } is_alu_group()796 bool is_alu_group() { return subtype == NST_ALU_GROUP; } is_alu_clause()797 bool is_alu_clause() { return subtype == NST_ALU_CLAUSE; } 798 is_fetch_clause()799 bool is_fetch_clause() { 800 return subtype == NST_TEX_CLAUSE || subtype == NST_VTX_CLAUSE || subtype == NST_GDS_CLAUSE; 801 } 802 is_copy()803 bool is_copy() { return subtype == NST_COPY; } is_copy_mov()804 bool is_copy_mov() { return flags & NF_COPY_MOV; } is_any_alu()805 bool is_any_alu() { return is_alu_inst() || is_alu_packed() || is_copy(); } 806 is_fetch_inst()807 bool is_fetch_inst() { return subtype == NST_FETCH_INST; } is_cf_inst()808 bool is_cf_inst() { return subtype == NST_CF_INST; } 809 is_region()810 bool is_region() { return type == NT_REGION; } is_depart()811 bool is_depart() { return type == NT_DEPART; } is_repeat()812 bool is_repeat() { return type == NT_REPEAT; } is_if()813 bool is_if() { return type == NT_IF; } is_bb()814 bool is_bb() { return subtype == NST_BB; } 815 is_phi()816 bool is_phi() { return subtype == NST_PHI; } 817 is_dead()818 bool is_dead() { return flags & NF_DEAD; } 819 820 bool is_cf_op(unsigned op); 821 bool is_alu_op(unsigned op); 822 bool is_fetch_op(unsigned op); 823 824 unsigned cf_op_flags(); 825 unsigned alu_op_flags(); 826 unsigned alu_op_slot_flags(); 827 unsigned fetch_op_flags(); 828 829 bool is_mova(); 830 bool is_pred_set(); 831 vec_uses_ar(vvec & vv)832 bool vec_uses_ar(vvec &vv) { 833 for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { 834 value *v = *I; 835 if (v && v->rel && !v->rel->is_const()) 836 return true; 837 } 838 return false; 839 } 840 uses_ar()841 bool uses_ar() { 842 return vec_uses_ar(dst) || vec_uses_ar(src); 843 } 844 vec_uses_lds_oq(vvec & vv)845 bool vec_uses_lds_oq(vvec &vv) { 846 for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { 847 value *v = *I; 848 if (v && v->is_lds_oq()) 849 return true; 850 } 851 return false; 852 } 853 consumes_lds_oq()854 bool consumes_lds_oq() { 855 return vec_uses_lds_oq(src); 856 } 857 produces_lds_oq()858 bool produces_lds_oq() { 859 return vec_uses_lds_oq(dst); 860 } 861 862 region_node* get_parent_region(); 863 864 friend class shader; 865 }; 866 867 class container_node : public node { 868 public: 869 870 container_node(node_type nt = NT_LIST, node_subtype nst = NST_LIST, 871 node_flags flags = NF_EMPTY) 872 : node(nt, nst, flags | NF_CONTAINER), first(), last(), 873 live_after(), live_before() {} 874 875 // child items list 876 node *first, *last; 877 878 val_set live_after; 879 val_set live_before; 880 881 class iterator { 882 node *p; 883 public: p(pp)884 iterator(node *pp = NULL) : p(pp) {} 885 iterator & operator ++() { p = p->next; return *this;} 886 iterator & operator --() { p = p->prev; return *this;} 887 node* operator *() { return p; } 888 node* operator ->() { return p; } advance(int n)889 const iterator advance(int n) { 890 if (!n) return *this; 891 iterator I(p); 892 if (n > 0) while (n--) ++I; 893 else while (n++) --I; 894 return I; 895 } 896 const iterator operator +(int n) { return advance(n); } 897 const iterator operator -(int n) { return advance(-n); } 898 bool operator !=(const iterator &i) { return p != i.p; } 899 bool operator ==(const iterator &i) { return p == i.p; } 900 }; 901 902 class riterator { 903 iterator i; 904 public: i(p)905 riterator(node *p = NULL) : i(p) {} 906 riterator & operator ++() { --i; return *this;} 907 riterator & operator --() { ++i; return *this;} 908 node* operator *() { return *i; } 909 node* operator ->() { return *i; } 910 bool operator !=(const riterator &r) { return i != r.i; } 911 bool operator ==(const riterator &r) { return i == r.i; } 912 }; 913 begin()914 iterator begin() { return first; } end()915 iterator end() { return NULL; } rbegin()916 riterator rbegin() { return last; } rend()917 riterator rend() { return NULL; } 918 empty()919 bool empty() { assert(first != NULL || first == last); return !first; } 920 unsigned count(); 921 922 // used with node containers that represent shceduling queues 923 // ignores copies and takes into account alu_packed_node items 924 unsigned real_alu_count(); 925 926 void push_back(node *n); 927 void push_front(node *n); 928 929 void insert_node_before(node *s, node *n); 930 void insert_node_after(node *s, node *n); 931 932 void append_from(container_node *c); 933 934 // remove range [b..e) from some container and assign to this container 935 void move(iterator b, iterator e); 936 937 void expand(); 938 void expand(container_node *n); 939 void remove_node(node *n); 940 941 node *cut(iterator b, iterator e); 942 clear()943 void clear() { first = last = NULL; } 944 is_valid()945 virtual bool is_valid() { return true; } 946 virtual bool accept(vpass &p, bool enter); 947 virtual bool fold_dispatch(expr_handler *ex); 948 front()949 node* front() { return first; } back()950 node* back() { return last; } 951 952 void collect_stats(node_stats &s); 953 954 friend class shader; 955 956 957 }; 958 959 typedef container_node::iterator node_iterator; 960 typedef container_node::riterator node_riterator; 961 962 class alu_group_node : public container_node { 963 protected: alu_group_node()964 alu_group_node() : container_node(NT_LIST, NST_ALU_GROUP), literals() {} 965 public: 966 967 std::vector<literal> literals; 968 is_valid()969 virtual bool is_valid() { return subtype == NST_ALU_GROUP; } 970 virtual bool accept(vpass &p, bool enter); 971 972 literal_chan(literal l)973 unsigned literal_chan(literal l) { 974 std::vector<literal>::iterator F = 975 std::find(literals.begin(), literals.end(), l); 976 assert(F != literals.end()); 977 return F - literals.begin(); 978 } 979 980 friend class shader; 981 }; 982 983 class cf_node : public container_node { 984 protected: cf_node()985 cf_node() : container_node(NT_OP, NST_CF_INST), jump_target(), 986 jump_after_target() { memset(&bc, 0, sizeof(bc_cf)); }; 987 public: 988 bc_cf bc; 989 990 cf_node *jump_target; 991 bool jump_after_target; 992 is_valid()993 virtual bool is_valid() { return subtype == NST_CF_INST; } 994 virtual bool accept(vpass &p, bool enter); 995 virtual bool fold_dispatch(expr_handler *ex); 996 jump(cf_node * c)997 void jump(cf_node *c) { jump_target = c; jump_after_target = false; } jump_after(cf_node * c)998 void jump_after(cf_node *c) { jump_target = c; jump_after_target = true; } 999 1000 friend class shader; 1001 }; 1002 1003 class alu_node : public node { 1004 protected: alu_node()1005 alu_node() : node(NT_OP, NST_ALU_INST) { memset(&bc, 0, sizeof(bc_alu)); }; 1006 public: 1007 bc_alu bc; 1008 is_valid()1009 virtual bool is_valid() { return subtype == NST_ALU_INST; } 1010 virtual bool accept(vpass &p, bool enter); 1011 virtual bool fold_dispatch(expr_handler *ex); 1012 forced_bank_swizzle()1013 unsigned forced_bank_swizzle() { 1014 return ((bc.op_ptr->flags & AF_INTERP) && (bc.slot_flags == AF_4V)) ? 1015 VEC_210 : 0; 1016 } 1017 1018 // return param index + 1 if instruction references interpolation param, 1019 // otherwise 0 1020 unsigned interp_param(); 1021 1022 alu_group_node *get_alu_group_node(); 1023 1024 friend class shader; 1025 }; 1026 1027 // for multi-slot instrs - DOT/INTERP/... (maybe useful for 64bit pairs later) 1028 class alu_packed_node : public container_node { 1029 protected: alu_packed_node()1030 alu_packed_node() : container_node(NT_OP, NST_ALU_PACKED_INST) {} 1031 public: 1032 op_ptr()1033 const alu_op_info* op_ptr() { 1034 return static_cast<alu_node*>(first)->bc.op_ptr; 1035 } op()1036 unsigned op() { return static_cast<alu_node*>(first)->bc.op; } 1037 void init_args(bool repl); 1038 is_valid()1039 virtual bool is_valid() { return subtype == NST_ALU_PACKED_INST; } 1040 virtual bool accept(vpass &p, bool enter); 1041 virtual bool fold_dispatch(expr_handler *ex); 1042 1043 unsigned get_slot_mask(); 1044 void update_packed_items(sb_context &ctx); 1045 1046 friend class shader; 1047 }; 1048 1049 class fetch_node : public node { 1050 protected: fetch_node()1051 fetch_node() : node(NT_OP, NST_FETCH_INST) { memset(&bc, 0, sizeof(bc_fetch)); }; 1052 public: 1053 bc_fetch bc; 1054 is_valid()1055 virtual bool is_valid() { return subtype == NST_FETCH_INST; } 1056 virtual bool accept(vpass &p, bool enter); 1057 virtual bool fold_dispatch(expr_handler *ex); 1058 uses_grad()1059 bool uses_grad() { return bc.op_ptr->flags & FF_USEGRAD; } 1060 1061 friend class shader; 1062 }; 1063 1064 class region_node; 1065 1066 class repeat_node : public container_node { 1067 protected: repeat_node(region_node * target,unsigned id)1068 repeat_node(region_node *target, unsigned id) 1069 : container_node(NT_REPEAT, NST_LIST), target(target), rep_id(id) {} 1070 public: 1071 region_node *target; 1072 unsigned rep_id; 1073 1074 virtual bool accept(vpass &p, bool enter); 1075 1076 friend class shader; 1077 }; 1078 1079 class depart_node : public container_node { 1080 protected: depart_node(region_node * target,unsigned id)1081 depart_node(region_node *target, unsigned id) 1082 : container_node(NT_DEPART, NST_LIST), target(target), dep_id(id) {} 1083 public: 1084 region_node *target; 1085 unsigned dep_id; 1086 1087 virtual bool accept(vpass &p, bool enter); 1088 1089 friend class shader; 1090 }; 1091 1092 class if_node : public container_node { 1093 protected: if_node()1094 if_node() : container_node(NT_IF, NST_LIST), cond() {}; 1095 public: 1096 value *cond; // glued to pseudo output (dst[2]) of the PRED_SETxxx 1097 1098 virtual bool accept(vpass &p, bool enter); 1099 1100 friend class shader; 1101 }; 1102 1103 typedef std::vector<depart_node*> depart_vec; 1104 typedef std::vector<repeat_node*> repeat_vec; 1105 1106 class region_node : public container_node { 1107 protected: region_node(unsigned id)1108 region_node(unsigned id) : container_node(NT_REGION, NST_LIST), region_id(id), 1109 loop_phi(), phi(), vars_defined(), departs(), repeats(), src_loop() 1110 {} 1111 public: 1112 unsigned region_id; 1113 1114 container_node *loop_phi; 1115 container_node *phi; 1116 1117 val_set vars_defined; 1118 1119 depart_vec departs; 1120 repeat_vec repeats; 1121 1122 // true if region was created for loop in the parser, sometimes repeat_node 1123 // may be optimized away so we need to remember this information 1124 bool src_loop; 1125 1126 virtual bool accept(vpass &p, bool enter); 1127 dep_count()1128 unsigned dep_count() { return departs.size(); } rep_count()1129 unsigned rep_count() { return repeats.size() + 1; } 1130 is_loop()1131 bool is_loop() { return src_loop || !repeats.empty(); } 1132 get_entry_code_location()1133 container_node* get_entry_code_location() { 1134 node *p = first; 1135 while (p && (p->is_depart() || p->is_repeat())) 1136 p = static_cast<container_node*>(p)->first; 1137 1138 container_node *c = static_cast<container_node*>(p); 1139 if (c->is_bb()) 1140 return c; 1141 else 1142 return c->parent; 1143 } 1144 1145 void expand_depart(depart_node *d); 1146 void expand_repeat(repeat_node *r); 1147 1148 friend class shader; 1149 }; 1150 1151 class bb_node : public container_node { 1152 protected: bb_node(unsigned id,unsigned loop_level)1153 bb_node(unsigned id, unsigned loop_level) 1154 : container_node(NT_LIST, NST_BB), id(id), loop_level(loop_level) {} 1155 public: 1156 unsigned id; 1157 unsigned loop_level; 1158 1159 virtual bool accept(vpass &p, bool enter); 1160 1161 friend class shader; 1162 }; 1163 1164 1165 typedef std::vector<region_node*> regions_vec; 1166 typedef std::vector<bb_node*> bbs_vec; 1167 typedef std::list<node*> sched_queue; 1168 typedef sched_queue::iterator sq_iterator; 1169 typedef std::vector<node*> node_vec; 1170 typedef std::list<node*> node_list; 1171 typedef std::set<node*> node_set; 1172 1173 1174 1175 } // namespace r600_sb 1176 1177 #endif /* R600_SB_IR_H_ */ 1178