1 /* 2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27 #ifndef R600_SB_IR_H_ 28 #define R600_SB_IR_H_ 29 30 #include <algorithm> 31 #include <stdint.h> 32 #include <vector> 33 #include <set> 34 #include <algorithm> 35 36 #include "sb_bc.h" 37 38 namespace r600_sb { 39 40 enum special_regs { 41 SV_ALU_PRED = 128, 42 SV_EXEC_MASK, 43 SV_AR_INDEX, 44 SV_VALID_MASK, 45 SV_GEOMETRY_EMIT 46 }; 47 48 class node; 49 class value; 50 class shader; 51 52 struct sel_chan 53 { 54 unsigned id; 55 idsel_chan56 sel_chan(unsigned id = 0) : id(id) {} sel_chansel_chan57 sel_chan(unsigned sel, unsigned chan) : id(((sel << 2) | chan) + 1) {} 58 selsel_chan59 unsigned sel() const { return sel(id); } chansel_chan60 unsigned chan() const {return chan(id); } 61 operator unsigned() const {return id;} 62 selsel_chan63 static unsigned sel(unsigned idx) { return (idx-1) >> 2; } chansel_chan64 static unsigned chan(unsigned idx) { return (idx-1) & 3; } 65 sel_chansel_chan66 sel_chan(unsigned bank, unsigned index, 67 unsigned chan, alu_kcache_index_mode index_mode) 68 : id(sel_chan((bank << 12) | index | ((unsigned)index_mode << 28), chan).id) {} kcache_index_modesel_chan69 unsigned kcache_index_mode() const { return sel() >> 28; } kcache_selsel_chan70 unsigned kcache_sel() const { return sel() & 0x0fffffffu; } kcache_banksel_chan71 unsigned kcache_bank() const { return kcache_sel() >> 12; } 72 }; 73 74 inline sb_ostream& operator <<(sb_ostream& o, sel_chan r) { 75 static const char * ch = "xyzw"; 76 o << r.sel() << "." << ch[r.chan()]; 77 return o; 78 } 79 80 typedef std::vector<value*> vvec; 81 82 class sb_pool { 83 protected: 84 static const unsigned SB_POOL_ALIGN = 8; 85 static const unsigned SB_POOL_DEFAULT_BLOCK_SIZE = (1 << 16); 86 87 typedef std::vector<void*> block_vector; 88 89 unsigned block_size; 90 block_vector blocks; 91 unsigned total_size; 92 93 public: 94 sb_pool(unsigned block_size = SB_POOL_DEFAULT_BLOCK_SIZE) block_size(block_size)95 : block_size(block_size), blocks(), total_size() {} 96 ~sb_pool()97 virtual ~sb_pool() { free_all(); } 98 99 void* allocate(unsigned sz); 100 101 protected: 102 void free_all(); 103 }; 104 105 template <typename V, typename Comp = std::less<V> > 106 class sb_set { 107 typedef std::vector<V> data_vector; 108 data_vector vec; 109 public: 110 111 typedef typename data_vector::iterator iterator; 112 typedef typename data_vector::const_iterator const_iterator; 113 sb_set()114 sb_set() : vec() {} ~sb_set()115 ~sb_set() { } 116 begin()117 iterator begin() { return vec.begin(); } end()118 iterator end() { return vec.end(); } begin()119 const_iterator begin() const { return vec.begin(); } end()120 const_iterator end() const { return vec.end(); } 121 add_set(const sb_set & s)122 void add_set(const sb_set& s) { 123 data_vector t; 124 t.reserve(vec.size() + s.vec.size()); 125 std::set_union(vec.begin(), vec.end(), s.vec.begin(), s.vec.end(), 126 std::inserter(t, t.begin()), Comp()); 127 vec.swap(t); 128 } 129 lower_bound(const V & v)130 iterator lower_bound(const V& v) { 131 return std::lower_bound(vec.begin(), vec.end(), v, Comp()); 132 } 133 insert(const V & v)134 std::pair<iterator, bool> insert(const V& v) { 135 iterator P = lower_bound(v); 136 if (P != vec.end() && is_equal(*P, v)) 137 return std::make_pair(P, false); 138 return std::make_pair(vec.insert(P, v), true); 139 } 140 erase(const V & v)141 unsigned erase(const V& v) { 142 iterator P = lower_bound(v); 143 if (P == vec.end() || !is_equal(*P, v)) 144 return 0; 145 vec.erase(P); 146 return 1; 147 } 148 clear()149 void clear() { vec.clear(); } 150 empty()151 bool empty() { return vec.empty(); } 152 is_equal(const V & v1,const V & v2)153 bool is_equal(const V& v1, const V& v2) { 154 return !Comp()(v1, v2) && !Comp()(v2, v1); 155 } 156 find(const V & v)157 iterator find(const V& v) { 158 iterator P = lower_bound(v); 159 return (P != vec.end() && is_equal(*P, v)) ? P : vec.end(); 160 } 161 size()162 unsigned size() { return vec.size(); } erase(iterator I)163 void erase(iterator I) { vec.erase(I); } 164 }; 165 166 template <typename K, typename V, typename KComp = std::less<K> > 167 class sb_map { 168 typedef std::pair<K, V> datatype; 169 170 struct Comp { operatorComp171 bool operator()(const datatype &v1, const datatype &v2) { 172 return KComp()(v1.first, v2.first); 173 } 174 }; 175 176 typedef sb_set<datatype, Comp> dataset; 177 178 dataset set; 179 180 public: 181 sb_map()182 sb_map() : set() {} 183 184 typedef typename dataset::iterator iterator; 185 begin()186 iterator begin() { return set.begin(); } end()187 iterator end() { return set.end(); } 188 clear()189 void clear() { set.clear(); } 190 191 V& operator[](const K& key) { 192 datatype P = std::make_pair(key, V()); 193 iterator F = set.find(P); 194 if (F == set.end()) { 195 return (*(set.insert(P).first)).second; 196 } else { 197 return (*F).second; 198 } 199 } 200 insert(const datatype & d)201 std::pair<iterator, bool> insert(const datatype& d) { 202 return set.insert(d); 203 } 204 find(const K & key)205 iterator find(const K& key) { 206 return set.find(std::make_pair(key, V())); 207 } 208 erase(const K & key)209 unsigned erase(const K& key) { 210 return set.erase(std::make_pair(key, V())); 211 } 212 erase(iterator I)213 void erase(iterator I) { 214 set.erase(I); 215 } 216 }; 217 218 class sb_bitset { 219 typedef uint32_t basetype; 220 static const unsigned bt_bits = sizeof(basetype) << 3; 221 std::vector<basetype> data; 222 unsigned bit_size; 223 224 public: 225 sb_bitset()226 sb_bitset() : data(), bit_size() {} 227 228 bool get(unsigned id); 229 void set(unsigned id, bool bit = true); 230 bool set_chk(unsigned id, bool bit = true); 231 232 void clear(); 233 void resize(unsigned size); 234 size()235 unsigned size() { return bit_size; } 236 237 unsigned find_bit(unsigned start = 0); 238 239 void swap(sb_bitset & bs2); 240 241 bool operator==(const sb_bitset &bs2); 242 bool operator!=(const sb_bitset &bs2) { return !(*this == bs2); } 243 244 sb_bitset& operator|=(const sb_bitset &bs2) { 245 if (bit_size < bs2.bit_size) { 246 resize(bs2.bit_size); 247 } 248 249 for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c; 250 ++i) { 251 data[i] |= bs2.data[i]; 252 } 253 return *this; 254 } 255 256 sb_bitset& operator&=(const sb_bitset &bs2); 257 sb_bitset& mask(const sb_bitset &bs2); 258 259 friend sb_bitset operator|(const sb_bitset &b1, const sb_bitset &b2) { 260 sb_bitset nbs(b1); 261 nbs |= b2; 262 return nbs; 263 } 264 }; 265 266 enum value_kind { 267 VLK_REG, 268 VLK_REL_REG, 269 VLK_SPECIAL_REG, 270 VLK_TEMP, 271 272 VLK_CONST, 273 VLK_KCACHE, 274 VLK_PARAM, 275 VLK_SPECIAL_CONST, 276 277 VLK_UNDEF 278 }; 279 280 281 282 class sb_value_pool : protected sb_pool { 283 unsigned aligned_elt_size; 284 285 public: 286 sb_value_pool(unsigned elt_size, unsigned block_elts = 256) 287 : sb_pool(block_elts * (aligned_elt_size = ((elt_size + 288 SB_POOL_ALIGN - 1) & ~(SB_POOL_ALIGN - 1)))) {} 289 ~sb_value_pool()290 virtual ~sb_value_pool() { delete_all(); } 291 292 value* create(value_kind k, sel_chan regid, unsigned ver); 293 294 value* operator[](unsigned id) { 295 unsigned offset = id * aligned_elt_size; 296 unsigned block_id; 297 if (offset < block_size) { 298 block_id = 0; 299 } else { 300 block_id = offset / block_size; 301 offset = offset % block_size; 302 } 303 return (value*)((char*)blocks[block_id] + offset); 304 } 305 size()306 unsigned size() { return total_size / aligned_elt_size; } 307 308 protected: 309 void delete_all(); 310 }; 311 312 313 314 315 316 class sb_value_set { 317 318 sb_bitset bs; 319 320 public: sb_value_set()321 sb_value_set() : bs() {} 322 323 class iterator { 324 sb_value_pool &vp; 325 sb_value_set *s; 326 unsigned nb; 327 public: 328 iterator(shader &sh, sb_value_set *s, unsigned nb = 0); 329 330 331 iterator& operator++() { 332 if (nb + 1 < s->bs.size()) 333 nb = s->bs.find_bit(nb + 1); 334 else 335 nb = s->bs.size(); 336 return *this; 337 } 338 bool operator !=(const iterator &i) { 339 return s != i.s || nb != i.nb; 340 } 341 bool operator ==(const iterator &i) { return !(*this != i); } 342 value* operator *() { 343 return vp[nb]; 344 } 345 346 347 }; 348 begin(shader & sh)349 iterator begin(shader &sh) { 350 return iterator(sh, this, bs.size() ? bs.find_bit(0) : 0); 351 } end(shader & sh)352 iterator end(shader &sh) { return iterator(sh, this, bs.size()); } 353 354 bool add_set_checked(sb_value_set & s2); 355 add_set(sb_value_set & s2)356 void add_set(sb_value_set & s2) { 357 if (bs.size() < s2.bs.size()) 358 bs.resize(s2.bs.size()); 359 bs |= s2.bs; 360 } 361 362 void remove_set(sb_value_set & s2); 363 364 bool add_vec(vvec &vv); 365 366 bool add_val(value *v); 367 bool contains(value *v); 368 369 bool remove_val(value *v); 370 371 bool remove_vec(vvec &vv); 372 373 void clear(); 374 375 bool empty(); 376 }; 377 378 typedef sb_value_set val_set; 379 380 struct gpr_array { 381 sel_chan base_gpr; // original gpr 382 sel_chan gpr; // assigned by regalloc 383 unsigned array_size; 384 gpr_arraygpr_array385 gpr_array(sel_chan base_gpr, unsigned array_size) : base_gpr(base_gpr), 386 array_size(array_size) {} 387 hashgpr_array388 unsigned hash() { return (base_gpr << 10) * array_size; } 389 390 val_set interferences; 391 vvec refs; 392 393 bool is_dead(); 394 395 }; 396 397 typedef std::vector<gpr_array*> regarray_vec; 398 399 enum value_flags { 400 VLF_UNDEF = (1 << 0), 401 VLF_READONLY = (1 << 1), 402 VLF_DEAD = (1 << 2), 403 404 VLF_PIN_REG = (1 << 3), 405 VLF_PIN_CHAN = (1 << 4), 406 407 // opposite to alu clause local value - goes through alu clause boundary 408 // (can't use temp gpr, can't recolor in the alu scheduler, etc) 409 VLF_GLOBAL = (1 << 5), 410 VLF_FIXED = (1 << 6), 411 VLF_PVPS = (1 << 7), 412 413 VLF_PREALLOC = (1 << 8) 414 }; 415 416 inline value_flags operator |(value_flags l, value_flags r) { 417 return (value_flags)((unsigned)l|(unsigned)r); 418 } 419 inline value_flags operator &(value_flags l, value_flags r) { 420 return (value_flags)((unsigned)l&(unsigned)r); 421 } 422 inline value_flags operator ~(value_flags l) { 423 return (value_flags)(~(unsigned)l); 424 } 425 inline value_flags& operator |=(value_flags &l, value_flags r) { 426 l = l | r; 427 return l; 428 } 429 inline value_flags& operator &=(value_flags &l, value_flags r) { 430 l = l & r; 431 return l; 432 } 433 434 sb_ostream& operator << (sb_ostream &o, value &v); 435 436 typedef uint32_t value_hash; 437 438 enum use_kind { 439 UK_SRC, 440 UK_SRC_REL, 441 UK_DST_REL, 442 UK_MAYDEF, 443 UK_MAYUSE, 444 UK_PRED, 445 UK_COND 446 }; 447 448 struct use_info { 449 node *op; 450 use_kind kind; 451 int arg; 452 use_infouse_info453 use_info(node *n, use_kind kind, int arg) 454 : op(n), kind(kind), arg(arg) {} 455 }; 456 457 typedef std::list< use_info * > uselist; 458 459 enum constraint_kind { 460 CK_SAME_REG, 461 CK_PACKED_BS, 462 CK_PHI 463 }; 464 465 class shader; 466 class sb_value_pool; 467 struct ra_chunk; 468 class ra_constraint; 469 470 class value { 471 protected: 472 value(unsigned sh_id, value_kind k, sel_chan select, unsigned ver = 0) kind(k)473 : kind(k), flags(), 474 rel(), array(), 475 version(ver), select(select), pin_gpr(select), gpr(), 476 gvn_source(), ghash(), 477 def(), adef(), uses(), constraint(), chunk(), 478 literal_value(), uid(sh_id) {} 479 ~value()480 ~value() { delete_uses(); } 481 482 friend class sb_value_pool; 483 public: 484 value_kind kind; 485 value_flags flags; 486 487 vvec mdef; 488 vvec muse; 489 value *rel; 490 gpr_array *array; 491 492 unsigned version; 493 494 sel_chan select; 495 sel_chan pin_gpr; 496 sel_chan gpr; 497 498 value *gvn_source; 499 value_hash ghash; 500 501 node *def, *adef; 502 uselist uses; 503 504 ra_constraint *constraint; 505 ra_chunk *chunk; 506 507 literal literal_value; 508 is_const()509 bool is_const() { return kind == VLK_CONST || kind == VLK_UNDEF; } 510 is_AR()511 bool is_AR() { 512 return is_special_reg() && select == sel_chan(SV_AR_INDEX, 0); 513 } is_geometry_emit()514 bool is_geometry_emit() { 515 return is_special_reg() && select == sel_chan(SV_GEOMETRY_EMIT, 0); 516 } 517 any_def()518 node* any_def() { 519 assert(!(def && adef)); 520 return def ? def : adef; 521 } 522 gvalue()523 value* gvalue() { 524 value *v = this; 525 while (v->gvn_source && v != v->gvn_source) 526 // FIXME we really shouldn't have such chains 527 v = v->gvn_source; 528 return v; 529 } 530 is_float_0_or_1()531 bool is_float_0_or_1() { 532 value *v = gvalue(); 533 return v->is_const() && (v->literal_value == literal(0) 534 || v->literal_value == literal(1.0f)); 535 } 536 is_undef()537 bool is_undef() { return gvalue()->kind == VLK_UNDEF; } 538 is_any_gpr()539 bool is_any_gpr() { 540 return (kind == VLK_REG || kind == VLK_TEMP); 541 } 542 is_agpr()543 bool is_agpr() { 544 return array && is_any_gpr(); 545 } 546 547 // scalar gpr, as opposed to element of gpr array is_sgpr()548 bool is_sgpr() { 549 return !array && is_any_gpr(); 550 } 551 is_special_reg()552 bool is_special_reg() { return kind == VLK_SPECIAL_REG; } is_any_reg()553 bool is_any_reg() { return is_any_gpr() || is_special_reg(); } is_kcache()554 bool is_kcache() { return kind == VLK_KCACHE; } is_rel()555 bool is_rel() { return kind == VLK_REL_REG; } is_readonly()556 bool is_readonly() { return flags & VLF_READONLY; } 557 is_chan_pinned()558 bool is_chan_pinned() { return flags & VLF_PIN_CHAN; } is_reg_pinned()559 bool is_reg_pinned() { return flags & VLF_PIN_REG; } 560 561 bool is_global(); 562 void set_global(); 563 void set_prealloc(); 564 565 bool is_prealloc(); 566 567 bool is_fixed(); 568 void fix(); 569 is_dead()570 bool is_dead() { return flags & VLF_DEAD; } 571 get_const_value()572 literal & get_const_value() { 573 value *v = gvalue(); 574 assert(v->is_const()); 575 return v->literal_value; 576 } 577 578 // true if needs to be encoded as literal in alu is_literal()579 bool is_literal() { 580 return is_const() 581 && literal_value != literal(0) 582 && literal_value != literal(1) 583 && literal_value != literal(-1) 584 && literal_value != literal(0.5) 585 && literal_value != literal(1.0); 586 } 587 588 void add_use(node *n, use_kind kind, int arg); 589 void remove_use(const node *n); 590 591 value_hash hash(); 592 value_hash rel_hash(); 593 assign_source(value * v)594 void assign_source(value *v) { 595 assert(!gvn_source || gvn_source == this); 596 gvn_source = v->gvalue(); 597 } 598 v_equal(value * v)599 bool v_equal(value *v) { return gvalue() == v->gvalue(); } 600 601 unsigned use_count(); 602 void delete_uses(); 603 get_final_gpr()604 sel_chan get_final_gpr() { 605 if (array && array->gpr) { 606 int reg_offset = select.sel() - array->base_gpr.sel(); 607 if (rel && rel->is_const()) 608 reg_offset += rel->get_const_value().i; 609 return array->gpr + (reg_offset << 2); 610 } else { 611 return gpr; 612 } 613 } 614 get_final_chan()615 unsigned get_final_chan() { 616 if (array) { 617 assert(array->gpr); 618 return array->gpr.chan(); 619 } else { 620 assert(gpr); 621 return gpr.chan(); 622 } 623 } 624 625 val_set interferences; 626 unsigned uid; 627 }; 628 629 class expr_handler; 630 631 class value_table { 632 typedef std::vector<value*> vt_item; 633 typedef std::vector<vt_item> vt_table; 634 635 expr_handler &ex; 636 637 unsigned size_bits; 638 unsigned size; 639 unsigned size_mask; 640 641 vt_table hashtable; 642 643 unsigned cnt; 644 645 public: 646 647 value_table(expr_handler &ex, unsigned size_bits = 10) ex(ex)648 : ex(ex), size_bits(size_bits), size(1u << size_bits), 649 size_mask(size - 1), hashtable(size), cnt() {} 650 ~value_table()651 ~value_table() {} 652 653 void add_value(value* v); 654 655 bool expr_equal(value* l, value* r); 656 count()657 unsigned count() { return cnt; } 658 659 void get_values(vvec & v); 660 }; 661 662 class sb_context; 663 664 enum node_type { 665 NT_UNKNOWN, 666 NT_LIST, 667 NT_OP, 668 NT_REGION, 669 NT_REPEAT, 670 NT_DEPART, 671 NT_IF, 672 }; 673 674 enum node_subtype { 675 NST_UNKNOWN, 676 NST_LIST, 677 NST_ALU_GROUP, 678 NST_ALU_CLAUSE, 679 NST_ALU_INST, 680 NST_ALU_PACKED_INST, 681 NST_CF_INST, 682 NST_FETCH_INST, 683 NST_TEX_CLAUSE, 684 NST_VTX_CLAUSE, 685 686 NST_BB, 687 688 NST_PHI, 689 NST_PSI, 690 NST_COPY, 691 692 NST_LOOP_PHI_CONTAINER, 693 NST_LOOP_CONTINUE, 694 NST_LOOP_BREAK 695 }; 696 697 enum node_flags { 698 NF_EMPTY = 0, 699 NF_DEAD = (1 << 0), 700 NF_REG_CONSTRAINT = (1 << 1), 701 NF_CHAN_CONSTRAINT = (1 << 2), 702 NF_ALU_4SLOT = (1 << 3), 703 NF_CONTAINER = (1 << 4), 704 705 NF_COPY_MOV = (1 << 5), 706 707 NF_DONT_KILL = (1 << 6), 708 NF_DONT_HOIST = (1 << 7), 709 NF_DONT_MOVE = (1 << 8), 710 711 // for KILLxx - we want to schedule them as early as possible 712 NF_SCHEDULE_EARLY = (1 << 9), 713 714 // for ALU_PUSH_BEFORE - when set, replace with PUSH + ALU 715 NF_ALU_STACK_WORKAROUND = (1 << 10) 716 }; 717 718 inline node_flags operator |(node_flags l, node_flags r) { 719 return (node_flags)((unsigned)l|(unsigned)r); 720 } 721 inline node_flags& operator |=(node_flags &l, node_flags r) { 722 l = l | r; 723 return l; 724 } 725 726 inline node_flags& operator &=(node_flags &l, node_flags r) { 727 l = (node_flags)((unsigned)l & (unsigned)r); 728 return l; 729 } 730 731 inline node_flags operator ~(node_flags r) { 732 return (node_flags)~(unsigned)r; 733 } 734 735 struct node_stats { 736 unsigned alu_count; 737 unsigned alu_kill_count; 738 unsigned alu_copy_mov_count; 739 unsigned cf_count; 740 unsigned fetch_count; 741 unsigned region_count; 742 unsigned loop_count; 743 unsigned phi_count; 744 unsigned loop_phi_count; 745 unsigned depart_count; 746 unsigned repeat_count; 747 unsigned if_count; 748 node_statsnode_stats749 node_stats() : alu_count(), alu_kill_count(), alu_copy_mov_count(), 750 cf_count(), fetch_count(), region_count(), 751 loop_count(), phi_count(), loop_phi_count(), depart_count(), 752 repeat_count(), if_count() {} 753 754 void dump(); 755 }; 756 757 class shader; 758 759 class vpass; 760 761 class container_node; 762 class region_node; 763 764 class node { 765 766 protected: 767 node(node_type nt, node_subtype nst, node_flags flags = NF_EMPTY) prev()768 : prev(), next(), parent(), 769 type(nt), subtype(nst), flags(flags), 770 pred(), dst(), src() {} 771 ~node()772 virtual ~node() {}; 773 774 public: 775 node *prev, *next; 776 container_node *parent; 777 778 node_type type; 779 node_subtype subtype; 780 node_flags flags; 781 782 value *pred; 783 784 vvec dst; 785 vvec src; 786 is_valid()787 virtual bool is_valid() { return true; } 788 virtual bool accept(vpass &p, bool enter); 789 790 void insert_before(node *n); 791 void insert_after(node *n); 792 void replace_with(node *n); 793 void remove(); 794 795 virtual value_hash hash() const; 796 value_hash hash_src() const; 797 798 virtual bool fold_dispatch(expr_handler *ex); 799 is_container()800 bool is_container() { return flags & NF_CONTAINER; } 801 is_alu_packed()802 bool is_alu_packed() { return subtype == NST_ALU_PACKED_INST; } is_alu_inst()803 bool is_alu_inst() { return subtype == NST_ALU_INST; } is_alu_group()804 bool is_alu_group() { return subtype == NST_ALU_GROUP; } is_alu_clause()805 bool is_alu_clause() { return subtype == NST_ALU_CLAUSE; } 806 is_fetch_clause()807 bool is_fetch_clause() { 808 return subtype == NST_TEX_CLAUSE || subtype == NST_VTX_CLAUSE; 809 } 810 is_copy()811 bool is_copy() { return subtype == NST_COPY; } is_copy_mov()812 bool is_copy_mov() { return flags & NF_COPY_MOV; } is_any_alu()813 bool is_any_alu() { return is_alu_inst() || is_alu_packed() || is_copy(); } 814 is_fetch_inst()815 bool is_fetch_inst() { return subtype == NST_FETCH_INST; } is_cf_inst()816 bool is_cf_inst() { return subtype == NST_CF_INST; } 817 is_region()818 bool is_region() { return type == NT_REGION; } is_depart()819 bool is_depart() { return type == NT_DEPART; } is_repeat()820 bool is_repeat() { return type == NT_REPEAT; } is_if()821 bool is_if() { return type == NT_IF; } is_bb()822 bool is_bb() { return subtype == NST_BB; } 823 is_phi()824 bool is_phi() { return subtype == NST_PHI; } 825 is_dead()826 bool is_dead() { return flags & NF_DEAD; } 827 828 bool is_cf_op(unsigned op); 829 bool is_alu_op(unsigned op); 830 bool is_fetch_op(unsigned op); 831 832 unsigned cf_op_flags(); 833 unsigned alu_op_flags(); 834 unsigned alu_op_slot_flags(); 835 unsigned fetch_op_flags(); 836 837 bool is_mova(); 838 bool is_pred_set(); 839 vec_uses_ar(vvec & vv)840 bool vec_uses_ar(vvec &vv) { 841 for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { 842 value *v = *I; 843 if (v && v->rel && !v->rel->is_const()) 844 return true; 845 } 846 return false; 847 } 848 uses_ar()849 bool uses_ar() { 850 return vec_uses_ar(dst) || vec_uses_ar(src); 851 } 852 853 854 region_node* get_parent_region(); 855 856 friend class shader; 857 }; 858 859 class container_node : public node { 860 public: 861 862 container_node(node_type nt = NT_LIST, node_subtype nst = NST_LIST, 863 node_flags flags = NF_EMPTY) 864 : node(nt, nst, flags | NF_CONTAINER), first(), last(), 865 live_after(), live_before() {} 866 867 // child items list 868 node *first, *last; 869 870 val_set live_after; 871 val_set live_before; 872 873 class iterator { 874 node *p; 875 public: p(pp)876 iterator(node *pp = NULL) : p(pp) {} 877 iterator & operator ++() { p = p->next; return *this;} 878 iterator & operator --() { p = p->prev; return *this;} 879 node* operator *() { return p; } 880 node* operator ->() { return p; } advance(int n)881 const iterator advance(int n) { 882 if (!n) return *this; 883 iterator I(p); 884 if (n > 0) while (n--) ++I; 885 else while (n++) --I; 886 return I; 887 } 888 const iterator operator +(int n) { return advance(n); } 889 const iterator operator -(int n) { return advance(-n); } 890 bool operator !=(const iterator &i) { return p != i.p; } 891 bool operator ==(const iterator &i) { return p == i.p; } 892 }; 893 894 class riterator { 895 iterator i; 896 public: i(p)897 riterator(node *p = NULL) : i(p) {} 898 riterator & operator ++() { --i; return *this;} 899 riterator & operator --() { ++i; return *this;} 900 node* operator *() { return *i; } 901 node* operator ->() { return *i; } 902 bool operator !=(const riterator &r) { return i != r.i; } 903 bool operator ==(const riterator &r) { return i == r.i; } 904 }; 905 begin()906 iterator begin() { return first; } end()907 iterator end() { return NULL; } rbegin()908 riterator rbegin() { return last; } rend()909 riterator rend() { return NULL; } 910 empty()911 bool empty() { assert(first != NULL || first == last); return !first; } 912 unsigned count(); 913 914 // used with node containers that represent shceduling queues 915 // ignores copies and takes into account alu_packed_node items 916 unsigned real_alu_count(); 917 918 void push_back(node *n); 919 void push_front(node *n); 920 921 void insert_node_before(node *s, node *n); 922 void insert_node_after(node *s, node *n); 923 924 void append_from(container_node *c); 925 926 // remove range [b..e) from some container and assign to this container 927 void move(iterator b, iterator e); 928 929 void expand(); 930 void expand(container_node *n); 931 void remove_node(node *n); 932 933 node *cut(iterator b, iterator e); 934 clear()935 void clear() { first = last = NULL; } 936 is_valid()937 virtual bool is_valid() { return true; } 938 virtual bool accept(vpass &p, bool enter); 939 virtual bool fold_dispatch(expr_handler *ex); 940 front()941 node* front() { return first; } back()942 node* back() { return last; } 943 944 void collect_stats(node_stats &s); 945 946 friend class shader; 947 948 949 }; 950 951 typedef container_node::iterator node_iterator; 952 typedef container_node::riterator node_riterator; 953 954 class alu_group_node : public container_node { 955 protected: alu_group_node()956 alu_group_node() : container_node(NT_LIST, NST_ALU_GROUP), literals() {} 957 public: 958 959 std::vector<literal> literals; 960 is_valid()961 virtual bool is_valid() { return subtype == NST_ALU_GROUP; } 962 virtual bool accept(vpass &p, bool enter); 963 964 literal_chan(literal l)965 unsigned literal_chan(literal l) { 966 std::vector<literal>::iterator F = 967 std::find(literals.begin(), literals.end(), l); 968 assert(F != literals.end()); 969 return F - literals.begin(); 970 } 971 972 friend class shader; 973 }; 974 975 class cf_node : public container_node { 976 protected: cf_node()977 cf_node() : container_node(NT_OP, NST_CF_INST), jump_target(), 978 jump_after_target() { memset(&bc, 0, sizeof(bc_cf)); }; 979 public: 980 bc_cf bc; 981 982 cf_node *jump_target; 983 bool jump_after_target; 984 is_valid()985 virtual bool is_valid() { return subtype == NST_CF_INST; } 986 virtual bool accept(vpass &p, bool enter); 987 virtual bool fold_dispatch(expr_handler *ex); 988 jump(cf_node * c)989 void jump(cf_node *c) { jump_target = c; jump_after_target = false; } jump_after(cf_node * c)990 void jump_after(cf_node *c) { jump_target = c; jump_after_target = true; } 991 992 friend class shader; 993 }; 994 995 class alu_node : public node { 996 protected: alu_node()997 alu_node() : node(NT_OP, NST_ALU_INST) { memset(&bc, 0, sizeof(bc_alu)); }; 998 public: 999 bc_alu bc; 1000 is_valid()1001 virtual bool is_valid() { return subtype == NST_ALU_INST; } 1002 virtual bool accept(vpass &p, bool enter); 1003 virtual bool fold_dispatch(expr_handler *ex); 1004 forced_bank_swizzle()1005 unsigned forced_bank_swizzle() { 1006 return ((bc.op_ptr->flags & AF_INTERP) && (bc.slot_flags == AF_4V)) ? 1007 VEC_210 : 0; 1008 } 1009 1010 // return param index + 1 if instruction references interpolation param, 1011 // otherwise 0 1012 unsigned interp_param(); 1013 1014 alu_group_node *get_alu_group_node(); 1015 1016 friend class shader; 1017 }; 1018 1019 // for multi-slot instrs - DOT/INTERP/... (maybe useful for 64bit pairs later) 1020 class alu_packed_node : public container_node { 1021 protected: alu_packed_node()1022 alu_packed_node() : container_node(NT_OP, NST_ALU_PACKED_INST) {} 1023 public: 1024 op_ptr()1025 const alu_op_info* op_ptr() { 1026 return static_cast<alu_node*>(first)->bc.op_ptr; 1027 } op()1028 unsigned op() { return static_cast<alu_node*>(first)->bc.op; } 1029 void init_args(bool repl); 1030 is_valid()1031 virtual bool is_valid() { return subtype == NST_ALU_PACKED_INST; } 1032 virtual bool accept(vpass &p, bool enter); 1033 virtual bool fold_dispatch(expr_handler *ex); 1034 1035 unsigned get_slot_mask(); 1036 void update_packed_items(sb_context &ctx); 1037 1038 friend class shader; 1039 }; 1040 1041 class fetch_node : public node { 1042 protected: fetch_node()1043 fetch_node() : node(NT_OP, NST_FETCH_INST) { memset(&bc, 0, sizeof(bc_fetch)); }; 1044 public: 1045 bc_fetch bc; 1046 is_valid()1047 virtual bool is_valid() { return subtype == NST_FETCH_INST; } 1048 virtual bool accept(vpass &p, bool enter); 1049 virtual bool fold_dispatch(expr_handler *ex); 1050 uses_grad()1051 bool uses_grad() { return bc.op_ptr->flags & FF_USEGRAD; } 1052 1053 friend class shader; 1054 }; 1055 1056 class region_node; 1057 1058 class repeat_node : public container_node { 1059 protected: repeat_node(region_node * target,unsigned id)1060 repeat_node(region_node *target, unsigned id) 1061 : container_node(NT_REPEAT, NST_LIST), target(target), rep_id(id) {} 1062 public: 1063 region_node *target; 1064 unsigned rep_id; 1065 1066 virtual bool accept(vpass &p, bool enter); 1067 1068 friend class shader; 1069 }; 1070 1071 class depart_node : public container_node { 1072 protected: depart_node(region_node * target,unsigned id)1073 depart_node(region_node *target, unsigned id) 1074 : container_node(NT_DEPART, NST_LIST), target(target), dep_id(id) {} 1075 public: 1076 region_node *target; 1077 unsigned dep_id; 1078 1079 virtual bool accept(vpass &p, bool enter); 1080 1081 friend class shader; 1082 }; 1083 1084 class if_node : public container_node { 1085 protected: if_node()1086 if_node() : container_node(NT_IF, NST_LIST), cond() {}; 1087 public: 1088 value *cond; // glued to pseudo output (dst[2]) of the PRED_SETxxx 1089 1090 virtual bool accept(vpass &p, bool enter); 1091 1092 friend class shader; 1093 }; 1094 1095 typedef std::vector<depart_node*> depart_vec; 1096 typedef std::vector<repeat_node*> repeat_vec; 1097 1098 class region_node : public container_node { 1099 protected: region_node(unsigned id)1100 region_node(unsigned id) : container_node(NT_REGION, NST_LIST), region_id(id), 1101 loop_phi(), phi(), vars_defined(), departs(), repeats(), src_loop() 1102 {} 1103 public: 1104 unsigned region_id; 1105 1106 container_node *loop_phi; 1107 container_node *phi; 1108 1109 val_set vars_defined; 1110 1111 depart_vec departs; 1112 repeat_vec repeats; 1113 1114 // true if region was created for loop in the parser, sometimes repeat_node 1115 // may be optimized away so we need to remember this information 1116 bool src_loop; 1117 1118 virtual bool accept(vpass &p, bool enter); 1119 dep_count()1120 unsigned dep_count() { return departs.size(); } rep_count()1121 unsigned rep_count() { return repeats.size() + 1; } 1122 is_loop()1123 bool is_loop() { return src_loop || !repeats.empty(); } 1124 get_entry_code_location()1125 container_node* get_entry_code_location() { 1126 node *p = first; 1127 while (p && (p->is_depart() || p->is_repeat())) 1128 p = static_cast<container_node*>(p)->first; 1129 1130 container_node *c = static_cast<container_node*>(p); 1131 if (c->is_bb()) 1132 return c; 1133 else 1134 return c->parent; 1135 } 1136 1137 void expand_depart(depart_node *d); 1138 void expand_repeat(repeat_node *r); 1139 1140 friend class shader; 1141 }; 1142 1143 class bb_node : public container_node { 1144 protected: bb_node(unsigned id,unsigned loop_level)1145 bb_node(unsigned id, unsigned loop_level) 1146 : container_node(NT_LIST, NST_BB), id(id), loop_level(loop_level) {} 1147 public: 1148 unsigned id; 1149 unsigned loop_level; 1150 1151 virtual bool accept(vpass &p, bool enter); 1152 1153 friend class shader; 1154 }; 1155 1156 1157 typedef std::vector<region_node*> regions_vec; 1158 typedef std::vector<bb_node*> bbs_vec; 1159 typedef std::list<node*> sched_queue; 1160 typedef sched_queue::iterator sq_iterator; 1161 typedef std::vector<node*> node_vec; 1162 typedef std::list<node*> node_list; 1163 typedef std::set<node*> node_set; 1164 1165 1166 1167 } // namespace r600_sb 1168 1169 #endif /* R600_SB_IR_H_ */ 1170