1 /* 2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27 #ifndef SB_PASS_H_ 28 #define SB_PASS_H_ 29 30 #include <stack> 31 32 namespace r600_sb { 33 34 class pass { 35 protected: 36 sb_context &ctx; 37 shader &sh; 38 39 public: 40 pass(shader &s); 41 42 virtual int run(); 43 ~pass()44 virtual ~pass() {} 45 }; 46 47 class vpass : public pass { 48 49 public: 50 vpass(shader & s)51 vpass(shader &s) : pass(s) {} 52 53 virtual int init(); 54 virtual int done(); 55 56 virtual int run(); 57 virtual void run_on(container_node &n); 58 59 virtual bool visit(node &n, bool enter); 60 virtual bool visit(container_node &n, bool enter); 61 virtual bool visit(alu_group_node &n, bool enter); 62 virtual bool visit(cf_node &n, bool enter); 63 virtual bool visit(alu_node &n, bool enter); 64 virtual bool visit(alu_packed_node &n, bool enter); 65 virtual bool visit(fetch_node &n, bool enter); 66 virtual bool visit(region_node &n, bool enter); 67 virtual bool visit(repeat_node &n, bool enter); 68 virtual bool visit(depart_node &n, bool enter); 69 virtual bool visit(if_node &n, bool enter); 70 virtual bool visit(bb_node &n, bool enter); 71 72 }; 73 74 class rev_vpass : public vpass { 75 76 public: rev_vpass(shader & s)77 rev_vpass(shader &s) : vpass(s) {} 78 79 virtual void run_on(container_node &n); 80 }; 81 82 83 // =================== PASSES 84 85 class bytecode; 86 87 class bc_dump : public vpass { 88 using vpass::visit; 89 90 uint32_t *bc_data; 91 unsigned ndw; 92 93 unsigned id; 94 95 unsigned new_group, group_index; 96 97 public: 98 99 bc_dump(shader &s, bytecode *bc = NULL); 100 bc_dump(shader & s,uint32_t * bc_ptr,unsigned ndw)101 bc_dump(shader &s, uint32_t *bc_ptr, unsigned ndw) : 102 vpass(s), bc_data(bc_ptr), ndw(ndw), id(), new_group(), group_index() {} 103 104 virtual int init(); 105 virtual int done(); 106 107 virtual bool visit(cf_node &n, bool enter); 108 virtual bool visit(alu_node &n, bool enter); 109 virtual bool visit(fetch_node &n, bool enter); 110 111 void dump_dw(unsigned dw_id, unsigned count = 2); 112 113 void dump(cf_node& n); 114 void dump(alu_node& n); 115 void dump(fetch_node& n); 116 }; 117 118 119 class dce_cleanup : public vpass { 120 using vpass::visit; 121 122 bool remove_unused; 123 124 public: 125 dce_cleanup(shader & s)126 dce_cleanup(shader &s) : vpass(s), 127 remove_unused(s.dce_flags & DF_REMOVE_UNUSED), nodes_changed(false) {} 128 129 virtual int run(); 130 131 virtual bool visit(node &n, bool enter); 132 virtual bool visit(alu_group_node &n, bool enter); 133 virtual bool visit(cf_node &n, bool enter); 134 virtual bool visit(alu_node &n, bool enter); 135 virtual bool visit(alu_packed_node &n, bool enter); 136 virtual bool visit(fetch_node &n, bool enter); 137 virtual bool visit(region_node &n, bool enter); 138 virtual bool visit(container_node &n, bool enter); 139 140 private: 141 142 void cleanup_dst(node &n); 143 bool cleanup_dst_vec(vvec &vv); 144 145 // Did we alter/remove nodes during a single pass? 146 bool nodes_changed; 147 }; 148 149 150 class def_use : public pass { 151 152 public: 153 def_use(shader & sh)154 def_use(shader &sh) : pass(sh) {} 155 156 virtual int run(); 157 void run_on(node *n, bool defs); 158 159 private: 160 161 void process_uses(node *n); 162 void process_defs(node *n, vvec &vv, bool arr_def); 163 void process_phi(container_node *c, bool defs, bool uses); 164 }; 165 166 167 168 class dump : public vpass { 169 using vpass::visit; 170 171 int level; 172 173 public: 174 dump(shader & s)175 dump(shader &s) : vpass(s), level(0) {} 176 177 virtual bool visit(node &n, bool enter); 178 virtual bool visit(container_node &n, bool enter); 179 virtual bool visit(alu_group_node &n, bool enter); 180 virtual bool visit(cf_node &n, bool enter); 181 virtual bool visit(alu_node &n, bool enter); 182 virtual bool visit(alu_packed_node &n, bool enter); 183 virtual bool visit(fetch_node &n, bool enter); 184 virtual bool visit(region_node &n, bool enter); 185 virtual bool visit(repeat_node &n, bool enter); 186 virtual bool visit(depart_node &n, bool enter); 187 virtual bool visit(if_node &n, bool enter); 188 virtual bool visit(bb_node &n, bool enter); 189 190 191 static void dump_op(node &n, const char *name); 192 static void dump_vec(const vvec & vv); 193 static void dump_set(shader &sh, val_set & v); 194 195 static void dump_rels(vvec & vv); 196 197 static void dump_val(value *v); 198 static void dump_op(node *n); 199 200 static void dump_op_list(container_node *c); 201 static void dump_queue(sched_queue &q); 202 203 static void dump_alu(alu_node *n); 204 205 private: 206 207 void indent(); 208 209 void dump_common(node &n); 210 void dump_flags(node &n); 211 212 void dump_live_values(container_node &n, bool before); 213 }; 214 215 216 // Global Code Motion 217 218 class gcm : public pass { 219 220 sched_queue bu_ready[SQ_NUM]; 221 sched_queue bu_ready_next[SQ_NUM]; 222 sched_queue bu_ready_early[SQ_NUM]; 223 sched_queue ready; 224 sched_queue ready_above; 225 226 unsigned outstanding_lds_oq; 227 container_node pending; 228 229 struct op_info { 230 bb_node* top_bb; 231 bb_node* bottom_bb; op_infoop_info232 op_info() : top_bb(), bottom_bb() {} 233 }; 234 235 typedef std::map<node*, op_info> op_info_map; 236 237 typedef std::map<node*, unsigned> nuc_map; 238 239 op_info_map op_map; 240 nuc_map uses; 241 242 typedef std::vector<nuc_map> nuc_stack; 243 244 nuc_stack nuc_stk; 245 unsigned ucs_level; 246 247 bb_node * bu_bb; 248 249 vvec pending_defs; 250 251 node_list pending_nodes; 252 253 unsigned cur_sq; 254 255 // for register pressure tracking in bottom-up pass 256 val_set live; 257 int live_count; 258 259 static const int rp_threshold = 100; 260 261 bool pending_exec_mask_update; 262 263 public: 264 gcm(shader & sh)265 gcm(shader &sh) : pass(sh), 266 bu_ready(), bu_ready_next(), bu_ready_early(), 267 ready(), outstanding_lds_oq(), 268 op_map(), uses(), nuc_stk(1), ucs_level(), 269 bu_bb(), pending_defs(), pending_nodes(), cur_sq(), 270 live(), live_count(), pending_exec_mask_update() {} 271 272 virtual int run(); 273 274 private: 275 276 void collect_instructions(container_node *c, bool early_pass); 277 278 void sched_early(container_node *n); 279 void td_sched_bb(bb_node *bb); 280 bool td_is_ready(node *n); 281 void td_release_uses(vvec &v); 282 void td_release_val(value *v); 283 void td_schedule(bb_node *bb, node *n); 284 285 void sched_late(container_node *n); 286 void bu_sched_bb(bb_node *bb); 287 void bu_release_defs(vvec &v, bool src); 288 void bu_release_phi_defs(container_node *p, unsigned op); 289 bool bu_is_ready(node *n); 290 void bu_release_val(value *v); 291 void bu_release_op(node * n); 292 void bu_find_best_bb(node *n, op_info &oi); 293 void bu_schedule(container_node *bb, node *n); 294 295 void push_uc_stack(); 296 void pop_uc_stack(); 297 298 void init_def_count(nuc_map &m, container_node &s); 299 void init_use_count(nuc_map &m, container_node &s); 300 unsigned get_uc_vec(vvec &vv); 301 unsigned get_dc_vec(vvec &vv, bool src); 302 303 void add_ready(node *n); 304 305 void dump_uc_stack(); 306 307 unsigned real_alu_count(sched_queue &q, unsigned max); 308 309 // check if we have not less than threshold ready alu instructions 310 bool check_alu_ready_count(unsigned threshold); 311 }; 312 313 314 class gvn : public vpass { 315 using vpass::visit; 316 317 public: 318 gvn(shader & sh)319 gvn(shader &sh) : vpass(sh) {} 320 321 virtual bool visit(node &n, bool enter); 322 virtual bool visit(cf_node &n, bool enter); 323 virtual bool visit(alu_node &n, bool enter); 324 virtual bool visit(alu_packed_node &n, bool enter); 325 virtual bool visit(fetch_node &n, bool enter); 326 virtual bool visit(region_node &n, bool enter); 327 328 private: 329 330 void process_op(node &n, bool rewrite = true); 331 332 // returns true if the value was rewritten 333 bool process_src(value* &v, bool rewrite); 334 335 336 void process_alu_src_constants(node &n, value* &v); 337 }; 338 339 340 class if_conversion : public pass { 341 342 public: 343 if_conversion(shader & sh)344 if_conversion(shader &sh) : pass(sh) {} 345 346 virtual int run(); 347 348 bool run_on(region_node *r); 349 350 void convert_kill_instructions(region_node *r, value *em, bool branch, 351 container_node *c); 352 353 bool check_and_convert(region_node *r); 354 355 alu_node* convert_phi(value *select, node *phi); 356 357 }; 358 359 360 class liveness : public rev_vpass { 361 using vpass::visit; 362 363 val_set live; 364 bool live_changed; 365 366 public: 367 liveness(shader & s)368 liveness(shader &s) : rev_vpass(s), live_changed(false) {} 369 370 virtual int init(); 371 372 virtual bool visit(node &n, bool enter); 373 virtual bool visit(bb_node &n, bool enter); 374 virtual bool visit(container_node &n, bool enter); 375 virtual bool visit(alu_group_node &n, bool enter); 376 virtual bool visit(cf_node &n, bool enter); 377 virtual bool visit(alu_node &n, bool enter); 378 virtual bool visit(alu_packed_node &n, bool enter); 379 virtual bool visit(fetch_node &n, bool enter); 380 virtual bool visit(region_node &n, bool enter); 381 virtual bool visit(repeat_node &n, bool enter); 382 virtual bool visit(depart_node &n, bool enter); 383 virtual bool visit(if_node &n, bool enter); 384 385 private: 386 387 void update_interferences(); 388 void process_op(node &n); 389 390 bool remove_val(value *v); 391 bool remove_vec(vvec &v); 392 bool process_outs(node& n); 393 void process_ins(node& n); 394 395 void process_phi_outs(container_node *phi); 396 void process_phi_branch(container_node *phi, unsigned id); 397 398 bool process_maydef(value *v); 399 400 bool add_vec(vvec &vv, bool src); 401 402 void update_src_vec(vvec &vv, bool src); 403 }; 404 405 406 struct bool_op_info { 407 bool invert; 408 unsigned int_cvt; 409 410 alu_node *n; 411 }; 412 413 class peephole : public pass { 414 415 public: 416 peephole(shader & sh)417 peephole(shader &sh) : pass(sh) {} 418 419 virtual int run(); 420 421 void run_on(container_node *c); 422 423 void optimize_cc_op(alu_node *a); 424 425 void optimize_cc_op2(alu_node *a); 426 void optimize_CNDcc_op(alu_node *a); 427 428 bool get_bool_op_info(value *b, bool_op_info& bop); 429 bool get_bool_flt_to_int_source(alu_node* &a); 430 void convert_float_setcc(alu_node *f2i, alu_node *s); 431 }; 432 433 434 class psi_ops : public rev_vpass { 435 using rev_vpass::visit; 436 437 public: 438 psi_ops(shader & s)439 psi_ops(shader &s) : rev_vpass(s) {} 440 441 virtual bool visit(node &n, bool enter); 442 virtual bool visit(alu_node &n, bool enter); 443 444 bool try_inline(node &n); 445 bool try_reduce(node &n); 446 bool eliminate(node &n); 447 448 void unpredicate(node *n); 449 }; 450 451 452 // check correctness of the generated code, e.g.: 453 // - expected source operand value is the last value written to its gpr, 454 // - all arguments of phi node should be allocated to the same gpr, 455 // TODO other tests 456 class ra_checker : public pass { 457 458 typedef std::map<sel_chan, value *> reg_value_map; 459 460 typedef std::vector<reg_value_map> regmap_stack; 461 462 regmap_stack rm_stack; 463 unsigned rm_stk_level; 464 465 value* prev_dst[5]; 466 467 public: 468 ra_checker(shader & sh)469 ra_checker(shader &sh) : pass(sh), rm_stk_level(0), prev_dst() {} 470 471 virtual int run(); 472 473 void run_on(container_node *c); 474 475 void dump_error(const error_info &e); 476 void dump_all_errors(); 477 478 private: 479 rmap()480 reg_value_map& rmap() { return rm_stack[rm_stk_level]; } 481 482 void push_stack(); 483 void pop_stack(); 484 485 // when going out of the alu clause, values in the clause temporary gprs, 486 // AR, predicate values, PS/PV are destroyed 487 void kill_alu_only_regs(); 488 void error(node *n, unsigned id, std::string msg); 489 490 void check_phi_src(container_node *p, unsigned id); 491 void process_phi_dst(container_node *p); 492 void check_alu_group(alu_group_node *g); 493 void process_op_dst(node *n); 494 void check_op_src(node *n); 495 void check_src_vec(node *n, unsigned id, vvec &vv, bool src); 496 void check_value_gpr(node *n, unsigned id, value *v); 497 }; 498 499 // ======================================= 500 501 502 class ra_coalesce : public pass { 503 504 public: 505 ra_coalesce(shader & sh)506 ra_coalesce(shader &sh) : pass(sh) {} 507 508 virtual int run(); 509 }; 510 511 512 513 // ======================================= 514 515 class ra_init : public pass { 516 517 public: 518 ra_init(shader & sh)519 ra_init(shader &sh) : pass(sh), prev_chans() { 520 521 // The parameter below affects register channels distribution. 522 // For cayman (VLIW-4) we're trying to distribute the channels 523 // uniformly, this means significantly better alu slots utilization 524 // at the expense of higher gpr usage. Hopefully this will improve 525 // performance, though it has to be proven with real benchmarks yet. 526 // For VLIW-5 this method could also slightly improve slots 527 // utilization, but increased register pressure seems more significant 528 // and overall performance effect is negative according to some 529 // benchmarks, so it's not used currently. Basically, VLIW-5 doesn't 530 // really need it because trans slot (unrestricted by register write 531 // channel) allows to consume most deviations from uniform channel 532 // distribution. 533 // Value 3 means that for new allocation we'll use channel that differs 534 // from 3 last used channels. 0 for VLIW-5 effectively turns this off. 535 536 ra_tune = sh.get_ctx().is_cayman() ? 3 : 0; 537 } 538 539 virtual int run(); 540 541 private: 542 543 unsigned prev_chans; 544 unsigned ra_tune; 545 546 void add_prev_chan(unsigned chan); 547 unsigned get_preferable_chan_mask(); 548 549 bool ra_node(container_node *c); 550 bool process_op(node *n); 551 552 bool color(value *v); 553 554 void color_bs_constraint(ra_constraint *c); 555 556 void assign_color(value *v, sel_chan c); 557 void alloc_arrays(); 558 }; 559 560 // ======================================= 561 562 class ra_split : public pass { 563 564 public: 565 ra_split(shader & sh)566 ra_split(shader &sh) : pass(sh) {} 567 568 virtual int run(); 569 570 void split(container_node *n); 571 void split_op(node *n); 572 void split_alu_packed(alu_packed_node *n); 573 void split_vector_inst(node *n); 574 575 void split_packed_ins(alu_packed_node *n); 576 577 #if 0 578 void split_pinned_outs(node *n); 579 #endif 580 581 void split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz); 582 583 void split_phi_src(container_node *loc, container_node *c, unsigned id, 584 bool loop); 585 void split_phi_dst(node *loc, container_node *c, bool loop); 586 void init_phi_constraints(container_node *c); 587 }; 588 589 590 591 class ssa_prepare : public vpass { 592 using vpass::visit; 593 594 typedef std::vector<val_set> vd_stk; 595 vd_stk stk; 596 597 unsigned level; 598 599 public: ssa_prepare(shader & s)600 ssa_prepare(shader &s) : vpass(s), level(0) {} 601 602 virtual bool visit(cf_node &n, bool enter); 603 virtual bool visit(alu_node &n, bool enter); 604 virtual bool visit(fetch_node &n, bool enter); 605 virtual bool visit(region_node &n, bool enter); 606 virtual bool visit(repeat_node &n, bool enter); 607 virtual bool visit(depart_node &n, bool enter); 608 609 private: 610 push_stk()611 void push_stk() { 612 ++level; 613 if (level + 1 > stk.size()) 614 stk.resize(level+1); 615 else 616 stk[level].clear(); 617 } pop_stk()618 void pop_stk() { 619 assert(level); 620 --level; 621 stk[level].add_set(stk[level + 1]); 622 } 623 624 void add_defs(node &n); 625 cur_set()626 val_set & cur_set() { return stk[level]; } 627 628 container_node* create_phi_nodes(int count); 629 }; 630 631 class ssa_rename : public vpass { 632 using vpass::visit; 633 634 typedef sb_map<value*, unsigned> def_map; 635 636 def_map def_count; 637 def_map lds_oq_count; 638 def_map lds_rw_count; 639 std::stack<def_map> rename_stack; 640 std::stack<def_map> rename_lds_oq_stack; 641 std::stack<def_map> rename_lds_rw_stack; 642 643 typedef std::map<uint32_t, value*> val_map; 644 val_map values; 645 646 public: 647 ssa_rename(shader & s)648 ssa_rename(shader &s) : vpass(s) {} 649 650 virtual int init(); 651 652 virtual bool visit(container_node &n, bool enter); 653 virtual bool visit(node &n, bool enter); 654 virtual bool visit(alu_group_node &n, bool enter); 655 virtual bool visit(cf_node &n, bool enter); 656 virtual bool visit(alu_node &n, bool enter); 657 virtual bool visit(alu_packed_node &n, bool enter); 658 virtual bool visit(fetch_node &n, bool enter); 659 virtual bool visit(region_node &n, bool enter); 660 virtual bool visit(repeat_node &n, bool enter); 661 virtual bool visit(depart_node &n, bool enter); 662 virtual bool visit(if_node &n, bool enter); 663 664 private: 665 666 void push(node *phi); 667 void pop(); 668 669 unsigned get_index(def_map& m, value* v); 670 void set_index(def_map& m, value* v, unsigned index); 671 unsigned new_index(def_map& m, value* v); 672 673 value* rename_use(node *n, value* v); 674 value* rename_def(node *def, value* v); 675 676 void rename_src_vec(node *n, vvec &vv, bool src); 677 void rename_dst_vec(node *def, vvec &vv, bool set_def); 678 679 void rename_src(node *n); 680 void rename_dst(node *n); 681 682 void rename_phi_args(container_node *phi, unsigned op, bool def); 683 684 void rename_virt(node *n); 685 void rename_virt_val(node *n, value *v); 686 }; 687 688 class bc_finalizer : public pass { 689 690 cf_node *last_export[EXP_TYPE_COUNT]; 691 cf_node *last_cf; 692 693 unsigned ngpr; 694 unsigned nstack; 695 696 public: 697 bc_finalizer(shader & sh)698 bc_finalizer(shader &sh) : pass(sh), last_export(), last_cf(), ngpr(), 699 nstack() {} 700 701 virtual int run(); 702 703 void finalize_loop(region_node *r); 704 void finalize_if(region_node *r); 705 706 void run_on(container_node *c); 707 708 void insert_rv6xx_load_ar_workaround(alu_group_node *b4); 709 void finalize_alu_group(alu_group_node *g, node *prev_node); 710 bool finalize_alu_src(alu_group_node *g, alu_node *a, alu_group_node *prev_node); 711 712 void emit_set_grad(fetch_node* f); 713 void finalize_fetch(fetch_node *f); 714 715 void finalize_cf(cf_node *c); 716 717 sel_chan translate_kcache(cf_node *alu, value *v); 718 719 void update_ngpr(unsigned gpr); 720 void update_nstack(region_node *r, unsigned add = 0); 721 722 unsigned get_stack_depth(node *n, unsigned &loops, unsigned &ifs, 723 unsigned add = 0); 724 725 void cf_peephole(); 726 727 private: 728 void copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start); 729 void emit_set_texture_offsets(fetch_node &f); 730 }; 731 732 733 } // namespace r600_sb 734 735 #endif /* SB_PASS_H_ */ 736