1 /* 2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27 #ifndef SB_BC_H_ 28 #define SB_BC_H_ 29 30 #include <stdint.h> 31 #include "r600_isa.h" 32 33 #include <cstdio> 34 #include <string> 35 #include <vector> 36 #include <stack> 37 38 struct r600_bytecode; 39 struct r600_shader; 40 41 namespace r600_sb { 42 43 class hw_encoding_format; 44 class node; 45 class alu_node; 46 class cf_node; 47 class fetch_node; 48 class alu_group_node; 49 class region_node; 50 class shader; 51 class value; 52 53 class sb_ostream { 54 public: sb_ostream()55 sb_ostream() {} 56 57 virtual void write(const char *s) = 0; 58 59 sb_ostream& operator <<(const char *s) { 60 write(s); 61 return *this; 62 } 63 64 sb_ostream& operator <<(const std::string& s) { 65 return *this << s.c_str(); 66 } 67 68 sb_ostream& operator <<(void *p) { 69 char b[32]; 70 sprintf(b, "%p", p); 71 return *this << b; 72 } 73 74 sb_ostream& operator <<(char c) { 75 char b[2]; 76 sprintf(b, "%c", c); 77 return *this << b; 78 } 79 80 sb_ostream& operator <<(int n) { 81 char b[32]; 82 sprintf(b, "%d", n); 83 return *this << b; 84 } 85 86 sb_ostream& operator <<(unsigned n) { 87 char b[32]; 88 sprintf(b, "%u", n); 89 return *this << b; 90 } 91 92 sb_ostream& operator <<(double d) { 93 char b[32]; 94 snprintf(b, 32, "%g", d); 95 return *this << b; 96 } 97 98 // print as field of specified width, right aligned print_w(int n,int width)99 void print_w(int n, int width) { 100 char b[256],f[8]; 101 sprintf(f, "%%%dd", width); 102 snprintf(b, 256, f, n); 103 write(b); 104 } 105 106 // print as field of specified width, left aligned print_wl(int n,int width)107 void print_wl(int n, int width) { 108 char b[256],f[8]; 109 sprintf(f, "%%-%dd", width); 110 snprintf(b, 256, f, n); 111 write(b); 112 } 113 114 // print as field of specified width, left aligned print_wl(const std::string & s,int width)115 void print_wl(const std::string &s, int width) { 116 write(s.c_str()); 117 int l = s.length(); 118 while (l++ < width) { 119 write(" "); 120 } 121 } 122 123 // print int as field of specified width, right aligned, zero-padded print_zw(int n,int width)124 void print_zw(int n, int width) { 125 char b[256],f[8]; 126 sprintf(f, "%%0%dd", width); 127 snprintf(b, 256, f, n); 128 write(b); 129 } 130 131 // print int as field of specified width, right aligned, zero-padded, hex print_zw_hex(int n,int width)132 void print_zw_hex(int n, int width) { 133 char b[256],f[8]; 134 sprintf(f, "%%0%dx", width); 135 snprintf(b, 256, f, n); 136 write(b); 137 } 138 }; 139 140 class sb_ostringstream : public sb_ostream { 141 std::string data; 142 public: sb_ostringstream()143 sb_ostringstream() : data() {} 144 write(const char * s)145 virtual void write(const char *s) { 146 data += s; 147 } 148 clear()149 void clear() { data.clear(); } 150 c_str()151 const char* c_str() { return data.c_str(); } str()152 std::string& str() { return data; } 153 }; 154 155 class sb_log : public sb_ostream { 156 FILE *o; 157 public: sb_log()158 sb_log() : o(stderr) {} 159 write(const char * s)160 virtual void write(const char *s) { 161 fputs(s, o); 162 } 163 }; 164 165 extern sb_log sblog; 166 167 enum shader_target 168 { 169 TARGET_UNKNOWN, 170 TARGET_VS, 171 TARGET_ES, 172 TARGET_PS, 173 TARGET_GS, 174 TARGET_GS_COPY, 175 TARGET_COMPUTE, 176 TARGET_FETCH, 177 TARGET_HS, 178 TARGET_LS, 179 180 TARGET_NUM 181 }; 182 183 enum sb_hw_class_bits 184 { 185 HB_R6 = (1<<0), 186 HB_R7 = (1<<1), 187 HB_EG = (1<<2), 188 HB_CM = (1<<3), 189 190 HB_R6R7 = (HB_R6 | HB_R7), 191 HB_EGCM = (HB_EG | HB_CM), 192 HB_R6R7EG = (HB_R6 | HB_R7 | HB_EG), 193 HB_R7EGCM = (HB_R7 | HB_EG | HB_CM), 194 195 HB_ALL = (HB_R6 | HB_R7 | HB_EG | HB_CM) 196 }; 197 198 enum sb_hw_chip 199 { 200 HW_CHIP_UNKNOWN, 201 HW_CHIP_R600, 202 HW_CHIP_RV610, 203 HW_CHIP_RV630, 204 HW_CHIP_RV670, 205 HW_CHIP_RV620, 206 HW_CHIP_RV635, 207 HW_CHIP_RS780, 208 HW_CHIP_RS880, 209 HW_CHIP_RV770, 210 HW_CHIP_RV730, 211 HW_CHIP_RV710, 212 HW_CHIP_RV740, 213 HW_CHIP_CEDAR, 214 HW_CHIP_REDWOOD, 215 HW_CHIP_JUNIPER, 216 HW_CHIP_CYPRESS, 217 HW_CHIP_HEMLOCK, 218 HW_CHIP_PALM, 219 HW_CHIP_SUMO, 220 HW_CHIP_SUMO2, 221 HW_CHIP_BARTS, 222 HW_CHIP_TURKS, 223 HW_CHIP_CAICOS, 224 HW_CHIP_CAYMAN, 225 HW_CHIP_ARUBA 226 }; 227 228 enum sb_hw_class 229 { 230 HW_CLASS_UNKNOWN, 231 HW_CLASS_R600, 232 HW_CLASS_R700, 233 HW_CLASS_EVERGREEN, 234 HW_CLASS_CAYMAN 235 }; 236 237 enum alu_slots { 238 SLOT_X = 0, 239 SLOT_Y = 1, 240 SLOT_Z = 2, 241 SLOT_W = 3, 242 SLOT_TRANS = 4 243 }; 244 245 enum misc_consts { 246 MAX_ALU_LITERALS = 4, 247 MAX_ALU_SLOTS = 128, 248 MAX_GPR = 128, 249 MAX_CHAN = 4 250 251 }; 252 253 enum alu_src_sel { 254 255 ALU_SRC_LDS_OQ_A = 219, 256 ALU_SRC_LDS_OQ_B = 220, 257 ALU_SRC_LDS_OQ_A_POP = 221, 258 ALU_SRC_LDS_OQ_B_POP = 222, 259 ALU_SRC_LDS_DIRECT_A = 223, 260 ALU_SRC_LDS_DIRECT_B = 224, 261 ALU_SRC_TIME_HI = 227, 262 ALU_SRC_TIME_LO = 228, 263 ALU_SRC_MASK_HI = 229, 264 ALU_SRC_MASK_LO = 230, 265 ALU_SRC_HW_WAVE_ID = 231, 266 ALU_SRC_SIMD_ID = 232, 267 ALU_SRC_SE_ID = 233, 268 ALU_SRC_HW_THREADGRP_ID = 234, 269 ALU_SRC_WAVE_ID_IN_GRP = 235, 270 ALU_SRC_NUM_THREADGRP_WAVES = 236, 271 ALU_SRC_HW_ALU_ODD = 237, 272 ALU_SRC_LOOP_IDX = 238, 273 ALU_SRC_PARAM_BASE_ADDR = 240, 274 ALU_SRC_NEW_PRIM_MASK = 241, 275 ALU_SRC_PRIM_MASK_HI = 242, 276 ALU_SRC_PRIM_MASK_LO = 243, 277 ALU_SRC_1_DBL_L = 244, 278 ALU_SRC_1_DBL_M = 245, 279 ALU_SRC_0_5_DBL_L = 246, 280 ALU_SRC_0_5_DBL_M = 247, 281 ALU_SRC_0 = 248, 282 ALU_SRC_1 = 249, 283 ALU_SRC_1_INT = 250, 284 ALU_SRC_M_1_INT = 251, 285 ALU_SRC_0_5 = 252, 286 ALU_SRC_LITERAL = 253, 287 ALU_SRC_PV = 254, 288 ALU_SRC_PS = 255, 289 290 ALU_SRC_PARAM_OFFSET = 448 291 }; 292 293 enum alu_predicate_select 294 { 295 PRED_SEL_OFF = 0, 296 // RESERVED = 1, 297 PRED_SEL_0 = 2, 298 PRED_SEL_1 = 3 299 }; 300 301 302 enum alu_omod { 303 OMOD_OFF = 0, 304 OMOD_M2 = 1, 305 OMOD_M4 = 2, 306 OMOD_D2 = 3 307 }; 308 309 enum alu_index_mode { 310 INDEX_AR_X = 0, 311 INDEX_AR_Y_R600 = 1, 312 INDEX_AR_Z_R600 = 2, 313 INDEX_AR_W_R600 = 3, 314 315 INDEX_LOOP = 4, 316 INDEX_GLOBAL = 5, 317 INDEX_GLOBAL_AR_X = 6 318 }; 319 320 enum alu_cayman_mova_dst { 321 CM_MOVADST_AR_X, 322 CM_MOVADST_PC, 323 CM_MOVADST_IDX0, 324 CM_MOVADST_IDX1, 325 CM_MOVADST_CG0, // clause-global byte 0 326 CM_MOVADST_CG1, 327 CM_MOVADST_CG2, 328 CM_MOVADST_CG3 329 }; 330 331 enum alu_cayman_exec_mask_op { 332 CM_EMO_DEACTIVATE, 333 CM_EMO_BREAK, 334 CM_EMO_CONTINUE, 335 CM_EMO_KILL 336 }; 337 338 339 enum cf_exp_type { 340 EXP_PIXEL, 341 EXP_POS, 342 EXP_PARAM, 343 344 EXP_TYPE_COUNT 345 }; 346 347 enum cf_mem_type { 348 MEM_WRITE, 349 MEM_WRITE_IND, 350 MEM_WRITE_ACK, 351 MEM_WRITE_IND_ACK 352 }; 353 354 355 enum alu_kcache_mode { 356 KC_LOCK_NONE, 357 KC_LOCK_1, 358 KC_LOCK_2, 359 KC_LOCK_LOOP 360 }; 361 362 enum alu_kcache_index_mode { 363 KC_INDEX_NONE, 364 KC_INDEX_0, 365 KC_INDEX_1, 366 KC_INDEX_INVALID 367 }; 368 369 enum chan_select { 370 SEL_X = 0, 371 SEL_Y = 1, 372 SEL_Z = 2, 373 SEL_W = 3, 374 SEL_0 = 4, 375 SEL_1 = 5, 376 // RESERVED = 6, 377 SEL_MASK = 7 378 }; 379 380 enum bank_swizzle { 381 VEC_012 = 0, 382 VEC_021 = 1, 383 VEC_120 = 2, 384 VEC_102 = 3, 385 VEC_201 = 4, 386 VEC_210 = 5, 387 388 VEC_NUM = 6, 389 390 SCL_210 = 0, 391 SCL_122 = 1, 392 SCL_212 = 2, 393 SCL_221 = 3, 394 395 SCL_NUM = 4 396 397 }; 398 399 enum sched_queue_id { 400 SQ_CF, 401 SQ_ALU, 402 SQ_TEX, 403 SQ_VTX, 404 SQ_GDS, 405 406 SQ_NUM 407 }; 408 409 struct literal { 410 union { 411 int32_t i; 412 uint32_t u; 413 float f; 414 }; 415 iliteral416 literal(int32_t i = 0) : i(i) {} literalliteral417 literal(uint32_t u) : u(u) {} literalliteral418 literal(float f) : f(f) {} literalliteral419 literal(double f) : f(f) {} uint32_tliteral420 operator uint32_t() const { return u; } 421 bool operator ==(literal l) { return u == l.u; } 422 bool operator ==(int v_int) { return i == v_int; } 423 bool operator ==(unsigned v_uns) { return u == v_uns; } 424 }; 425 426 struct bc_kcache { 427 unsigned mode; 428 unsigned bank; 429 unsigned addr; 430 unsigned index_mode; 431 } ; 432 433 // TODO optimize bc structures 434 435 struct bc_cf { 436 437 bc_kcache kc[4]; 438 439 unsigned id; 440 441 442 const cf_op_info * op_ptr; 443 unsigned op; 444 445 unsigned addr:32; 446 447 unsigned alt_const:1; 448 unsigned uses_waterfall:1; 449 450 unsigned barrier:1; 451 unsigned count:7; 452 unsigned pop_count:3; 453 unsigned call_count:6; 454 unsigned whole_quad_mode:1; 455 unsigned valid_pixel_mode:1; 456 457 unsigned jumptable_sel:3; 458 unsigned cf_const:5; 459 unsigned cond:2; 460 unsigned end_of_program:1; 461 462 unsigned array_base:13; 463 unsigned elem_size:2; 464 unsigned index_gpr:7; 465 unsigned rw_gpr:7; 466 unsigned rw_rel:1; 467 unsigned type:2; 468 469 unsigned burst_count:4; 470 unsigned mark:1; 471 unsigned sel[4]; 472 473 unsigned array_size:12; 474 unsigned comp_mask:4; 475 476 unsigned rat_id:4; 477 unsigned rat_inst:6; 478 unsigned rat_index_mode:2; 479 set_opbc_cf480 void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_cf(op); } 481 is_alu_extendedbc_cf482 bool is_alu_extended() { 483 assert(op_ptr->flags & CF_ALU); 484 return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE || 485 kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode != KC_INDEX_NONE || 486 kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode != KC_INDEX_NONE; 487 } 488 489 }; 490 491 struct bc_alu_src { 492 unsigned sel:9; 493 unsigned chan:2; 494 unsigned neg:1; 495 unsigned abs:1; 496 unsigned rel:1; 497 literal value; 498 clearbc_alu_src499 void clear() { 500 sel = 0; 501 chan = 0; 502 neg = 0; 503 abs = 0; 504 rel = 0; 505 value = 0; 506 } 507 }; 508 509 struct bc_alu { 510 const alu_op_info * op_ptr; 511 unsigned op; 512 513 bc_alu_src src[3]; 514 515 unsigned dst_gpr:7; 516 unsigned dst_chan:2; 517 unsigned dst_rel:1; 518 unsigned clamp:1; 519 unsigned omod:2; 520 unsigned bank_swizzle:3; 521 522 unsigned index_mode:3; 523 unsigned last:1; 524 unsigned pred_sel:2; 525 526 unsigned fog_merge:1; 527 unsigned write_mask:1; 528 unsigned update_exec_mask:1; 529 unsigned update_pred:1; 530 531 unsigned slot:3; 532 533 unsigned lds_idx_offset:6; 534 535 alu_op_flags slot_flags; 536 set_opbc_alu537 void set_op(unsigned op) { 538 this->op = op; 539 op_ptr = r600_isa_alu(op); 540 } clearbc_alu541 void clear() { 542 op_ptr = nullptr; 543 op = 0; 544 for (int i = 0; i < 3; ++i) 545 src[i].clear(); 546 dst_gpr = 0; 547 dst_chan = 0; 548 dst_rel = 0; 549 clamp = 0; 550 omod = 0; 551 bank_swizzle = 0; 552 index_mode = 0; 553 last = 0; 554 pred_sel = 0; 555 fog_merge = 0; 556 write_mask = 0; 557 update_exec_mask = 0; 558 update_pred = 0; 559 slot = 0; 560 lds_idx_offset = 0; 561 slot_flags = AF_NONE; 562 } bc_alubc_alu563 bc_alu() { 564 clear(); 565 } 566 }; 567 568 struct bc_fetch { 569 const fetch_op_info * op_ptr; 570 unsigned op; 571 572 unsigned bc_frac_mode:1; 573 unsigned fetch_whole_quad:1; 574 unsigned resource_id:8; 575 576 unsigned src_gpr:7; 577 unsigned src_rel:1; 578 unsigned src_rel_global:1; /* for GDS ops */ 579 unsigned src_sel[4]; 580 581 unsigned dst_gpr:7; 582 unsigned dst_rel:1; 583 unsigned dst_rel_global:1; /* for GDS ops */ 584 unsigned dst_sel[4]; 585 586 unsigned alt_const:1; 587 588 unsigned inst_mod:2; 589 unsigned resource_index_mode:2; 590 unsigned sampler_index_mode:2; 591 592 unsigned coord_type[4]; 593 unsigned lod_bias:7; 594 595 unsigned offset[3]; 596 597 unsigned sampler_id:5; 598 599 600 unsigned fetch_type:2; 601 unsigned mega_fetch_count:6; 602 unsigned coalesced_read:1; 603 unsigned structured_read:2; 604 unsigned lds_req:1; 605 606 unsigned data_format:6; 607 unsigned format_comp_all:1; 608 unsigned num_format_all:2; 609 unsigned semantic_id:8; 610 unsigned srf_mode_all:1; 611 unsigned use_const_fields:1; 612 613 unsigned const_buf_no_stride:1; 614 unsigned endian_swap:2; 615 unsigned mega_fetch:1; 616 617 unsigned src2_gpr:7; /* for GDS */ 618 unsigned alloc_consume:1; 619 unsigned uav_id:4; 620 unsigned uav_index_mode:2; 621 unsigned bcast_first_req:1; 622 623 /* for MEM ops */ 624 unsigned elem_size:2; 625 unsigned uncached:1; 626 unsigned indexed:1; 627 unsigned burst_count:4; 628 unsigned array_base:13; 629 unsigned array_size:12; 630 set_opbc_fetch631 void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); } 632 }; 633 634 struct shader_stats { 635 unsigned ndw; 636 unsigned ngpr; 637 unsigned nstack; 638 639 unsigned cf; // clause instructions not included 640 unsigned alu; 641 unsigned alu_clauses; 642 unsigned fetch_clauses; 643 unsigned fetch; 644 unsigned alu_groups; 645 646 unsigned shaders; // number of shaders (for accumulated stats) 647 shader_statsshader_stats648 shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(), 649 fetch_clauses(), fetch(), alu_groups(), shaders() {} 650 651 void collect(node *n); 652 void accumulate(shader_stats &s); 653 void dump(); 654 void dump_diff(shader_stats &s); 655 }; 656 657 class sb_context { 658 659 public: 660 661 shader_stats src_stats, opt_stats; 662 663 r600_isa *isa; 664 665 sb_hw_chip hw_chip; 666 sb_hw_class hw_class; 667 668 unsigned alu_temp_gprs; 669 unsigned max_fetch; 670 bool has_trans; 671 unsigned vtx_src_num; 672 unsigned num_slots; 673 bool uses_mova_gpr; 674 675 bool r6xx_gpr_index_workaround; 676 677 bool stack_workaround_8xx; 678 bool stack_workaround_9xx; 679 680 unsigned wavefront_size; 681 unsigned stack_entry_size; 682 683 static unsigned dump_pass; 684 static unsigned dump_stat; 685 686 static unsigned dry_run; 687 static unsigned no_fallback; 688 static unsigned safe_math; 689 690 static unsigned dskip_start; 691 static unsigned dskip_end; 692 static unsigned dskip_mode; 693 sb_context()694 sb_context() : src_stats(), opt_stats(), isa(0), 695 hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN) {} 696 697 int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass); 698 is_r600()699 bool is_r600() {return hw_class == HW_CLASS_R600;} is_r700()700 bool is_r700() {return hw_class == HW_CLASS_R700;} is_evergreen()701 bool is_evergreen() {return hw_class == HW_CLASS_EVERGREEN;} is_cayman()702 bool is_cayman() {return hw_class == HW_CLASS_CAYMAN;} is_egcm()703 bool is_egcm() {return hw_class >= HW_CLASS_EVERGREEN;} 704 needs_8xx_stack_workaround()705 bool needs_8xx_stack_workaround() { 706 if (!is_evergreen()) 707 return false; 708 709 switch (hw_chip) { 710 case HW_CHIP_HEMLOCK: 711 case HW_CHIP_CYPRESS: 712 case HW_CHIP_JUNIPER: 713 return false; 714 default: 715 return true; 716 } 717 } 718 needs_9xx_stack_workaround()719 bool needs_9xx_stack_workaround() { 720 return is_cayman(); 721 } 722 hw_class_bit()723 sb_hw_class_bits hw_class_bit() { 724 switch (hw_class) { 725 case HW_CLASS_R600:return HB_R6; 726 case HW_CLASS_R700:return HB_R7; 727 case HW_CLASS_EVERGREEN:return HB_EG; 728 case HW_CLASS_CAYMAN:return HB_CM; 729 default: assert(!"unknown hw class"); return (sb_hw_class_bits)0; 730 731 } 732 } 733 cf_opcode(unsigned op)734 unsigned cf_opcode(unsigned op) { 735 return r600_isa_cf_opcode(isa->hw_class, op); 736 } 737 alu_opcode(unsigned op)738 unsigned alu_opcode(unsigned op) { 739 return r600_isa_alu_opcode(isa->hw_class, op); 740 } 741 alu_slots(unsigned op)742 unsigned alu_slots(unsigned op) { 743 return r600_isa_alu_slots(isa->hw_class, op); 744 } 745 alu_slots(const alu_op_info * op_ptr)746 unsigned alu_slots(const alu_op_info * op_ptr) { 747 return op_ptr->slots[isa->hw_class]; 748 } 749 alu_slots_mask(const alu_op_info * op_ptr)750 unsigned alu_slots_mask(const alu_op_info * op_ptr) { 751 unsigned mask = 0; 752 unsigned slot_flags = alu_slots(op_ptr); 753 if (slot_flags & AF_V) 754 mask = 0x0F; 755 if (!is_cayman() && (slot_flags & AF_S)) 756 mask |= 0x10; 757 /* Force LDS_IDX ops into SLOT_X */ 758 if (op_ptr->opcode[0] == -1 && ((op_ptr->opcode[1] & 0xFF) == 0x11)) 759 mask = 0x01; 760 return mask; 761 } 762 fetch_opcode(unsigned op)763 unsigned fetch_opcode(unsigned op) { 764 return r600_isa_fetch_opcode(isa->hw_class, op); 765 } 766 is_kcache_sel(unsigned sel)767 bool is_kcache_sel(unsigned sel) { 768 return ((sel >= 128 && sel < 192) || (sel >= 256 && sel < 320)); 769 } 770 is_lds_oq(unsigned sel)771 bool is_lds_oq(unsigned sel) { 772 return (sel >= 0xdb && sel <= 0xde); 773 } 774 775 const char * get_hw_class_name(); 776 const char * get_hw_chip_name(); 777 778 }; 779 780 #define SB_DUMP_STAT(a) do { if (sb_context::dump_stat) { a } } while (0) 781 #define SB_DUMP_PASS(a) do { if (sb_context::dump_pass) { a } } while (0) 782 783 class bc_decoder { 784 785 sb_context &ctx; 786 787 uint32_t* dw; 788 unsigned ndw; 789 790 public: 791 bc_decoder(sb_context & sctx,uint32_t * data,unsigned size)792 bc_decoder(sb_context &sctx, uint32_t *data, unsigned size) 793 : ctx(sctx), dw(data), ndw(size) {} 794 795 int decode_cf(unsigned &i, bc_cf &bc); 796 int decode_alu(unsigned &i, bc_alu &bc); 797 int decode_fetch(unsigned &i, bc_fetch &bc); 798 799 private: 800 int decode_cf_alu(unsigned &i, bc_cf &bc); 801 int decode_cf_exp(unsigned &i, bc_cf &bc); 802 int decode_cf_mem(unsigned &i, bc_cf &bc); 803 804 int decode_fetch_vtx(unsigned &i, bc_fetch &bc); 805 int decode_fetch_gds(unsigned &i, bc_fetch &bc); 806 int decode_fetch_mem(unsigned &i, bc_fetch &bc); 807 }; 808 809 // bytecode format definition 810 811 class hw_encoding_format { 812 const sb_hw_class_bits hw_target; //FIXME: debug - remove after testing 813 hw_encoding_format(); 814 protected: 815 uint32_t value; 816 public: hw_encoding_format(sb_hw_class_bits hw)817 hw_encoding_format(sb_hw_class_bits hw) 818 : hw_target(hw), value(0) {} hw_encoding_format(uint32_t v,sb_hw_class_bits hw)819 hw_encoding_format(uint32_t v, sb_hw_class_bits hw) 820 : hw_target(hw), value(v) {} get_value(sb_hw_class_bits hw)821 uint32_t get_value(sb_hw_class_bits hw) const { 822 assert((hw & hw_target) == hw); 823 return value; 824 } 825 }; 826 827 #define BC_FORMAT_BEGIN_HW(fmt, hwset) \ 828 class fmt##_##hwset : public hw_encoding_format {\ 829 typedef fmt##_##hwset thistype; \ 830 public: \ 831 fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \ 832 fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {}; 833 834 #define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL) 835 836 #define BC_FORMAT_END(fmt) }; 837 838 // bytecode format field definition 839 840 #define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \ 841 thistype & name(unsigned v) { \ 842 value |= ((v&((1ull<<((last_bit)-(first_bit)+1))-1))<<(first_bit)); \ 843 return *this; \ 844 } \ 845 unsigned get_##name() const { \ 846 return (value>>(first_bit))&((1ull<<((last_bit)-(first_bit)+1))-1); \ 847 } 848 849 #define BC_RSRVD(fmt, last_bit, first_bit) 850 851 // CLAMP macro defined elsewhere interferes with bytecode field name 852 #undef CLAMP 853 #include "sb_bc_fmt_def.inc" 854 855 #undef BC_FORMAT_BEGIN 856 #undef BC_FORMAT_END 857 #undef BC_FIELD 858 #undef BC_RSRVD 859 860 class bc_parser { 861 sb_context & ctx; 862 863 bc_decoder *dec; 864 865 r600_bytecode *bc; 866 r600_shader *pshader; 867 868 uint32_t *dw; 869 unsigned bc_ndw; 870 871 unsigned max_cf; 872 873 shader *sh; 874 875 int error; 876 877 alu_node *slots[2][5]; 878 unsigned cgroup; 879 880 typedef std::vector<cf_node*> id_cf_map; 881 id_cf_map cf_map; 882 883 typedef std::stack<region_node*> region_stack; 884 region_stack loop_stack; 885 886 bool gpr_reladdr; 887 888 // Note: currently relies on input emitting SET_CF in same basic block as uses 889 value *cf_index_value[2]; 890 alu_node *mova; 891 public: 892 bc_parser(sb_context & sctx,r600_bytecode * bc,r600_shader * pshader)893 bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) : 894 ctx(sctx), dec(), bc(bc), pshader(pshader), 895 dw(), bc_ndw(), max_cf(), 896 sh(), error(), slots(), cgroup(), 897 cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { } 898 899 int decode(); 900 int prepare(); 901 get_shader()902 shader* get_shader() { assert(!error); return sh; } 903 904 private: 905 906 int decode_shader(); 907 908 int parse_decls(); 909 910 int decode_cf(unsigned &i, bool &eop); 911 912 int decode_alu_clause(cf_node *cf); 913 int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt); 914 915 int decode_fetch_clause(cf_node *cf); 916 917 int prepare_ir(); 918 int prepare_alu_clause(cf_node *cf); 919 int prepare_alu_group(cf_node* cf, alu_group_node *g); 920 int prepare_fetch_clause(cf_node *cf); 921 922 int prepare_loop(cf_node *c); 923 int prepare_if(cf_node *c); 924 925 void save_set_cf_index(value *val, unsigned idx); 926 value *get_cf_index_value(unsigned idx); 927 void save_mova(alu_node *mova); 928 alu_node *get_mova(); 929 }; 930 931 932 933 934 class bytecode { 935 typedef std::vector<uint32_t> bc_vector; 936 sb_hw_class_bits hw_class_bit; 937 938 bc_vector bc; 939 940 unsigned pos; 941 942 public: 943 944 bytecode(sb_hw_class_bits hw, unsigned rdw = 256) hw_class_bit(hw)945 : hw_class_bit(hw), pos(0) { bc.reserve(rdw); } 946 ndw()947 unsigned ndw() { return bc.size(); } 948 write_data(uint32_t * dst)949 void write_data(uint32_t* dst) { 950 std::copy(bc.begin(), bc.end(), dst); 951 } 952 align(unsigned a)953 void align(unsigned a) { 954 unsigned size = bc.size(); 955 size = (size + a - 1) & ~(a-1); 956 bc.resize(size); 957 } 958 set_size(unsigned sz)959 void set_size(unsigned sz) { 960 assert(sz >= bc.size()); 961 bc.resize(sz); 962 } 963 seek(unsigned p)964 void seek(unsigned p) { 965 if (p != pos) { 966 if (p > bc.size()) { 967 bc.resize(p); 968 } 969 pos = p; 970 } 971 } 972 get_pos()973 unsigned get_pos() { return pos; } data()974 uint32_t *data() { return &bc[0]; } 975 976 bytecode & operator <<(uint32_t v) { 977 if (pos == ndw()) { 978 bc.push_back(v); 979 } else 980 bc.at(pos) = v; 981 ++pos; 982 return *this; 983 } 984 985 bytecode & operator <<(const hw_encoding_format &e) { 986 *this << e.get_value(hw_class_bit); 987 return *this; 988 } 989 990 bytecode & operator <<(const bytecode &b) { 991 bc.insert(bc.end(), b.bc.begin(), b.bc.end()); 992 return *this; 993 } 994 at(unsigned dw_id)995 uint32_t at(unsigned dw_id) { return bc.at(dw_id); } 996 }; 997 998 999 class bc_builder { 1000 shader &sh; 1001 sb_context &ctx; 1002 bytecode bb; 1003 int error; 1004 1005 public: 1006 1007 bc_builder(shader &s); 1008 int build(); get_bytecode()1009 bytecode& get_bytecode() { assert(!error); return bb; } 1010 1011 private: 1012 1013 int build_cf(cf_node *n); 1014 1015 int build_cf_alu(cf_node *n); 1016 int build_cf_mem(cf_node *n); 1017 int build_cf_exp(cf_node *n); 1018 1019 int build_alu_clause(cf_node *n); 1020 int build_alu_group(alu_group_node *n); 1021 int build_alu(alu_node *n); 1022 1023 int build_fetch_clause(cf_node *n); 1024 int build_fetch_tex(fetch_node *n); 1025 int build_fetch_vtx(fetch_node *n); 1026 int build_fetch_gds(fetch_node *n); 1027 int build_fetch_mem(fetch_node* n); 1028 }; 1029 1030 } // namespace r600_sb 1031 1032 #endif /* SB_BC_H_ */ 1033