1 /* 2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27 #ifndef SB_BC_H_ 28 #define SB_BC_H_ 29 30 #include <stdint.h> 31 #include "r600_isa.h" 32 33 #include <cstdio> 34 #include <string> 35 #include <vector> 36 #include <stack> 37 38 struct r600_bytecode; 39 struct r600_shader; 40 41 namespace r600_sb { 42 43 class hw_encoding_format; 44 class node; 45 class alu_node; 46 class cf_node; 47 class fetch_node; 48 class alu_group_node; 49 class region_node; 50 class shader; 51 class value; 52 53 class sb_ostream { 54 public: sb_ostream()55 sb_ostream() {} 56 57 virtual void write(const char *s) = 0; 58 59 sb_ostream& operator <<(const char *s) { 60 write(s); 61 return *this; 62 } 63 64 sb_ostream& operator <<(const std::string& s) { 65 return *this << s.c_str(); 66 } 67 68 sb_ostream& operator <<(void *p) { 69 char b[32]; 70 sprintf(b, "%p", p); 71 return *this << b; 72 } 73 74 sb_ostream& operator <<(char c) { 75 char b[2]; 76 sprintf(b, "%c", c); 77 return *this << b; 78 } 79 80 sb_ostream& operator <<(int n) { 81 char b[32]; 82 sprintf(b, "%d", n); 83 return *this << b; 84 } 85 86 sb_ostream& operator <<(unsigned n) { 87 char b[32]; 88 sprintf(b, "%u", n); 89 return *this << b; 90 } 91 92 sb_ostream& operator <<(double d) { 93 char b[32]; 94 snprintf(b, 32, "%g", d); 95 return *this << b; 96 } 97 98 // print as field of specified width, right aligned print_w(int n,int width)99 void print_w(int n, int width) { 100 char b[256],f[8]; 101 sprintf(f, "%%%dd", width); 102 snprintf(b, 256, f, n); 103 write(b); 104 } 105 106 // print as field of specified width, left aligned print_wl(int n,int width)107 void print_wl(int n, int width) { 108 char b[256],f[8]; 109 sprintf(f, "%%-%dd", width); 110 snprintf(b, 256, f, n); 111 write(b); 112 } 113 114 // print as field of specified width, left aligned print_wl(const std::string & s,int width)115 void print_wl(const std::string &s, int width) { 116 write(s.c_str()); 117 int l = s.length(); 118 while (l++ < width) { 119 write(" "); 120 } 121 } 122 123 // print int as field of specified width, right aligned, zero-padded print_zw(int n,int width)124 void print_zw(int n, int width) { 125 char b[256],f[8]; 126 sprintf(f, "%%0%dd", width); 127 snprintf(b, 256, f, n); 128 write(b); 129 } 130 131 // print int as field of specified width, right aligned, zero-padded, hex print_zw_hex(int n,int width)132 void print_zw_hex(int n, int width) { 133 char b[256],f[8]; 134 sprintf(f, "%%0%dx", width); 135 snprintf(b, 256, f, n); 136 write(b); 137 } 138 }; 139 140 class sb_ostringstream : public sb_ostream { 141 std::string data; 142 public: sb_ostringstream()143 sb_ostringstream() : data() {} 144 write(const char * s)145 virtual void write(const char *s) { 146 data += s; 147 } 148 clear()149 void clear() { data.clear(); } 150 c_str()151 const char* c_str() { return data.c_str(); } str()152 std::string& str() { return data; } 153 }; 154 155 class sb_log : public sb_ostream { 156 FILE *o; 157 public: sb_log()158 sb_log() : o(stderr) {} 159 write(const char * s)160 virtual void write(const char *s) { 161 fputs(s, o); 162 } 163 }; 164 165 extern sb_log sblog; 166 167 enum shader_target 168 { 169 TARGET_UNKNOWN, 170 TARGET_VS, 171 TARGET_ES, 172 TARGET_PS, 173 TARGET_GS, 174 TARGET_GS_COPY, 175 TARGET_COMPUTE, 176 TARGET_FETCH, 177 TARGET_HS, 178 TARGET_LS, 179 180 TARGET_NUM 181 }; 182 183 enum sb_hw_class_bits 184 { 185 HB_R6 = (1<<0), 186 HB_R7 = (1<<1), 187 HB_EG = (1<<2), 188 HB_CM = (1<<3), 189 190 HB_R6R7 = (HB_R6 | HB_R7), 191 HB_EGCM = (HB_EG | HB_CM), 192 HB_R6R7EG = (HB_R6 | HB_R7 | HB_EG), 193 HB_R7EGCM = (HB_R7 | HB_EG | HB_CM), 194 195 HB_ALL = (HB_R6 | HB_R7 | HB_EG | HB_CM) 196 }; 197 198 enum sb_hw_chip 199 { 200 HW_CHIP_UNKNOWN, 201 HW_CHIP_R600, 202 HW_CHIP_RV610, 203 HW_CHIP_RV630, 204 HW_CHIP_RV670, 205 HW_CHIP_RV620, 206 HW_CHIP_RV635, 207 HW_CHIP_RS780, 208 HW_CHIP_RS880, 209 HW_CHIP_RV770, 210 HW_CHIP_RV730, 211 HW_CHIP_RV710, 212 HW_CHIP_RV740, 213 HW_CHIP_CEDAR, 214 HW_CHIP_REDWOOD, 215 HW_CHIP_JUNIPER, 216 HW_CHIP_CYPRESS, 217 HW_CHIP_HEMLOCK, 218 HW_CHIP_PALM, 219 HW_CHIP_SUMO, 220 HW_CHIP_SUMO2, 221 HW_CHIP_BARTS, 222 HW_CHIP_TURKS, 223 HW_CHIP_CAICOS, 224 HW_CHIP_CAYMAN, 225 HW_CHIP_ARUBA 226 }; 227 228 enum sb_hw_class 229 { 230 HW_CLASS_UNKNOWN, 231 HW_CLASS_R600, 232 HW_CLASS_R700, 233 HW_CLASS_EVERGREEN, 234 HW_CLASS_CAYMAN 235 }; 236 237 enum alu_slots { 238 SLOT_X = 0, 239 SLOT_Y = 1, 240 SLOT_Z = 2, 241 SLOT_W = 3, 242 SLOT_TRANS = 4 243 }; 244 245 enum misc_consts { 246 MAX_ALU_LITERALS = 4, 247 MAX_ALU_SLOTS = 128, 248 MAX_GPR = 128, 249 MAX_CHAN = 4 250 251 }; 252 253 enum alu_src_sel { 254 255 ALU_SRC_LDS_OQ_A = 219, 256 ALU_SRC_LDS_OQ_B = 220, 257 ALU_SRC_LDS_OQ_A_POP = 221, 258 ALU_SRC_LDS_OQ_B_POP = 222, 259 ALU_SRC_LDS_DIRECT_A = 223, 260 ALU_SRC_LDS_DIRECT_B = 224, 261 ALU_SRC_TIME_HI = 227, 262 ALU_SRC_TIME_LO = 228, 263 ALU_SRC_MASK_HI = 229, 264 ALU_SRC_MASK_LO = 230, 265 ALU_SRC_HW_WAVE_ID = 231, 266 ALU_SRC_SIMD_ID = 232, 267 ALU_SRC_SE_ID = 233, 268 ALU_SRC_HW_THREADGRP_ID = 234, 269 ALU_SRC_WAVE_ID_IN_GRP = 235, 270 ALU_SRC_NUM_THREADGRP_WAVES = 236, 271 ALU_SRC_HW_ALU_ODD = 237, 272 ALU_SRC_LOOP_IDX = 238, 273 ALU_SRC_PARAM_BASE_ADDR = 240, 274 ALU_SRC_NEW_PRIM_MASK = 241, 275 ALU_SRC_PRIM_MASK_HI = 242, 276 ALU_SRC_PRIM_MASK_LO = 243, 277 ALU_SRC_1_DBL_L = 244, 278 ALU_SRC_1_DBL_M = 245, 279 ALU_SRC_0_5_DBL_L = 246, 280 ALU_SRC_0_5_DBL_M = 247, 281 ALU_SRC_0 = 248, 282 ALU_SRC_1 = 249, 283 ALU_SRC_1_INT = 250, 284 ALU_SRC_M_1_INT = 251, 285 ALU_SRC_0_5 = 252, 286 ALU_SRC_LITERAL = 253, 287 ALU_SRC_PV = 254, 288 ALU_SRC_PS = 255, 289 290 ALU_SRC_PARAM_OFFSET = 448 291 }; 292 293 enum alu_predicate_select 294 { 295 PRED_SEL_OFF = 0, 296 // RESERVED = 1, 297 PRED_SEL_0 = 2, 298 PRED_SEL_1 = 3 299 }; 300 301 302 enum alu_omod { 303 OMOD_OFF = 0, 304 OMOD_M2 = 1, 305 OMOD_M4 = 2, 306 OMOD_D2 = 3 307 }; 308 309 enum alu_index_mode { 310 INDEX_AR_X = 0, 311 INDEX_AR_Y_R600 = 1, 312 INDEX_AR_Z_R600 = 2, 313 INDEX_AR_W_R600 = 3, 314 315 INDEX_LOOP = 4, 316 INDEX_GLOBAL = 5, 317 INDEX_GLOBAL_AR_X = 6 318 }; 319 320 enum alu_cayman_mova_dst { 321 CM_MOVADST_AR_X, 322 CM_MOVADST_PC, 323 CM_MOVADST_IDX0, 324 CM_MOVADST_IDX1, 325 CM_MOVADST_CG0, // clause-global byte 0 326 CM_MOVADST_CG1, 327 CM_MOVADST_CG2, 328 CM_MOVADST_CG3 329 }; 330 331 enum alu_cayman_exec_mask_op { 332 CM_EMO_DEACTIVATE, 333 CM_EMO_BREAK, 334 CM_EMO_CONTINUE, 335 CM_EMO_KILL 336 }; 337 338 339 enum cf_exp_type { 340 EXP_PIXEL, 341 EXP_POS, 342 EXP_PARAM, 343 344 EXP_TYPE_COUNT 345 }; 346 347 enum cf_mem_type { 348 MEM_WRITE, 349 MEM_WRITE_IND, 350 MEM_WRITE_ACK, 351 MEM_WRITE_IND_ACK 352 }; 353 354 355 enum alu_kcache_mode { 356 KC_LOCK_NONE, 357 KC_LOCK_1, 358 KC_LOCK_2, 359 KC_LOCK_LOOP 360 }; 361 362 enum alu_kcache_index_mode { 363 KC_INDEX_NONE, 364 KC_INDEX_0, 365 KC_INDEX_1, 366 KC_INDEX_INVALID 367 }; 368 369 enum chan_select { 370 SEL_X = 0, 371 SEL_Y = 1, 372 SEL_Z = 2, 373 SEL_W = 3, 374 SEL_0 = 4, 375 SEL_1 = 5, 376 // RESERVED = 6, 377 SEL_MASK = 7 378 }; 379 380 enum bank_swizzle { 381 VEC_012 = 0, 382 VEC_021 = 1, 383 VEC_120 = 2, 384 VEC_102 = 3, 385 VEC_201 = 4, 386 VEC_210 = 5, 387 388 VEC_NUM = 6, 389 390 SCL_210 = 0, 391 SCL_122 = 1, 392 SCL_212 = 2, 393 SCL_221 = 3, 394 395 SCL_NUM = 4 396 397 }; 398 399 enum sched_queue_id { 400 SQ_CF, 401 SQ_ALU, 402 SQ_TEX, 403 SQ_VTX, 404 SQ_GDS, 405 406 SQ_NUM 407 }; 408 409 struct literal { 410 union { 411 int32_t i; 412 uint32_t u; 413 float f; 414 }; 415 iliteral416 literal(int32_t i = 0) : i(i) {} literalliteral417 literal(uint32_t u) : u(u) {} literalliteral418 literal(float f) : f(f) {} literalliteral419 literal(double f) : f(f) {} uint32_tliteral420 operator uint32_t() const { return u; } 421 bool operator ==(literal l) { return u == l.u; } 422 bool operator ==(int v_int) { return i == v_int; } 423 bool operator ==(unsigned v_uns) { return u == v_uns; } 424 }; 425 426 struct bc_kcache { 427 unsigned mode; 428 unsigned bank; 429 unsigned addr; 430 unsigned index_mode; 431 } ; 432 433 // TODO optimize bc structures 434 435 struct bc_cf { 436 437 bc_kcache kc[4]; 438 439 unsigned id; 440 441 442 const cf_op_info * op_ptr; 443 unsigned op; 444 445 unsigned addr:32; 446 447 unsigned alt_const:1; 448 unsigned uses_waterfall:1; 449 450 unsigned barrier:1; 451 unsigned count:7; 452 unsigned pop_count:3; 453 unsigned call_count:6; 454 unsigned whole_quad_mode:1; 455 unsigned valid_pixel_mode:1; 456 457 unsigned jumptable_sel:3; 458 unsigned cf_const:5; 459 unsigned cond:2; 460 unsigned end_of_program:1; 461 462 unsigned array_base:13; 463 unsigned elem_size:2; 464 unsigned index_gpr:7; 465 unsigned rw_gpr:7; 466 unsigned rw_rel:1; 467 unsigned type:2; 468 469 unsigned burst_count:4; 470 unsigned mark:1; 471 unsigned sel[4]; 472 473 unsigned array_size:12; 474 unsigned comp_mask:4; 475 476 unsigned rat_id:4; 477 unsigned rat_inst:6; 478 unsigned rat_index_mode:2; 479 set_opbc_cf480 void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_cf(op); } 481 is_alu_extendedbc_cf482 bool is_alu_extended() { 483 assert(op_ptr->flags & CF_ALU); 484 return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE || 485 kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode != KC_INDEX_NONE || 486 kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode != KC_INDEX_NONE; 487 } 488 489 }; 490 491 struct bc_alu_src { 492 unsigned sel:9; 493 unsigned chan:2; 494 unsigned neg:1; 495 unsigned abs:1; 496 unsigned rel:1; 497 literal value; 498 clearbc_alu_src499 void clear() { 500 sel = 0; 501 chan = 0; 502 neg = 0; 503 abs = 0; 504 rel = 0; 505 value = 0; 506 } 507 }; 508 509 struct bc_alu { 510 const alu_op_info * op_ptr; 511 unsigned op; 512 513 bc_alu_src src[3]; 514 515 unsigned dst_gpr:7; 516 unsigned dst_chan:2; 517 unsigned dst_rel:1; 518 unsigned clamp:1; 519 unsigned omod:2; 520 unsigned bank_swizzle:3; 521 522 unsigned index_mode:3; 523 unsigned last:1; 524 unsigned pred_sel:2; 525 526 unsigned fog_merge:1; 527 unsigned write_mask:1; 528 unsigned update_exec_mask:1; 529 unsigned update_pred:1; 530 531 unsigned slot:3; 532 533 unsigned lds_idx_offset:6; 534 535 alu_op_flags slot_flags; 536 set_opbc_alu537 void set_op(unsigned op) { 538 this->op = op; 539 op_ptr = r600_isa_alu(op); 540 } clearbc_alu541 void clear() { 542 op_ptr = nullptr; 543 op = 0; 544 for (int i = 0; i < 3; ++i) 545 src[i].clear(); 546 dst_gpr = 0; 547 dst_chan = 0; 548 dst_rel = 0; 549 clamp = 0; 550 omod = 0; 551 bank_swizzle = 0; 552 index_mode = 0; 553 last = 0; 554 pred_sel = 0; 555 fog_merge = 0; 556 write_mask = 0; 557 update_exec_mask = 0; 558 update_pred = 0; 559 slot = 0; 560 lds_idx_offset = 0; 561 slot_flags = AF_NONE; 562 } bc_alubc_alu563 bc_alu() { 564 clear(); 565 } 566 }; 567 568 struct bc_fetch { 569 const fetch_op_info * op_ptr; 570 unsigned op; 571 572 unsigned bc_frac_mode:1; 573 unsigned fetch_whole_quad:1; 574 unsigned resource_id:8; 575 576 unsigned src_gpr:7; 577 unsigned src_rel:1; 578 unsigned src_rel_global:1; /* for GDS ops */ 579 unsigned src_sel[4]; 580 581 unsigned dst_gpr:7; 582 unsigned dst_rel:1; 583 unsigned dst_rel_global:1; /* for GDS ops */ 584 unsigned dst_sel[4]; 585 586 unsigned alt_const:1; 587 588 unsigned inst_mod:2; 589 unsigned resource_index_mode:2; 590 unsigned sampler_index_mode:2; 591 592 unsigned coord_type[4]; 593 unsigned lod_bias:7; 594 595 unsigned offset[3]; 596 597 unsigned sampler_id:5; 598 599 600 unsigned fetch_type:2; 601 unsigned mega_fetch_count:6; 602 unsigned coalesced_read:1; 603 unsigned structured_read:2; 604 unsigned lds_req:1; 605 606 unsigned data_format:6; 607 unsigned format_comp_all:1; 608 unsigned num_format_all:2; 609 unsigned semantic_id:8; 610 unsigned srf_mode_all:1; 611 unsigned use_const_fields:1; 612 613 unsigned const_buf_no_stride:1; 614 unsigned endian_swap:2; 615 unsigned mega_fetch:1; 616 617 unsigned src2_gpr:7; /* for GDS */ 618 unsigned alloc_consume:1; 619 unsigned uav_id:4; 620 unsigned uav_index_mode:2; 621 unsigned bcast_first_req:1; 622 623 /* for MEM ops */ 624 unsigned elem_size:2; 625 unsigned uncached:1; 626 unsigned indexed:1; 627 unsigned burst_count:4; 628 unsigned array_base:13; 629 unsigned array_size:12; 630 set_opbc_fetch631 void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); } 632 }; 633 634 struct shader_stats { 635 unsigned ndw; 636 unsigned ngpr; 637 unsigned nstack; 638 639 unsigned cf; // clause instructions not included 640 unsigned alu; 641 unsigned alu_clauses; 642 unsigned fetch_clauses; 643 unsigned fetch; 644 unsigned alu_groups; 645 646 unsigned shaders; // number of shaders (for accumulated stats) 647 shader_statsshader_stats648 shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(), 649 fetch_clauses(), fetch(), alu_groups(), shaders() {} 650 651 void collect(node *n); 652 void accumulate(shader_stats &s); 653 void dump(); 654 void dump_diff(shader_stats &s); 655 }; 656 657 class sb_context { 658 659 public: 660 661 shader_stats src_stats, opt_stats; 662 663 r600_isa *isa; 664 665 sb_hw_chip hw_chip; 666 sb_hw_class hw_class; 667 668 unsigned alu_temp_gprs; 669 unsigned max_fetch; 670 bool has_trans; 671 unsigned vtx_src_num; 672 unsigned num_slots; 673 bool uses_mova_gpr; 674 675 bool r6xx_gpr_index_workaround; 676 677 bool stack_workaround_8xx; 678 bool stack_workaround_9xx; 679 680 unsigned wavefront_size; 681 unsigned stack_entry_size; 682 683 static unsigned dump_pass; 684 static unsigned dump_stat; 685 686 static unsigned dry_run; 687 static unsigned no_fallback; 688 static unsigned safe_math; 689 690 static unsigned dskip_start; 691 static unsigned dskip_end; 692 static unsigned dskip_mode; 693 sb_context()694 sb_context() : src_stats(), opt_stats(), isa(0), 695 hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN), 696 alu_temp_gprs(0), max_fetch(0), has_trans(false), vtx_src_num(0), 697 num_slots(0), uses_mova_gpr(false), 698 r6xx_gpr_index_workaround(false), stack_workaround_8xx(false), 699 stack_workaround_9xx(false), wavefront_size(0), 700 stack_entry_size(0) {} 701 702 int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass); 703 is_r600()704 bool is_r600() {return hw_class == HW_CLASS_R600;} is_r700()705 bool is_r700() {return hw_class == HW_CLASS_R700;} is_evergreen()706 bool is_evergreen() {return hw_class == HW_CLASS_EVERGREEN;} is_cayman()707 bool is_cayman() {return hw_class == HW_CLASS_CAYMAN;} is_egcm()708 bool is_egcm() {return hw_class >= HW_CLASS_EVERGREEN;} 709 needs_8xx_stack_workaround()710 bool needs_8xx_stack_workaround() { 711 if (!is_evergreen()) 712 return false; 713 714 switch (hw_chip) { 715 case HW_CHIP_HEMLOCK: 716 case HW_CHIP_CYPRESS: 717 case HW_CHIP_JUNIPER: 718 return false; 719 default: 720 return true; 721 } 722 } 723 needs_9xx_stack_workaround()724 bool needs_9xx_stack_workaround() { 725 return is_cayman(); 726 } 727 hw_class_bit()728 sb_hw_class_bits hw_class_bit() { 729 switch (hw_class) { 730 case HW_CLASS_R600:return HB_R6; 731 case HW_CLASS_R700:return HB_R7; 732 case HW_CLASS_EVERGREEN:return HB_EG; 733 case HW_CLASS_CAYMAN:return HB_CM; 734 default: assert(!"unknown hw class"); return (sb_hw_class_bits)0; 735 736 } 737 } 738 cf_opcode(unsigned op)739 unsigned cf_opcode(unsigned op) { 740 return r600_isa_cf_opcode(isa->hw_class, op); 741 } 742 alu_opcode(unsigned op)743 unsigned alu_opcode(unsigned op) { 744 return r600_isa_alu_opcode(isa->hw_class, op); 745 } 746 alu_slots(unsigned op)747 unsigned alu_slots(unsigned op) { 748 return r600_isa_alu_slots(isa->hw_class, op); 749 } 750 alu_slots(const alu_op_info * op_ptr)751 unsigned alu_slots(const alu_op_info * op_ptr) { 752 return op_ptr->slots[isa->hw_class]; 753 } 754 alu_slots_mask(const alu_op_info * op_ptr)755 unsigned alu_slots_mask(const alu_op_info * op_ptr) { 756 unsigned mask = 0; 757 unsigned slot_flags = alu_slots(op_ptr); 758 if (slot_flags & AF_V) 759 mask = 0x0F; 760 if (!is_cayman() && (slot_flags & AF_S)) 761 mask |= 0x10; 762 /* Force LDS_IDX ops into SLOT_X */ 763 if (op_ptr->opcode[0] == -1 && ((op_ptr->opcode[1] & 0xFF) == 0x11)) 764 mask = 0x01; 765 return mask; 766 } 767 fetch_opcode(unsigned op)768 unsigned fetch_opcode(unsigned op) { 769 return r600_isa_fetch_opcode(isa->hw_class, op); 770 } 771 is_kcache_sel(unsigned sel)772 bool is_kcache_sel(unsigned sel) { 773 return ((sel >= 128 && sel < 192) || (sel >= 256 && sel < 320)); 774 } 775 is_lds_oq(unsigned sel)776 bool is_lds_oq(unsigned sel) { 777 return (sel >= 0xdb && sel <= 0xde); 778 } 779 780 const char * get_hw_class_name(); 781 const char * get_hw_chip_name(); 782 783 }; 784 785 #define SB_DUMP_STAT(a) do { if (sb_context::dump_stat) { a } } while (0) 786 #define SB_DUMP_PASS(a) do { if (sb_context::dump_pass) { a } } while (0) 787 788 class bc_decoder { 789 790 sb_context &ctx; 791 792 uint32_t* dw; 793 unsigned ndw; 794 795 public: 796 bc_decoder(sb_context & sctx,uint32_t * data,unsigned size)797 bc_decoder(sb_context &sctx, uint32_t *data, unsigned size) 798 : ctx(sctx), dw(data), ndw(size) {} 799 800 int decode_cf(unsigned &i, bc_cf &bc); 801 int decode_alu(unsigned &i, bc_alu &bc); 802 int decode_fetch(unsigned &i, bc_fetch &bc); 803 804 private: 805 int decode_cf_alu(unsigned &i, bc_cf &bc); 806 int decode_cf_exp(unsigned &i, bc_cf &bc); 807 int decode_cf_mem(unsigned &i, bc_cf &bc); 808 809 int decode_fetch_vtx(unsigned &i, bc_fetch &bc); 810 int decode_fetch_gds(unsigned &i, bc_fetch &bc); 811 int decode_fetch_mem(unsigned &i, bc_fetch &bc); 812 }; 813 814 // bytecode format definition 815 816 class hw_encoding_format { 817 const sb_hw_class_bits hw_target; //FIXME: debug - remove after testing 818 hw_encoding_format(); 819 protected: 820 uint32_t value; 821 public: hw_encoding_format(sb_hw_class_bits hw)822 hw_encoding_format(sb_hw_class_bits hw) 823 : hw_target(hw), value(0) {} hw_encoding_format(uint32_t v,sb_hw_class_bits hw)824 hw_encoding_format(uint32_t v, sb_hw_class_bits hw) 825 : hw_target(hw), value(v) {} get_value(sb_hw_class_bits hw)826 uint32_t get_value(sb_hw_class_bits hw) const { 827 assert((hw & hw_target) == hw); 828 return value; 829 } 830 }; 831 832 #define BC_FORMAT_BEGIN_HW(fmt, hwset) \ 833 class fmt##_##hwset : public hw_encoding_format {\ 834 typedef fmt##_##hwset thistype; \ 835 public: \ 836 fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \ 837 fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {}; 838 839 #define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL) 840 841 #define BC_FORMAT_END(fmt) }; 842 843 // bytecode format field definition 844 845 #define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \ 846 thistype & name(unsigned v) { \ 847 value |= ((v&((1ull<<((last_bit)-(first_bit)+1))-1))<<(first_bit)); \ 848 return *this; \ 849 } \ 850 unsigned get_##name() const { \ 851 return (value>>(first_bit))&((1ull<<((last_bit)-(first_bit)+1))-1); \ 852 } 853 854 #define BC_RSRVD(fmt, last_bit, first_bit) 855 856 // CLAMP macro defined elsewhere interferes with bytecode field name 857 #undef CLAMP 858 #include "sb_bc_fmt_def.inc" 859 860 #undef BC_FORMAT_BEGIN 861 #undef BC_FORMAT_END 862 #undef BC_FIELD 863 #undef BC_RSRVD 864 865 class bc_parser { 866 sb_context & ctx; 867 868 bc_decoder *dec; 869 870 r600_bytecode *bc; 871 r600_shader *pshader; 872 873 uint32_t *dw; 874 unsigned bc_ndw; 875 876 unsigned max_cf; 877 878 shader *sh; 879 880 int error; 881 882 alu_node *slots[2][5]; 883 unsigned cgroup; 884 885 typedef std::vector<cf_node*> id_cf_map; 886 id_cf_map cf_map; 887 888 typedef std::stack<region_node*> region_stack; 889 region_stack loop_stack; 890 891 bool gpr_reladdr; 892 893 // Note: currently relies on input emitting SET_CF in same basic block as uses 894 value *cf_index_value[2]; 895 alu_node *mova; 896 public: 897 bc_parser(sb_context & sctx,r600_bytecode * bc,r600_shader * pshader)898 bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) : 899 ctx(sctx), dec(), bc(bc), pshader(pshader), 900 dw(), bc_ndw(), max_cf(), 901 sh(), error(), slots(), cgroup(), 902 cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { } 903 904 int decode(); 905 int prepare(); 906 get_shader()907 shader* get_shader() { assert(!error); return sh; } 908 909 private: 910 911 int decode_shader(); 912 913 int parse_decls(); 914 915 int decode_cf(unsigned &i, bool &eop); 916 917 int decode_alu_clause(cf_node *cf); 918 int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt); 919 920 int decode_fetch_clause(cf_node *cf); 921 922 int prepare_ir(); 923 int prepare_alu_clause(cf_node *cf); 924 int prepare_alu_group(cf_node* cf, alu_group_node *g); 925 int prepare_fetch_clause(cf_node *cf); 926 927 int prepare_loop(cf_node *c); 928 int prepare_if(cf_node *c); 929 930 void save_set_cf_index(value *val, unsigned idx); 931 value *get_cf_index_value(unsigned idx); 932 void save_mova(alu_node *mova); 933 alu_node *get_mova(); 934 }; 935 936 937 938 939 class bytecode { 940 typedef std::vector<uint32_t> bc_vector; 941 sb_hw_class_bits hw_class_bit; 942 943 bc_vector bc; 944 945 unsigned pos; 946 947 public: 948 949 bytecode(sb_hw_class_bits hw, unsigned rdw = 256) hw_class_bit(hw)950 : hw_class_bit(hw), pos(0) { bc.reserve(rdw); } 951 ndw()952 unsigned ndw() { return bc.size(); } 953 write_data(uint32_t * dst)954 void write_data(uint32_t* dst) { 955 std::copy(bc.begin(), bc.end(), dst); 956 } 957 align(unsigned a)958 void align(unsigned a) { 959 unsigned size = bc.size(); 960 size = (size + a - 1) & ~(a-1); 961 bc.resize(size); 962 } 963 set_size(unsigned sz)964 void set_size(unsigned sz) { 965 assert(sz >= bc.size()); 966 bc.resize(sz); 967 } 968 seek(unsigned p)969 void seek(unsigned p) { 970 if (p != pos) { 971 if (p > bc.size()) { 972 bc.resize(p); 973 } 974 pos = p; 975 } 976 } 977 get_pos()978 unsigned get_pos() { return pos; } data()979 uint32_t *data() { return &bc[0]; } 980 981 bytecode & operator <<(uint32_t v) { 982 if (pos == ndw()) { 983 bc.push_back(v); 984 } else 985 bc.at(pos) = v; 986 ++pos; 987 return *this; 988 } 989 990 bytecode & operator <<(const hw_encoding_format &e) { 991 *this << e.get_value(hw_class_bit); 992 return *this; 993 } 994 995 bytecode & operator <<(const bytecode &b) { 996 bc.insert(bc.end(), b.bc.begin(), b.bc.end()); 997 return *this; 998 } 999 at(unsigned dw_id)1000 uint32_t at(unsigned dw_id) { return bc.at(dw_id); } 1001 }; 1002 1003 1004 class bc_builder { 1005 shader &sh; 1006 sb_context &ctx; 1007 bytecode bb; 1008 int error; 1009 1010 public: 1011 1012 bc_builder(shader &s); 1013 int build(); get_bytecode()1014 bytecode& get_bytecode() { assert(!error); return bb; } 1015 1016 private: 1017 1018 int build_cf(cf_node *n); 1019 1020 int build_cf_alu(cf_node *n); 1021 int build_cf_mem(cf_node *n); 1022 int build_cf_exp(cf_node *n); 1023 1024 int build_alu_clause(cf_node *n); 1025 int build_alu_group(alu_group_node *n); 1026 int build_alu(alu_node *n); 1027 1028 int build_fetch_clause(cf_node *n); 1029 int build_fetch_tex(fetch_node *n); 1030 int build_fetch_vtx(fetch_node *n); 1031 int build_fetch_gds(fetch_node *n); 1032 int build_fetch_mem(fetch_node* n); 1033 }; 1034 1035 } // namespace r600_sb 1036 1037 #endif /* SB_BC_H_ */ 1038