1 /* 2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27 #ifndef SB_BC_H_ 28 #define SB_BC_H_ 29 30 #include <stdint.h> 31 #include "r600_isa.h" 32 33 #include <cstdio> 34 #include <string> 35 #include <vector> 36 #include <stack> 37 38 struct r600_bytecode; 39 struct r600_shader; 40 41 namespace r600_sb { 42 43 class hw_encoding_format; 44 class node; 45 class alu_node; 46 class cf_node; 47 class fetch_node; 48 class alu_group_node; 49 class region_node; 50 class shader; 51 class value; 52 53 class sb_ostream { 54 public: sb_ostream()55 sb_ostream() {} 56 57 virtual void write(const char *s) = 0; 58 59 sb_ostream& operator <<(const char *s) { 60 write(s); 61 return *this; 62 } 63 64 sb_ostream& operator <<(const std::string& s) { 65 return *this << s.c_str(); 66 } 67 68 sb_ostream& operator <<(void *p) { 69 char b[32]; 70 sprintf(b, "%p", p); 71 return *this << b; 72 } 73 74 sb_ostream& operator <<(char c) { 75 char b[2]; 76 sprintf(b, "%c", c); 77 return *this << b; 78 } 79 80 sb_ostream& operator <<(int n) { 81 char b[32]; 82 sprintf(b, "%d", n); 83 return *this << b; 84 } 85 86 sb_ostream& operator <<(unsigned n) { 87 char b[32]; 88 sprintf(b, "%u", n); 89 return *this << b; 90 } 91 92 sb_ostream& operator <<(double d) { 93 char b[32]; 94 snprintf(b, 32, "%g", d); 95 return *this << b; 96 } 97 98 // print as field of specified width, right aligned print_w(int n,int width)99 void print_w(int n, int width) { 100 char b[256],f[8]; 101 sprintf(f, "%%%dd", width); 102 snprintf(b, 256, f, n); 103 write(b); 104 } 105 106 // print as field of specified width, left aligned print_wl(int n,int width)107 void print_wl(int n, int width) { 108 char b[256],f[8]; 109 sprintf(f, "%%-%dd", width); 110 snprintf(b, 256, f, n); 111 write(b); 112 } 113 114 // print as field of specified width, left aligned print_wl(const std::string & s,int width)115 void print_wl(const std::string &s, int width) { 116 write(s.c_str()); 117 int l = s.length(); 118 while (l++ < width) { 119 write(" "); 120 } 121 } 122 123 // print int as field of specified width, right aligned, zero-padded print_zw(int n,int width)124 void print_zw(int n, int width) { 125 char b[256],f[8]; 126 sprintf(f, "%%0%dd", width); 127 snprintf(b, 256, f, n); 128 write(b); 129 } 130 131 // print int as field of specified width, right aligned, zero-padded, hex print_zw_hex(int n,int width)132 void print_zw_hex(int n, int width) { 133 char b[256],f[8]; 134 sprintf(f, "%%0%dx", width); 135 snprintf(b, 256, f, n); 136 write(b); 137 } 138 }; 139 140 class sb_ostringstream : public sb_ostream { 141 std::string data; 142 public: sb_ostringstream()143 sb_ostringstream() : data() {} 144 write(const char * s)145 virtual void write(const char *s) { 146 data += s; 147 } 148 clear()149 void clear() { data.clear(); } 150 c_str()151 const char* c_str() { return data.c_str(); } str()152 std::string& str() { return data; } 153 }; 154 155 class sb_log : public sb_ostream { 156 FILE *o; 157 public: sb_log()158 sb_log() : o(stderr) {} 159 write(const char * s)160 virtual void write(const char *s) { 161 fputs(s, o); 162 } 163 }; 164 165 extern sb_log sblog; 166 167 enum shader_target 168 { 169 TARGET_UNKNOWN, 170 TARGET_VS, 171 TARGET_ES, 172 TARGET_PS, 173 TARGET_GS, 174 TARGET_GS_COPY, 175 TARGET_COMPUTE, 176 TARGET_FETCH, 177 TARGET_HS, 178 TARGET_LS, 179 180 TARGET_NUM 181 }; 182 183 enum sb_hw_class_bits 184 { 185 HB_R6 = (1<<0), 186 HB_R7 = (1<<1), 187 HB_EG = (1<<2), 188 HB_CM = (1<<3), 189 190 HB_R6R7 = (HB_R6 | HB_R7), 191 HB_EGCM = (HB_EG | HB_CM), 192 HB_R6R7EG = (HB_R6 | HB_R7 | HB_EG), 193 HB_R7EGCM = (HB_R7 | HB_EG | HB_CM), 194 195 HB_ALL = (HB_R6 | HB_R7 | HB_EG | HB_CM) 196 }; 197 198 enum sb_hw_chip 199 { 200 HW_CHIP_UNKNOWN, 201 HW_CHIP_R600, 202 HW_CHIP_RV610, 203 HW_CHIP_RV630, 204 HW_CHIP_RV670, 205 HW_CHIP_RV620, 206 HW_CHIP_RV635, 207 HW_CHIP_RS780, 208 HW_CHIP_RS880, 209 HW_CHIP_RV770, 210 HW_CHIP_RV730, 211 HW_CHIP_RV710, 212 HW_CHIP_RV740, 213 HW_CHIP_CEDAR, 214 HW_CHIP_REDWOOD, 215 HW_CHIP_JUNIPER, 216 HW_CHIP_CYPRESS, 217 HW_CHIP_HEMLOCK, 218 HW_CHIP_PALM, 219 HW_CHIP_SUMO, 220 HW_CHIP_SUMO2, 221 HW_CHIP_BARTS, 222 HW_CHIP_TURKS, 223 HW_CHIP_CAICOS, 224 HW_CHIP_CAYMAN, 225 HW_CHIP_ARUBA 226 }; 227 228 enum sb_hw_class 229 { 230 HW_CLASS_UNKNOWN, 231 HW_CLASS_R600, 232 HW_CLASS_R700, 233 HW_CLASS_EVERGREEN, 234 HW_CLASS_CAYMAN 235 }; 236 237 enum alu_slots { 238 SLOT_X = 0, 239 SLOT_Y = 1, 240 SLOT_Z = 2, 241 SLOT_W = 3, 242 SLOT_TRANS = 4 243 }; 244 245 enum misc_consts { 246 MAX_ALU_LITERALS = 4, 247 MAX_ALU_SLOTS = 128, 248 MAX_GPR = 128, 249 MAX_CHAN = 4 250 251 }; 252 253 enum alu_src_sel { 254 255 ALU_SRC_LDS_OQ_A = 219, 256 ALU_SRC_LDS_OQ_B = 220, 257 ALU_SRC_LDS_OQ_A_POP = 221, 258 ALU_SRC_LDS_OQ_B_POP = 222, 259 ALU_SRC_LDS_DIRECT_A = 223, 260 ALU_SRC_LDS_DIRECT_B = 224, 261 ALU_SRC_TIME_HI = 227, 262 ALU_SRC_TIME_LO = 228, 263 ALU_SRC_MASK_HI = 229, 264 ALU_SRC_MASK_LO = 230, 265 ALU_SRC_HW_WAVE_ID = 231, 266 ALU_SRC_SIMD_ID = 232, 267 ALU_SRC_SE_ID = 233, 268 ALU_SRC_HW_THREADGRP_ID = 234, 269 ALU_SRC_WAVE_ID_IN_GRP = 235, 270 ALU_SRC_NUM_THREADGRP_WAVES = 236, 271 ALU_SRC_HW_ALU_ODD = 237, 272 ALU_SRC_LOOP_IDX = 238, 273 ALU_SRC_PARAM_BASE_ADDR = 240, 274 ALU_SRC_NEW_PRIM_MASK = 241, 275 ALU_SRC_PRIM_MASK_HI = 242, 276 ALU_SRC_PRIM_MASK_LO = 243, 277 ALU_SRC_1_DBL_L = 244, 278 ALU_SRC_1_DBL_M = 245, 279 ALU_SRC_0_5_DBL_L = 246, 280 ALU_SRC_0_5_DBL_M = 247, 281 ALU_SRC_0 = 248, 282 ALU_SRC_1 = 249, 283 ALU_SRC_1_INT = 250, 284 ALU_SRC_M_1_INT = 251, 285 ALU_SRC_0_5 = 252, 286 ALU_SRC_LITERAL = 253, 287 ALU_SRC_PV = 254, 288 ALU_SRC_PS = 255, 289 290 ALU_SRC_PARAM_OFFSET = 448 291 }; 292 293 enum alu_predicate_select 294 { 295 PRED_SEL_OFF = 0, 296 // RESERVED = 1, 297 PRED_SEL_0 = 2, 298 PRED_SEL_1 = 3 299 }; 300 301 302 enum alu_omod { 303 OMOD_OFF = 0, 304 OMOD_M2 = 1, 305 OMOD_M4 = 2, 306 OMOD_D2 = 3 307 }; 308 309 enum alu_index_mode { 310 INDEX_AR_X = 0, 311 INDEX_AR_Y_R600 = 1, 312 INDEX_AR_Z_R600 = 2, 313 INDEX_AR_W_R600 = 3, 314 315 INDEX_LOOP = 4, 316 INDEX_GLOBAL = 5, 317 INDEX_GLOBAL_AR_X = 6 318 }; 319 320 enum alu_cayman_mova_dst { 321 CM_MOVADST_AR_X, 322 CM_MOVADST_PC, 323 CM_MOVADST_IDX0, 324 CM_MOVADST_IDX1, 325 CM_MOVADST_CG0, // clause-global byte 0 326 CM_MOVADST_CG1, 327 CM_MOVADST_CG2, 328 CM_MOVADST_CG3 329 }; 330 331 enum alu_cayman_exec_mask_op { 332 CM_EMO_DEACTIVATE, 333 CM_EMO_BREAK, 334 CM_EMO_CONTINUE, 335 CM_EMO_KILL 336 }; 337 338 339 enum cf_exp_type { 340 EXP_PIXEL, 341 EXP_POS, 342 EXP_PARAM, 343 344 EXP_TYPE_COUNT 345 }; 346 347 enum cf_mem_type { 348 MEM_WRITE, 349 MEM_WRITE_IND, 350 MEM_WRITE_ACK, 351 MEM_WRITE_IND_ACK 352 }; 353 354 355 enum alu_kcache_mode { 356 KC_LOCK_NONE, 357 KC_LOCK_1, 358 KC_LOCK_2, 359 KC_LOCK_LOOP 360 }; 361 362 enum alu_kcache_index_mode { 363 KC_INDEX_NONE, 364 KC_INDEX_0, 365 KC_INDEX_1, 366 KC_INDEX_INVALID 367 }; 368 369 enum chan_select { 370 SEL_X = 0, 371 SEL_Y = 1, 372 SEL_Z = 2, 373 SEL_W = 3, 374 SEL_0 = 4, 375 SEL_1 = 5, 376 // RESERVED = 6, 377 SEL_MASK = 7 378 }; 379 380 enum bank_swizzle { 381 VEC_012 = 0, 382 VEC_021 = 1, 383 VEC_120 = 2, 384 VEC_102 = 3, 385 VEC_201 = 4, 386 VEC_210 = 5, 387 388 VEC_NUM = 6, 389 390 SCL_210 = 0, 391 SCL_122 = 1, 392 SCL_212 = 2, 393 SCL_221 = 3, 394 395 SCL_NUM = 4 396 397 }; 398 399 enum sched_queue_id { 400 SQ_CF, 401 SQ_ALU, 402 SQ_TEX, 403 SQ_VTX, 404 405 SQ_NUM 406 }; 407 408 struct literal { 409 union { 410 int32_t i; 411 uint32_t u; 412 float f; 413 }; 414 iliteral415 literal(int32_t i = 0) : i(i) {} literalliteral416 literal(uint32_t u) : u(u) {} literalliteral417 literal(float f) : f(f) {} literalliteral418 literal(double f) : f(f) {} uint32_tliteral419 operator uint32_t() const { return u; } 420 bool operator ==(literal l) { return u == l.u; } 421 bool operator ==(int v_int) { return i == v_int; } 422 bool operator ==(unsigned v_uns) { return u == v_uns; } 423 }; 424 425 struct bc_kcache { 426 unsigned mode; 427 unsigned bank; 428 unsigned addr; 429 unsigned index_mode; 430 } ; 431 432 // TODO optimize bc structures 433 434 struct bc_cf { 435 436 bc_kcache kc[4]; 437 438 unsigned id; 439 440 441 const cf_op_info * op_ptr; 442 unsigned op; 443 444 unsigned addr:32; 445 446 unsigned alt_const:1; 447 unsigned uses_waterfall:1; 448 449 unsigned barrier:1; 450 unsigned count:7; 451 unsigned pop_count:3; 452 unsigned call_count:6; 453 unsigned whole_quad_mode:1; 454 unsigned valid_pixel_mode:1; 455 456 unsigned jumptable_sel:3; 457 unsigned cf_const:5; 458 unsigned cond:2; 459 unsigned end_of_program:1; 460 461 unsigned array_base:13; 462 unsigned elem_size:2; 463 unsigned index_gpr:7; 464 unsigned rw_gpr:7; 465 unsigned rw_rel:1; 466 unsigned type:2; 467 468 unsigned burst_count:4; 469 unsigned mark:1; 470 unsigned sel[4]; 471 472 unsigned array_size:12; 473 unsigned comp_mask:4; 474 475 unsigned rat_id:4; 476 unsigned rat_inst:6; 477 unsigned rat_index_mode:2; 478 set_opbc_cf479 void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_cf(op); } 480 is_alu_extendedbc_cf481 bool is_alu_extended() { 482 assert(op_ptr->flags & CF_ALU); 483 return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE || 484 kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode != KC_INDEX_NONE || 485 kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode != KC_INDEX_NONE; 486 } 487 488 }; 489 490 struct bc_alu_src { 491 unsigned sel:9; 492 unsigned chan:2; 493 unsigned neg:1; 494 unsigned abs:1; 495 unsigned rel:1; 496 literal value; 497 }; 498 499 struct bc_alu { 500 const alu_op_info * op_ptr; 501 unsigned op; 502 503 bc_alu_src src[3]; 504 505 unsigned dst_gpr:7; 506 unsigned dst_chan:2; 507 unsigned dst_rel:1; 508 unsigned clamp:1; 509 unsigned omod:2; 510 unsigned bank_swizzle:3; 511 512 unsigned index_mode:3; 513 unsigned last:1; 514 unsigned pred_sel:2; 515 516 unsigned fog_merge:1; 517 unsigned write_mask:1; 518 unsigned update_exec_mask:1; 519 unsigned update_pred:1; 520 521 unsigned slot:3; 522 523 unsigned lds_idx_offset:6; 524 525 alu_op_flags slot_flags; 526 set_opbc_alu527 void set_op(unsigned op) { 528 this->op = op; 529 op_ptr = r600_isa_alu(op); 530 } 531 }; 532 533 struct bc_fetch { 534 const fetch_op_info * op_ptr; 535 unsigned op; 536 537 unsigned bc_frac_mode:1; 538 unsigned fetch_whole_quad:1; 539 unsigned resource_id:8; 540 541 unsigned src_gpr:7; 542 unsigned src_rel:1; 543 unsigned src_rel_global:1; /* for GDS ops */ 544 unsigned src_sel[4]; 545 546 unsigned dst_gpr:7; 547 unsigned dst_rel:1; 548 unsigned dst_rel_global:1; /* for GDS ops */ 549 unsigned dst_sel[4]; 550 551 unsigned alt_const:1; 552 553 unsigned inst_mod:2; 554 unsigned resource_index_mode:2; 555 unsigned sampler_index_mode:2; 556 557 unsigned coord_type[4]; 558 unsigned lod_bias:7; 559 560 unsigned offset[3]; 561 562 unsigned sampler_id:5; 563 564 565 unsigned fetch_type:2; 566 unsigned mega_fetch_count:6; 567 unsigned coalesced_read:1; 568 unsigned structured_read:2; 569 unsigned lds_req:1; 570 571 unsigned data_format:6; 572 unsigned format_comp_all:1; 573 unsigned num_format_all:2; 574 unsigned semantic_id:8; 575 unsigned srf_mode_all:1; 576 unsigned use_const_fields:1; 577 578 unsigned const_buf_no_stride:1; 579 unsigned endian_swap:2; 580 unsigned mega_fetch:1; 581 582 unsigned src2_gpr:7; /* for GDS */ set_opbc_fetch583 void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); } 584 }; 585 586 struct shader_stats { 587 unsigned ndw; 588 unsigned ngpr; 589 unsigned nstack; 590 591 unsigned cf; // clause instructions not included 592 unsigned alu; 593 unsigned alu_clauses; 594 unsigned fetch_clauses; 595 unsigned fetch; 596 unsigned alu_groups; 597 598 unsigned shaders; // number of shaders (for accumulated stats) 599 shader_statsshader_stats600 shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(), 601 fetch_clauses(), fetch(), alu_groups(), shaders() {} 602 603 void collect(node *n); 604 void accumulate(shader_stats &s); 605 void dump(); 606 void dump_diff(shader_stats &s); 607 }; 608 609 class sb_context { 610 611 public: 612 613 shader_stats src_stats, opt_stats; 614 615 r600_isa *isa; 616 617 sb_hw_chip hw_chip; 618 sb_hw_class hw_class; 619 620 unsigned alu_temp_gprs; 621 unsigned max_fetch; 622 bool has_trans; 623 unsigned vtx_src_num; 624 unsigned num_slots; 625 bool uses_mova_gpr; 626 627 bool r6xx_gpr_index_workaround; 628 629 bool stack_workaround_8xx; 630 bool stack_workaround_9xx; 631 632 unsigned wavefront_size; 633 unsigned stack_entry_size; 634 635 static unsigned dump_pass; 636 static unsigned dump_stat; 637 638 static unsigned dry_run; 639 static unsigned no_fallback; 640 static unsigned safe_math; 641 642 static unsigned dskip_start; 643 static unsigned dskip_end; 644 static unsigned dskip_mode; 645 sb_context()646 sb_context() : src_stats(), opt_stats(), isa(0), 647 hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN) {} 648 649 int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass); 650 is_r600()651 bool is_r600() {return hw_class == HW_CLASS_R600;} is_r700()652 bool is_r700() {return hw_class == HW_CLASS_R700;} is_evergreen()653 bool is_evergreen() {return hw_class == HW_CLASS_EVERGREEN;} is_cayman()654 bool is_cayman() {return hw_class == HW_CLASS_CAYMAN;} is_egcm()655 bool is_egcm() {return hw_class >= HW_CLASS_EVERGREEN;} 656 needs_8xx_stack_workaround()657 bool needs_8xx_stack_workaround() { 658 if (!is_evergreen()) 659 return false; 660 661 switch (hw_chip) { 662 case HW_CHIP_CYPRESS: 663 case HW_CHIP_JUNIPER: 664 return false; 665 default: 666 return true; 667 } 668 } 669 needs_9xx_stack_workaround()670 bool needs_9xx_stack_workaround() { 671 return is_cayman(); 672 } 673 hw_class_bit()674 sb_hw_class_bits hw_class_bit() { 675 switch (hw_class) { 676 case HW_CLASS_R600:return HB_R6; 677 case HW_CLASS_R700:return HB_R7; 678 case HW_CLASS_EVERGREEN:return HB_EG; 679 case HW_CLASS_CAYMAN:return HB_CM; 680 default: assert(!"unknown hw class"); return (sb_hw_class_bits)0; 681 682 } 683 } 684 cf_opcode(unsigned op)685 unsigned cf_opcode(unsigned op) { 686 return r600_isa_cf_opcode(isa->hw_class, op); 687 } 688 alu_opcode(unsigned op)689 unsigned alu_opcode(unsigned op) { 690 return r600_isa_alu_opcode(isa->hw_class, op); 691 } 692 alu_slots(unsigned op)693 unsigned alu_slots(unsigned op) { 694 return r600_isa_alu_slots(isa->hw_class, op); 695 } 696 alu_slots(const alu_op_info * op_ptr)697 unsigned alu_slots(const alu_op_info * op_ptr) { 698 return op_ptr->slots[isa->hw_class]; 699 } 700 alu_slots_mask(const alu_op_info * op_ptr)701 unsigned alu_slots_mask(const alu_op_info * op_ptr) { 702 unsigned mask = 0; 703 unsigned slot_flags = alu_slots(op_ptr); 704 if (slot_flags & AF_V) 705 mask = 0x0F; 706 if (!is_cayman() && (slot_flags & AF_S)) 707 mask |= 0x10; 708 return mask; 709 } 710 fetch_opcode(unsigned op)711 unsigned fetch_opcode(unsigned op) { 712 return r600_isa_fetch_opcode(isa->hw_class, op); 713 } 714 is_kcache_sel(unsigned sel)715 bool is_kcache_sel(unsigned sel) { 716 return ((sel >= 128 && sel < 192) || (sel >= 256 && sel < 320)); 717 } 718 719 const char * get_hw_class_name(); 720 const char * get_hw_chip_name(); 721 722 }; 723 724 #define SB_DUMP_STAT(a) do { if (sb_context::dump_stat) { a } } while (0) 725 #define SB_DUMP_PASS(a) do { if (sb_context::dump_pass) { a } } while (0) 726 727 class bc_decoder { 728 729 sb_context &ctx; 730 731 uint32_t* dw; 732 unsigned ndw; 733 734 public: 735 bc_decoder(sb_context & sctx,uint32_t * data,unsigned size)736 bc_decoder(sb_context &sctx, uint32_t *data, unsigned size) 737 : ctx(sctx), dw(data), ndw(size) {} 738 739 int decode_cf(unsigned &i, bc_cf &bc); 740 int decode_alu(unsigned &i, bc_alu &bc); 741 int decode_fetch(unsigned &i, bc_fetch &bc); 742 743 private: 744 int decode_cf_alu(unsigned &i, bc_cf &bc); 745 int decode_cf_exp(unsigned &i, bc_cf &bc); 746 int decode_cf_mem(unsigned &i, bc_cf &bc); 747 748 int decode_fetch_vtx(unsigned &i, bc_fetch &bc); 749 int decode_fetch_gds(unsigned &i, bc_fetch &bc); 750 }; 751 752 // bytecode format definition 753 754 class hw_encoding_format { 755 const sb_hw_class_bits hw_target; //FIXME: debug - remove after testing 756 hw_encoding_format(); 757 protected: 758 uint32_t value; 759 public: hw_encoding_format(sb_hw_class_bits hw)760 hw_encoding_format(sb_hw_class_bits hw) 761 : hw_target(hw), value(0) {} hw_encoding_format(uint32_t v,sb_hw_class_bits hw)762 hw_encoding_format(uint32_t v, sb_hw_class_bits hw) 763 : hw_target(hw), value(v) {} get_value(sb_hw_class_bits hw)764 uint32_t get_value(sb_hw_class_bits hw) const { 765 assert((hw & hw_target) == hw); 766 return value; 767 } 768 }; 769 770 #define BC_FORMAT_BEGIN_HW(fmt, hwset) \ 771 class fmt##_##hwset : public hw_encoding_format {\ 772 typedef fmt##_##hwset thistype; \ 773 public: \ 774 fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \ 775 fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {}; 776 777 #define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL) 778 779 #define BC_FORMAT_END(fmt) }; 780 781 // bytecode format field definition 782 783 #define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \ 784 thistype & name(unsigned v) { \ 785 value |= ((v&((1ull<<((last_bit)-(first_bit)+1))-1))<<(first_bit)); \ 786 return *this; \ 787 } \ 788 unsigned get_##name() const { \ 789 return (value>>(first_bit))&((1ull<<((last_bit)-(first_bit)+1))-1); \ 790 } \ 791 792 #define BC_RSRVD(fmt, last_bit, first_bit) 793 794 // CLAMP macro defined elsewhere interferes with bytecode field name 795 #undef CLAMP 796 #include "sb_bc_fmt_def.inc" 797 798 #undef BC_FORMAT_BEGIN 799 #undef BC_FORMAT_END 800 #undef BC_FIELD 801 #undef BC_RSRVD 802 803 class bc_parser { 804 sb_context & ctx; 805 806 bc_decoder *dec; 807 808 r600_bytecode *bc; 809 r600_shader *pshader; 810 811 uint32_t *dw; 812 unsigned bc_ndw; 813 814 unsigned max_cf; 815 816 shader *sh; 817 818 int error; 819 820 alu_node *slots[2][5]; 821 unsigned cgroup; 822 823 typedef std::vector<cf_node*> id_cf_map; 824 id_cf_map cf_map; 825 826 typedef std::stack<region_node*> region_stack; 827 region_stack loop_stack; 828 829 bool gpr_reladdr; 830 831 // Note: currently relies on input emitting SET_CF in same basic block as uses 832 value *cf_index_value[2]; 833 alu_node *mova; 834 public: 835 bc_parser(sb_context & sctx,r600_bytecode * bc,r600_shader * pshader)836 bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) : 837 ctx(sctx), dec(), bc(bc), pshader(pshader), 838 dw(), bc_ndw(), max_cf(), 839 sh(), error(), slots(), cgroup(), 840 cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { } 841 842 int decode(); 843 int prepare(); 844 get_shader()845 shader* get_shader() { assert(!error); return sh; } 846 847 private: 848 849 int decode_shader(); 850 851 int parse_decls(); 852 853 int decode_cf(unsigned &i, bool &eop); 854 855 int decode_alu_clause(cf_node *cf); 856 int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt); 857 858 int decode_fetch_clause(cf_node *cf); 859 860 int prepare_ir(); 861 int prepare_alu_clause(cf_node *cf); 862 int prepare_alu_group(cf_node* cf, alu_group_node *g); 863 int prepare_fetch_clause(cf_node *cf); 864 865 int prepare_loop(cf_node *c); 866 int prepare_if(cf_node *c); 867 868 void save_set_cf_index(value *val, unsigned idx); 869 value *get_cf_index_value(unsigned idx); 870 void save_mova(alu_node *mova); 871 alu_node *get_mova(); 872 }; 873 874 875 876 877 class bytecode { 878 typedef std::vector<uint32_t> bc_vector; 879 sb_hw_class_bits hw_class_bit; 880 881 bc_vector bc; 882 883 unsigned pos; 884 885 public: 886 887 bytecode(sb_hw_class_bits hw, unsigned rdw = 256) hw_class_bit(hw)888 : hw_class_bit(hw), pos(0) { bc.reserve(rdw); } 889 ndw()890 unsigned ndw() { return bc.size(); } 891 write_data(uint32_t * dst)892 void write_data(uint32_t* dst) { 893 std::copy(bc.begin(), bc.end(), dst); 894 } 895 align(unsigned a)896 void align(unsigned a) { 897 unsigned size = bc.size(); 898 size = (size + a - 1) & ~(a-1); 899 bc.resize(size); 900 } 901 set_size(unsigned sz)902 void set_size(unsigned sz) { 903 assert(sz >= bc.size()); 904 bc.resize(sz); 905 } 906 seek(unsigned p)907 void seek(unsigned p) { 908 if (p != pos) { 909 if (p > bc.size()) { 910 bc.resize(p); 911 } 912 pos = p; 913 } 914 } 915 get_pos()916 unsigned get_pos() { return pos; } data()917 uint32_t *data() { return &bc[0]; } 918 919 bytecode & operator <<(uint32_t v) { 920 if (pos == ndw()) { 921 bc.push_back(v); 922 } else 923 bc.at(pos) = v; 924 ++pos; 925 return *this; 926 } 927 928 bytecode & operator <<(const hw_encoding_format &e) { 929 *this << e.get_value(hw_class_bit); 930 return *this; 931 } 932 933 bytecode & operator <<(const bytecode &b) { 934 bc.insert(bc.end(), b.bc.begin(), b.bc.end()); 935 return *this; 936 } 937 at(unsigned dw_id)938 uint32_t at(unsigned dw_id) { return bc.at(dw_id); } 939 }; 940 941 942 class bc_builder { 943 shader &sh; 944 sb_context &ctx; 945 bytecode bb; 946 int error; 947 948 public: 949 950 bc_builder(shader &s); 951 int build(); get_bytecode()952 bytecode& get_bytecode() { assert(!error); return bb; } 953 954 private: 955 956 int build_cf(cf_node *n); 957 958 int build_cf_alu(cf_node *n); 959 int build_cf_mem(cf_node *n); 960 int build_cf_exp(cf_node *n); 961 962 int build_alu_clause(cf_node *n); 963 int build_alu_group(alu_group_node *n); 964 int build_alu(alu_node *n); 965 966 int build_fetch_clause(cf_node *n); 967 int build_fetch_tex(fetch_node *n); 968 int build_fetch_vtx(fetch_node *n); 969 }; 970 971 } // namespace r600_sb 972 973 #endif /* SB_BC_H_ */ 974